@juspay/neurolink 9.41.0 → 9.42.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/CHANGELOG.md +8 -0
  2. package/README.md +7 -1
  3. package/dist/auth/anthropicOAuth.d.ts +18 -3
  4. package/dist/auth/anthropicOAuth.js +149 -4
  5. package/dist/auth/providers/firebase.js +5 -1
  6. package/dist/auth/providers/jwt.js +5 -1
  7. package/dist/auth/providers/workos.js +5 -1
  8. package/dist/auth/sessionManager.d.ts +1 -1
  9. package/dist/auth/sessionManager.js +58 -27
  10. package/dist/browser/neurolink.min.js +354 -334
  11. package/dist/cli/commands/mcp.d.ts +6 -0
  12. package/dist/cli/commands/mcp.js +188 -181
  13. package/dist/cli/commands/proxy.d.ts +2 -1
  14. package/dist/cli/commands/proxy.js +713 -431
  15. package/dist/cli/commands/task.js +3 -0
  16. package/dist/cli/factories/commandFactory.d.ts +2 -0
  17. package/dist/cli/factories/commandFactory.js +38 -0
  18. package/dist/cli/parser.js +4 -3
  19. package/dist/client/aiSdkAdapter.js +3 -0
  20. package/dist/client/streamingClient.js +30 -10
  21. package/dist/core/baseProvider.d.ts +6 -1
  22. package/dist/core/baseProvider.js +208 -230
  23. package/dist/core/factory.d.ts +3 -0
  24. package/dist/core/factory.js +138 -188
  25. package/dist/core/modules/GenerationHandler.js +3 -2
  26. package/dist/core/redisConversationMemoryManager.js +7 -3
  27. package/dist/evaluation/BatchEvaluator.js +4 -1
  28. package/dist/evaluation/hooks/observabilityHooks.js +5 -3
  29. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
  30. package/dist/evaluation/pipeline/evaluationPipeline.js +24 -9
  31. package/dist/evaluation/pipeline/strategies/batchStrategy.js +6 -3
  32. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
  33. package/dist/evaluation/scorers/scorerRegistry.d.ts +3 -0
  34. package/dist/evaluation/scorers/scorerRegistry.js +353 -282
  35. package/dist/lib/auth/anthropicOAuth.d.ts +18 -3
  36. package/dist/lib/auth/anthropicOAuth.js +149 -4
  37. package/dist/lib/auth/providers/firebase.js +5 -1
  38. package/dist/lib/auth/providers/jwt.js +5 -1
  39. package/dist/lib/auth/providers/workos.js +5 -1
  40. package/dist/lib/auth/sessionManager.d.ts +1 -1
  41. package/dist/lib/auth/sessionManager.js +58 -27
  42. package/dist/lib/client/aiSdkAdapter.js +3 -0
  43. package/dist/lib/client/streamingClient.js +30 -10
  44. package/dist/lib/core/baseProvider.d.ts +6 -1
  45. package/dist/lib/core/baseProvider.js +208 -230
  46. package/dist/lib/core/factory.d.ts +3 -0
  47. package/dist/lib/core/factory.js +138 -188
  48. package/dist/lib/core/modules/GenerationHandler.js +3 -2
  49. package/dist/lib/core/redisConversationMemoryManager.js +7 -3
  50. package/dist/lib/evaluation/BatchEvaluator.js +4 -1
  51. package/dist/lib/evaluation/hooks/observabilityHooks.js +5 -3
  52. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
  53. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +24 -9
  54. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +6 -3
  55. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
  56. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +3 -0
  57. package/dist/lib/evaluation/scorers/scorerRegistry.js +353 -282
  58. package/dist/lib/mcp/toolRegistry.d.ts +2 -0
  59. package/dist/lib/mcp/toolRegistry.js +32 -31
  60. package/dist/lib/neurolink.d.ts +41 -2
  61. package/dist/lib/neurolink.js +1616 -1681
  62. package/dist/lib/observability/otelBridge.d.ts +2 -2
  63. package/dist/lib/observability/otelBridge.js +12 -3
  64. package/dist/lib/providers/amazonBedrock.js +2 -4
  65. package/dist/lib/providers/anthropic.d.ts +9 -5
  66. package/dist/lib/providers/anthropic.js +19 -14
  67. package/dist/lib/providers/anthropicBaseProvider.d.ts +3 -3
  68. package/dist/lib/providers/anthropicBaseProvider.js +5 -4
  69. package/dist/lib/providers/azureOpenai.d.ts +1 -1
  70. package/dist/lib/providers/azureOpenai.js +5 -4
  71. package/dist/lib/providers/googleAiStudio.js +30 -6
  72. package/dist/lib/providers/googleVertex.d.ts +10 -0
  73. package/dist/lib/providers/googleVertex.js +437 -423
  74. package/dist/lib/providers/huggingFace.d.ts +3 -3
  75. package/dist/lib/providers/huggingFace.js +6 -8
  76. package/dist/lib/providers/litellm.d.ts +1 -0
  77. package/dist/lib/providers/litellm.js +76 -55
  78. package/dist/lib/providers/mistral.js +2 -1
  79. package/dist/lib/providers/ollama.js +93 -23
  80. package/dist/lib/providers/openAI.d.ts +2 -0
  81. package/dist/lib/providers/openAI.js +141 -141
  82. package/dist/lib/providers/openRouter.js +2 -1
  83. package/dist/lib/providers/openaiCompatible.d.ts +4 -4
  84. package/dist/lib/providers/openaiCompatible.js +4 -4
  85. package/dist/lib/proxy/claudeFormat.d.ts +3 -2
  86. package/dist/lib/proxy/claudeFormat.js +27 -14
  87. package/dist/lib/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
  88. package/dist/lib/proxy/cloaking/plugins/sessionIdentity.js +9 -33
  89. package/dist/lib/proxy/modelRouter.js +3 -0
  90. package/dist/lib/proxy/oauthFetch.d.ts +1 -1
  91. package/dist/lib/proxy/oauthFetch.js +289 -316
  92. package/dist/lib/proxy/proxyConfig.js +46 -24
  93. package/dist/lib/proxy/proxyEnv.d.ts +19 -0
  94. package/dist/lib/proxy/proxyEnv.js +73 -0
  95. package/dist/lib/proxy/proxyFetch.js +291 -217
  96. package/dist/lib/proxy/proxyTracer.d.ts +133 -0
  97. package/dist/lib/proxy/proxyTracer.js +645 -0
  98. package/dist/lib/proxy/rawStreamCapture.d.ts +10 -0
  99. package/dist/lib/proxy/rawStreamCapture.js +83 -0
  100. package/dist/lib/proxy/requestLogger.d.ts +32 -5
  101. package/dist/lib/proxy/requestLogger.js +503 -47
  102. package/dist/lib/proxy/sseInterceptor.d.ts +97 -0
  103. package/dist/lib/proxy/sseInterceptor.js +427 -0
  104. package/dist/lib/proxy/usageStats.d.ts +4 -3
  105. package/dist/lib/proxy/usageStats.js +25 -12
  106. package/dist/lib/rag/chunkers/MarkdownChunker.js +13 -5
  107. package/dist/lib/rag/chunking/markdownChunker.js +15 -6
  108. package/dist/lib/server/routes/claudeProxyRoutes.d.ts +17 -3
  109. package/dist/lib/server/routes/claudeProxyRoutes.js +3032 -1349
  110. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +7 -1
  111. package/dist/lib/services/server/ai/observability/instrumentation.js +337 -161
  112. package/dist/lib/tasks/backends/bullmqBackend.d.ts +1 -0
  113. package/dist/lib/tasks/backends/bullmqBackend.js +35 -22
  114. package/dist/lib/tasks/store/redisTaskStore.d.ts +1 -0
  115. package/dist/lib/tasks/store/redisTaskStore.js +54 -39
  116. package/dist/lib/tasks/taskManager.d.ts +5 -0
  117. package/dist/lib/tasks/taskManager.js +158 -30
  118. package/dist/lib/telemetry/index.d.ts +2 -1
  119. package/dist/lib/telemetry/index.js +2 -1
  120. package/dist/lib/telemetry/telemetryService.d.ts +3 -0
  121. package/dist/lib/telemetry/telemetryService.js +69 -5
  122. package/dist/lib/types/cli.d.ts +10 -0
  123. package/dist/lib/types/proxyTypes.d.ts +160 -5
  124. package/dist/lib/types/streamTypes.d.ts +25 -3
  125. package/dist/lib/utils/messageBuilder.js +3 -2
  126. package/dist/lib/utils/providerHealth.d.ts +19 -0
  127. package/dist/lib/utils/providerHealth.js +279 -33
  128. package/dist/lib/utils/providerUtils.js +17 -22
  129. package/dist/lib/utils/toolChoice.d.ts +4 -0
  130. package/dist/lib/utils/toolChoice.js +7 -0
  131. package/dist/mcp/toolRegistry.d.ts +2 -0
  132. package/dist/mcp/toolRegistry.js +32 -31
  133. package/dist/neurolink.d.ts +41 -2
  134. package/dist/neurolink.js +1616 -1681
  135. package/dist/observability/otelBridge.d.ts +2 -2
  136. package/dist/observability/otelBridge.js +12 -3
  137. package/dist/providers/amazonBedrock.js +2 -4
  138. package/dist/providers/anthropic.d.ts +9 -5
  139. package/dist/providers/anthropic.js +19 -14
  140. package/dist/providers/anthropicBaseProvider.d.ts +3 -3
  141. package/dist/providers/anthropicBaseProvider.js +5 -4
  142. package/dist/providers/azureOpenai.d.ts +1 -1
  143. package/dist/providers/azureOpenai.js +5 -4
  144. package/dist/providers/googleAiStudio.js +30 -6
  145. package/dist/providers/googleVertex.d.ts +10 -0
  146. package/dist/providers/googleVertex.js +437 -423
  147. package/dist/providers/huggingFace.d.ts +3 -3
  148. package/dist/providers/huggingFace.js +6 -7
  149. package/dist/providers/litellm.d.ts +1 -0
  150. package/dist/providers/litellm.js +76 -55
  151. package/dist/providers/mistral.js +2 -1
  152. package/dist/providers/ollama.js +93 -23
  153. package/dist/providers/openAI.d.ts +2 -0
  154. package/dist/providers/openAI.js +141 -141
  155. package/dist/providers/openRouter.js +2 -1
  156. package/dist/providers/openaiCompatible.d.ts +4 -4
  157. package/dist/providers/openaiCompatible.js +4 -3
  158. package/dist/proxy/claudeFormat.d.ts +3 -2
  159. package/dist/proxy/claudeFormat.js +27 -14
  160. package/dist/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
  161. package/dist/proxy/cloaking/plugins/sessionIdentity.js +9 -33
  162. package/dist/proxy/modelRouter.js +3 -0
  163. package/dist/proxy/oauthFetch.d.ts +1 -1
  164. package/dist/proxy/oauthFetch.js +289 -316
  165. package/dist/proxy/proxyConfig.js +46 -24
  166. package/dist/proxy/proxyEnv.d.ts +19 -0
  167. package/dist/proxy/proxyEnv.js +72 -0
  168. package/dist/proxy/proxyFetch.js +291 -217
  169. package/dist/proxy/proxyTracer.d.ts +133 -0
  170. package/dist/proxy/proxyTracer.js +644 -0
  171. package/dist/proxy/rawStreamCapture.d.ts +10 -0
  172. package/dist/proxy/rawStreamCapture.js +82 -0
  173. package/dist/proxy/requestLogger.d.ts +32 -5
  174. package/dist/proxy/requestLogger.js +503 -47
  175. package/dist/proxy/sseInterceptor.d.ts +97 -0
  176. package/dist/proxy/sseInterceptor.js +426 -0
  177. package/dist/proxy/usageStats.d.ts +4 -3
  178. package/dist/proxy/usageStats.js +25 -12
  179. package/dist/rag/chunkers/MarkdownChunker.js +13 -5
  180. package/dist/rag/chunking/markdownChunker.js +15 -6
  181. package/dist/server/routes/claudeProxyRoutes.d.ts +17 -3
  182. package/dist/server/routes/claudeProxyRoutes.js +3032 -1349
  183. package/dist/services/server/ai/observability/instrumentation.d.ts +7 -1
  184. package/dist/services/server/ai/observability/instrumentation.js +337 -161
  185. package/dist/tasks/backends/bullmqBackend.d.ts +1 -0
  186. package/dist/tasks/backends/bullmqBackend.js +35 -22
  187. package/dist/tasks/store/redisTaskStore.d.ts +1 -0
  188. package/dist/tasks/store/redisTaskStore.js +54 -39
  189. package/dist/tasks/taskManager.d.ts +5 -0
  190. package/dist/tasks/taskManager.js +158 -30
  191. package/dist/telemetry/index.d.ts +2 -1
  192. package/dist/telemetry/index.js +2 -1
  193. package/dist/telemetry/telemetryService.d.ts +3 -0
  194. package/dist/telemetry/telemetryService.js +69 -5
  195. package/dist/types/cli.d.ts +10 -0
  196. package/dist/types/proxyTypes.d.ts +160 -5
  197. package/dist/types/streamTypes.d.ts +25 -3
  198. package/dist/utils/messageBuilder.js +3 -2
  199. package/dist/utils/providerHealth.d.ts +19 -0
  200. package/dist/utils/providerHealth.js +279 -33
  201. package/dist/utils/providerUtils.js +18 -22
  202. package/dist/utils/toolChoice.d.ts +4 -0
  203. package/dist/utils/toolChoice.js +6 -0
  204. package/docs/assets/dashboards/neurolink-proxy-observability-dashboard.json +6609 -0
  205. package/docs/changelog.md +252 -0
  206. package/package.json +19 -2
  207. package/scripts/observability/check-proxy-telemetry.mjs +235 -0
  208. package/scripts/observability/docker-compose.proxy-observability.yaml +55 -0
  209. package/scripts/observability/import-openobserve-dashboard.mjs +240 -0
  210. package/scripts/observability/manage-local-openobserve.sh +215 -0
  211. package/scripts/observability/otel-collector.proxy-observability.yaml +78 -0
  212. package/scripts/observability/proxy-observability.env.example +23 -0
@@ -50,18 +50,21 @@ import { getMetricsAggregator, MetricsAggregator, } from "./observability/metric
50
50
  import { SpanStatus, SpanType } from "./observability/types/spanTypes.js";
51
51
  import { SpanSerializer } from "./observability/utils/spanSerializer.js";
52
52
  import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
53
+ import { TaskManager } from "./tasks/taskManager.js";
54
+ import { createTaskTools } from "./tasks/tools/taskTools.js";
53
55
  import { ATTR } from "./telemetry/attributes.js";
54
56
  import { tracers } from "./telemetry/tracers.js";
57
+ import { CircuitBreakerOpenError } from "./types/circuitBreakerErrors.js";
55
58
  import { ConversationMemoryError } from "./types/conversation.js";
56
59
  import { AuthenticationError, AuthorizationError, InvalidModelError, } from "./types/errors.js";
57
60
  import { getConversationMessages, storeConversationTurn, } from "./utils/conversationMemory.js";
58
61
  // Enhanced error handling imports
59
62
  import { CircuitBreaker, ERROR_CODES, ErrorFactory, isAbortError, isRetriableError, logStructuredError, NeuroLinkError, withRetry, withTimeout, } from "./utils/errorHandling.js";
60
- import { CircuitBreakerOpenError } from "./types/circuitBreakerErrors.js";
61
63
  // Factory processing imports
62
64
  import { createCleanStreamOptions, enhanceTextGenerationOptions, processFactoryOptions, processStreamingFactoryOptions, validateFactoryConfig, } from "./utils/factoryProcessing.js";
63
65
  import { logger, mcpLogger } from "./utils/logger.js";
64
66
  import { createCustomToolServerInfo, detectCategory, } from "./utils/mcpDefaults.js";
67
+ import { resolveModel } from "./utils/modelAliasResolver.js";
65
68
  // Import orchestration components
66
69
  import { ModelRouter } from "./utils/modelRouter.js";
67
70
  import { getBestProvider } from "./utils/providerUtils.js";
@@ -72,11 +75,8 @@ import { BinaryTaskClassifier } from "./utils/taskClassifier.js";
72
75
  // Transformation utilities
73
76
  import { extractToolNames, optimizeToolForCollection, transformAvailableTools, transformParamsForLogging, transformToolExecutions, transformToolExecutionsForMCP, transformToolsForMCP, transformToolsToDescriptions, transformToolsToExpectedFormat, } from "./utils/transformationUtils.js";
74
77
  import { isNonNullObject } from "./utils/typeUtils.js";
75
- import { resolveModel } from "./utils/modelAliasResolver.js";
76
78
  import { getWorkflow } from "./workflow/core/workflowRegistry.js";
77
79
  import { runWorkflow } from "./workflow/core/workflowRunner.js";
78
- import { TaskManager } from "./tasks/taskManager.js";
79
- import { createTaskTools } from "./tasks/tools/taskTools.js";
80
80
  /**
81
81
  * NL-002: Classify MCP error messages into categories for AI disambiguation.
82
82
  * Returns a human-readable error category based on error message content.
@@ -339,6 +339,137 @@ export class NeuroLink {
339
339
  }
340
340
  return await callback();
341
341
  }
342
+ createMetricsTraceContext() {
343
+ return {
344
+ traceId: crypto.randomUUID().replace(/-/g, ""),
345
+ parentSpanId: crypto.randomUUID().replace(/-/g, "").substring(0, 16),
346
+ };
347
+ }
348
+ enforceSessionBudget(maxBudgetUsd) {
349
+ if (maxBudgetUsd === undefined ||
350
+ maxBudgetUsd <= 0 ||
351
+ this._sessionCostUsd < maxBudgetUsd) {
352
+ return;
353
+ }
354
+ throw new NeuroLinkError({
355
+ code: "SESSION_BUDGET_EXCEEDED",
356
+ message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${maxBudgetUsd.toFixed(4)} limit`,
357
+ category: ErrorCategory.VALIDATION,
358
+ severity: ErrorSeverity.HIGH,
359
+ retriable: false,
360
+ context: {
361
+ spent: this._sessionCostUsd,
362
+ limit: maxBudgetUsd,
363
+ },
364
+ });
365
+ }
366
+ assertInputText(text, message) {
367
+ if (!text || typeof text !== "string") {
368
+ throw new Error(message);
369
+ }
370
+ }
371
+ async applyAuthenticatedRequestContext(options) {
372
+ if (options.auth?.token) {
373
+ const { AuthError } = await import("./auth/errors.js");
374
+ await this.ensureAuthProvider();
375
+ if (!this.authProvider) {
376
+ throw AuthError.create("PROVIDER_ERROR", "No auth provider configured. Set auth in constructor or via setAuthProvider() before using auth: { token }.");
377
+ }
378
+ let authResult;
379
+ try {
380
+ authResult = await withTimeout(this.authProvider.authenticateToken(options.auth.token), 5000, AuthError.create("PROVIDER_ERROR", "Auth token validation timed out after 5000ms"));
381
+ }
382
+ catch (error) {
383
+ if (error instanceof Error &&
384
+ "feature" in error &&
385
+ error.feature === "Auth") {
386
+ throw error;
387
+ }
388
+ throw AuthError.create("PROVIDER_ERROR", `Auth token validation failed: ${error instanceof Error ? error.message : String(error)}`);
389
+ }
390
+ if (!authResult.valid) {
391
+ throw AuthError.create("INVALID_TOKEN", authResult.error || "Token validation failed");
392
+ }
393
+ if (!authResult.user) {
394
+ throw AuthError.create("INVALID_TOKEN", "Token validated but no user identity returned");
395
+ }
396
+ if (!authResult.user.id) {
397
+ throw AuthError.create("INVALID_TOKEN", "Token validated but user identity missing required 'id' field");
398
+ }
399
+ options.context = {
400
+ ...(options.context || {}),
401
+ userId: authResult.user.id,
402
+ userEmail: authResult.user.email,
403
+ userRoles: authResult.user.roles,
404
+ };
405
+ }
406
+ if (!options.requestContext) {
407
+ return;
408
+ }
409
+ const tokenDerivedFields = options.auth?.token && this.authProvider
410
+ ? {
411
+ userId: options.context?.userId,
412
+ userEmail: options.context?.userEmail,
413
+ userRoles: options.context?.userRoles,
414
+ }
415
+ : {};
416
+ options.context = {
417
+ ...(options.context || {}),
418
+ ...options.requestContext,
419
+ ...tokenDerivedFields,
420
+ };
421
+ }
422
+ applyGenerateLifecycleMiddleware(options) {
423
+ if (!options.onFinish && !options.onError) {
424
+ return;
425
+ }
426
+ options.middleware = {
427
+ ...options.middleware,
428
+ middlewareConfig: {
429
+ ...options.middleware?.middlewareConfig,
430
+ lifecycle: {
431
+ ...options.middleware?.middlewareConfig?.lifecycle,
432
+ enabled: true,
433
+ config: {
434
+ ...options.middleware?.middlewareConfig?.lifecycle?.config,
435
+ ...(options.onFinish !== undefined
436
+ ? { onFinish: options.onFinish }
437
+ : {}),
438
+ ...(options.onError !== undefined
439
+ ? { onError: options.onError }
440
+ : {}),
441
+ },
442
+ },
443
+ },
444
+ };
445
+ }
446
+ applyStreamLifecycleMiddleware(options) {
447
+ if (!options.onFinish && !options.onError && !options.onChunk) {
448
+ return;
449
+ }
450
+ options.middleware = {
451
+ ...options.middleware,
452
+ middlewareConfig: {
453
+ ...options.middleware?.middlewareConfig,
454
+ lifecycle: {
455
+ ...options.middleware?.middlewareConfig?.lifecycle,
456
+ enabled: true,
457
+ config: {
458
+ ...options.middleware?.middlewareConfig?.lifecycle?.config,
459
+ ...(options.onFinish !== undefined
460
+ ? { onFinish: options.onFinish }
461
+ : {}),
462
+ ...(options.onError !== undefined
463
+ ? { onError: options.onError }
464
+ : {}),
465
+ ...(options.onChunk !== undefined
466
+ ? { onChunk: options.onChunk }
467
+ : {}),
468
+ },
469
+ },
470
+ },
471
+ };
472
+ }
342
473
  initializeMemoryConfig() {
343
474
  const memory = this.conversationMemoryConfig?.conversationMemory?.memory;
344
475
  if (!memory?.enabled) {
@@ -2336,426 +2467,313 @@ Current user's request: ${currentInput}`;
2336
2467
  * @since 1.0.0
2337
2468
  */
2338
2469
  async generate(optionsOrPrompt) {
2339
- return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, async (generateSpan) => {
2340
- // Set metrics trace context for parent-child span linking.
2341
- // The generation span will be the root (no parentSpanId).
2342
- // Tool spans will be children of the root span via rootSpanId.
2343
- const metricsTraceId = crypto.randomUUID().replace(/-/g, "");
2344
- const metricsRootSpanId = crypto
2345
- .randomUUID()
2346
- .replace(/-/g, "")
2347
- .substring(0, 16);
2348
- // Scope trace context to this request via AsyncLocalStorage
2349
- // so concurrent generate/stream calls don't race.
2350
- return metricsTraceContextStorage.run({ traceId: metricsTraceId, parentSpanId: metricsRootSpanId }, async () => {
2351
- try {
2352
- const originalPrompt = this._extractOriginalPrompt(optionsOrPrompt);
2353
- // Convert string prompt to full options
2354
- // Shallow-copy caller's object to avoid mutating their original reference
2355
- const options = typeof optionsOrPrompt === "string"
2356
- ? { input: { text: optionsOrPrompt } }
2357
- : { ...optionsOrPrompt };
2358
- // NL-004: Resolve model aliases/deprecations before processing
2359
- options.model = resolveModel(options.model, this.modelAliasConfig);
2360
- // MCP Enhancement: propagate disableToolCache to tool execution
2361
- this._disableToolCacheForCurrentRequest =
2362
- !!options.disableToolCache;
2363
- // Set span attributes for observability
2364
- generateSpan.setAttribute("neurolink.provider", options.provider || "default");
2365
- generateSpan.setAttribute("neurolink.model", options.model || "default");
2366
- generateSpan.setAttribute("neurolink.input_length", typeof optionsOrPrompt === "string"
2367
- ? optionsOrPrompt.length
2368
- : options.input?.text?.length || 0);
2369
- generateSpan.setAttribute("neurolink.has_tools", !!(options.tools && Object.keys(options.tools).length > 0));
2370
- // Validate prompt
2371
- if (!options.input?.text ||
2372
- typeof options.input.text !== "string") {
2373
- throw new Error("Input text is required and must be a non-empty string");
2374
- }
2375
- // Check budget limit before making API call
2376
- if (options.maxBudgetUsd !== undefined &&
2377
- options.maxBudgetUsd > 0 &&
2378
- this._sessionCostUsd >= options.maxBudgetUsd) {
2379
- throw new NeuroLinkError({
2380
- code: "SESSION_BUDGET_EXCEEDED",
2381
- message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
2382
- category: ErrorCategory.VALIDATION,
2383
- severity: ErrorSeverity.HIGH,
2384
- retriable: false,
2385
- context: {
2386
- spent: this._sessionCostUsd,
2387
- limit: options.maxBudgetUsd,
2388
- },
2389
- });
2390
- }
2391
- // Auto-inject lifecycle middleware when callbacks are provided
2392
- // (must happen before workflow/PPT early returns so those paths get middleware too)
2393
- if (options.onFinish || options.onError) {
2394
- options.middleware = {
2395
- ...options.middleware,
2396
- middlewareConfig: {
2397
- ...options.middleware?.middlewareConfig,
2398
- lifecycle: {
2399
- ...options.middleware?.middlewareConfig?.lifecycle,
2400
- enabled: true,
2401
- config: {
2402
- ...options.middleware?.middlewareConfig?.lifecycle
2403
- ?.config,
2404
- onFinish: options.onFinish,
2405
- onError: options.onError,
2406
- },
2407
- },
2408
- },
2409
- };
2410
- }
2411
- // Handle per-call auth token validation
2412
- if (options.auth?.token) {
2413
- const { AuthError } = await import("./auth/errors.js");
2414
- await this.ensureAuthProvider();
2415
- if (!this.authProvider) {
2416
- throw AuthError.create("PROVIDER_ERROR", "No auth provider configured. Set auth in constructor or via setAuthProvider() before using auth: { token }.");
2417
- }
2418
- let authResult;
2419
- try {
2420
- authResult = await withTimeout(this.authProvider.authenticateToken(options.auth.token), 5000, AuthError.create("PROVIDER_ERROR", "Auth token validation timed out after 5000ms"));
2421
- }
2422
- catch (err) {
2423
- // Rethrow auth errors as-is; wrap anything else
2424
- if (err instanceof Error &&
2425
- "feature" in err &&
2426
- err.feature === "Auth") {
2427
- throw err;
2428
- }
2429
- throw AuthError.create("PROVIDER_ERROR", `Auth token validation failed: ${err instanceof Error ? err.message : String(err)}`);
2430
- }
2431
- if (!authResult.valid) {
2432
- throw AuthError.create("INVALID_TOKEN", authResult.error || "Token validation failed");
2433
- }
2434
- // Fail closed: token valid but no user identity is a provider bug
2435
- if (!authResult.user) {
2436
- throw AuthError.create("INVALID_TOKEN", "Token validated but no user identity returned");
2437
- }
2438
- if (!authResult.user.id) {
2439
- throw AuthError.create("INVALID_TOKEN", "Token validated but user identity missing required 'id' field");
2440
- }
2441
- // Merge validated user into context
2442
- options.context = {
2443
- ...(options.context || {}),
2444
- userId: authResult.user.id,
2445
- userEmail: authResult.user.email,
2446
- userRoles: authResult.user.roles,
2447
- };
2448
- }
2449
- // Handle pre-validated requestContext
2450
- if (options.requestContext) {
2451
- // When auth token was validated, token-derived identity fields
2452
- // MUST take precedence over requestContext to prevent privilege escalation.
2453
- const tokenDerivedFields = options.auth?.token && this.authProvider
2454
- ? {
2455
- userId: options.context?.userId,
2456
- userEmail: options.context?.userEmail,
2457
- userRoles: options.context?.userRoles,
2458
- }
2459
- : {};
2460
- options.context = {
2461
- ...(options.context || {}),
2462
- ...options.requestContext,
2463
- ...tokenDerivedFields,
2464
- };
2465
- }
2466
- // Check if workflow is requested
2467
- if (options.workflow || options.workflowConfig) {
2468
- return await this.generateWithWorkflow(options);
2469
- }
2470
- // Check if PPT output mode is requested
2471
- if (options.output?.mode === "ppt") {
2472
- const pptResult = await this.generateWithPPT(options);
2473
- generateSpan.setAttribute("neurolink.output_length", pptResult.content?.length ?? 0);
2474
- if (pptResult.analytics) {
2475
- generateSpan.setAttribute("neurolink.tokens.input", pptResult.analytics.tokenUsage?.input ?? 0);
2476
- generateSpan.setAttribute("neurolink.tokens.output", pptResult.analytics.tokenUsage?.output ?? 0);
2477
- generateSpan.setAttribute("neurolink.cost", pptResult.analytics.cost ?? 0);
2478
- }
2479
- generateSpan.setStatus({ code: SpanStatusCode.OK });
2480
- return pptResult;
2481
- }
2482
- // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
2483
- return await this.setLangfuseContextFromOptions(options, async () => {
2484
- const startTime = Date.now();
2485
- // Apply orchestration if enabled and no specific provider/model requested
2486
- if (this.enableOrchestration &&
2487
- !options.provider &&
2488
- !options.model) {
2489
- try {
2490
- const orchestratedOptions = await this.applyOrchestration(options);
2491
- logger.debug("Orchestration applied", {
2492
- originalProvider: options.provider || "auto",
2493
- orchestratedProvider: orchestratedOptions.provider,
2494
- orchestratedModel: orchestratedOptions.model,
2495
- prompt: options.input.text.substring(0, 100),
2496
- });
2497
- // Use orchestrated options
2498
- Object.assign(options, orchestratedOptions);
2499
- // Re-resolve model alias in case orchestration returned an alias
2500
- if (orchestratedOptions.model) {
2501
- options.model = resolveModel(options.model, this.modelAliasConfig);
2502
- }
2503
- }
2504
- catch (error) {
2505
- logger.warn("Orchestration failed, continuing with original options", {
2506
- error: error instanceof Error
2507
- ? error.message
2508
- : String(error),
2509
- originalProvider: options.provider || "auto",
2510
- });
2511
- // Continue with original options if orchestration fails
2512
- }
2513
- }
2514
- // Emit generation start event (NeuroLink format - keep existing)
2515
- this.emitter.emit("generation:start", {
2516
- provider: options.provider || "auto",
2517
- timestamp: startTime,
2518
- });
2519
- // ADD: Bedrock-compatible response:start event
2520
- this.emitter.emit("response:start");
2521
- // ADD: Bedrock-compatible message event
2522
- this.emitter.emit("message", `Starting ${options.provider || "auto"} text generation...`);
2523
- // Process factory configuration
2524
- const factoryResult = processFactoryOptions(options);
2525
- // Validate factory configuration if present
2526
- if (factoryResult.hasFactoryConfig && options.factoryConfig) {
2527
- const validation = validateFactoryConfig(options.factoryConfig);
2528
- if (!validation.isValid) {
2529
- logger.warn("Invalid factory configuration detected", {
2530
- errors: validation.errors,
2531
- });
2532
- // Continue with warning rather than throwing - graceful degradation
2533
- }
2534
- }
2535
- // RAG Integration: If rag config is provided, prepare the RAG search tool
2536
- if (options.rag?.files?.length) {
2537
- try {
2538
- const { prepareRAGTool } = await import("./rag/ragIntegration.js");
2539
- const ragResult = await prepareRAGTool(options.rag, options.provider);
2540
- // Inject the RAG tool into the tools record
2541
- if (!options.tools) {
2542
- options.tools = {};
2543
- }
2544
- options.tools[ragResult.toolName] = ragResult.tool;
2545
- // Inject RAG-aware system prompt so the AI uses the RAG tool first
2546
- const ragSystemInstruction = [
2547
- `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
2548
- `${ragResult.filesLoaded} loaded document(s) containing ${ragResult.chunksIndexed} indexed chunks.`,
2549
- `ALWAYS use the "${ragResult.toolName}" tool FIRST to answer the user's question before using any other tools.`,
2550
- `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
2551
- `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
2552
- ].join(" ");
2553
- options.systemPrompt =
2554
- (options.systemPrompt || "") + ragSystemInstruction;
2555
- logger.info("[RAG] Tool injected into generate()", {
2556
- toolName: ragResult.toolName,
2557
- filesLoaded: ragResult.filesLoaded,
2558
- chunksIndexed: ragResult.chunksIndexed,
2559
- });
2560
- }
2561
- catch (error) {
2562
- logger.warn("[RAG] Failed to prepare RAG tool, continuing without RAG", {
2563
- error: error instanceof Error
2564
- ? error.message
2565
- : String(error),
2566
- });
2567
- }
2568
- }
2569
- // Memory retrieval for generate path
2570
- if (this.shouldReadMemory(options.memory, options.context?.userId) &&
2571
- options.context?.userId) {
2572
- try {
2573
- options.input.text = await this.retrieveMemory(options.input.text, options.context.userId, options.memory?.additionalUsers);
2574
- logger.debug("Memory retrieval successful (generate)");
2575
- }
2576
- catch (error) {
2577
- logger.warn("Memory retrieval failed (generate):", error);
2578
- }
2579
- }
2580
- // 🔧 CRITICAL FIX: Convert to TextGenerationOptions while preserving the input object for multimodal support
2581
- const baseOptions = {
2582
- prompt: options.input.text,
2583
- provider: options.provider,
2584
- model: options.model,
2585
- temperature: options.temperature,
2586
- maxTokens: options.maxTokens,
2587
- systemPrompt: options.systemPrompt,
2588
- schema: options.schema,
2589
- output: options.output,
2590
- tools: options.tools, // Includes RAG tools if rag config was provided
2591
- disableTools: options.disableTools,
2592
- toolFilter: options.toolFilter,
2593
- excludeTools: options.excludeTools,
2594
- maxSteps: options.maxSteps,
2595
- toolChoice: options.toolChoice,
2596
- prepareStep: options.prepareStep,
2597
- enableAnalytics: options.enableAnalytics,
2598
- enableEvaluation: options.enableEvaluation,
2599
- context: options.context,
2600
- evaluationDomain: options.evaluationDomain,
2601
- toolUsageContext: options.toolUsageContext,
2602
- input: options.input, // This includes text, images, and content arrays
2603
- region: options.region,
2604
- tts: options.tts,
2605
- fileRegistry: this.fileRegistry,
2606
- abortSignal: options.abortSignal,
2607
- skipToolPromptInjection: options.skipToolPromptInjection,
2608
- middleware: options.middleware,
2609
- // Pass through conversation messages for task continuation and external callers
2610
- conversationMessages: options.conversationMessages,
2611
- };
2612
- // Auto-map top-level sessionId/userId to context for convenience
2613
- // Tests and users may pass sessionId/userId as top-level options
2614
- const extraContext = options;
2615
- if (extraContext.sessionId || extraContext.userId) {
2616
- baseOptions.context = {
2617
- ...baseOptions.context,
2618
- ...(extraContext.sessionId &&
2619
- !baseOptions.context?.sessionId
2620
- ? { sessionId: extraContext.sessionId }
2621
- : {}),
2622
- ...(extraContext.userId && !baseOptions.context?.userId
2623
- ? { userId: extraContext.userId }
2624
- : {}),
2625
- };
2626
- }
2627
- // Apply factory enhancement using centralized utilities
2628
- const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
2629
- // Pass conversation memory config if available
2630
- if (this.conversationMemory) {
2631
- textOptions.conversationMemoryConfig =
2632
- this.conversationMemory.config;
2633
- // Include original prompt for context summarization
2634
- textOptions.originalPrompt = originalPrompt;
2635
- }
2636
- // Detect and execute domain-specific tools
2637
- const { toolResults, enhancedPrompt } = await this.detectAndExecuteTools(textOptions.prompt || options.input.text, factoryResult.domainType);
2638
- // Update prompt with tool results if available
2639
- if (enhancedPrompt !== textOptions.prompt) {
2640
- textOptions.prompt = enhancedPrompt;
2641
- logger.debug("Enhanced prompt with tool results", {
2642
- originalLength: options.input.text.length,
2643
- enhancedLength: enhancedPrompt.length,
2644
- toolResults: toolResults.length,
2645
- });
2646
- }
2647
- const textResult = await this.generateTextInternal(textOptions);
2648
- // Emit generation completion event (NeuroLink format - enhanced with content)
2649
- this.emitter.emit("generation:end", {
2650
- provider: textResult.provider,
2651
- responseTime: Date.now() - startTime,
2652
- toolsUsed: textResult.toolsUsed,
2653
- timestamp: Date.now(),
2654
- result: textResult, // Enhanced: include full result
2655
- prompt: options.input?.text ||
2656
- options.prompt,
2657
- temperature: textOptions.temperature,
2658
- maxTokens: textOptions.maxTokens,
2659
- });
2660
- // ADD: Bedrock-compatible response:end event with content
2661
- this.emitter.emit("response:end", textResult.content || "");
2662
- // ADD: Bedrock-compatible message event
2663
- this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
2664
- // Convert back to GenerateResult
2665
- const generateResult = {
2666
- content: textResult.content,
2667
- finishReason: textResult.finishReason,
2668
- provider: textResult.provider,
2669
- model: textResult.model,
2670
- usage: textResult.usage
2671
- ? {
2672
- input: textResult.usage.input || 0,
2673
- output: textResult.usage.output || 0,
2674
- total: textResult.usage.total || 0,
2675
- }
2676
- : undefined,
2677
- responseTime: textResult.responseTime,
2678
- toolsUsed: textResult.toolsUsed,
2679
- toolExecutions: transformToolExecutions(textResult.toolExecutions),
2680
- enhancedWithTools: textResult.enhancedWithTools,
2681
- availableTools: transformAvailableTools(textResult.availableTools),
2682
- analytics: textResult.analytics,
2683
- // CRITICAL FIX: Include imageOutput for image generation models
2684
- imageOutput: textResult.imageOutput,
2685
- evaluation: textResult.evaluation
2686
- ? {
2687
- ...textResult.evaluation,
2688
- isOffTopic: textResult.evaluation.isOffTopic ?? false,
2689
- alertSeverity: textResult.evaluation.alertSeverity ??
2690
- "none",
2691
- reasoning: textResult.evaluation.reasoning ??
2692
- "No evaluation provided",
2693
- evaluationModel: textResult.evaluation.evaluationModel ?? "unknown",
2694
- evaluationTime: textResult.evaluation.evaluationTime ?? Date.now(),
2695
- evaluationDomain: textResult.evaluation.evaluationDomain ??
2696
- textOptions.evaluationDomain ??
2697
- factoryResult.domainType,
2698
- }
2699
- : undefined,
2700
- audio: textResult.audio,
2701
- video: textResult.video,
2702
- ppt: textResult.ppt,
2703
- // NL-007: Copy retry metadata from MCP generation path
2704
- ...(textResult.retries && { retries: textResult.retries }),
2705
- };
2706
- // Accumulate session cost for budget tracking
2707
- if (generateResult.analytics?.cost &&
2708
- generateResult.analytics.cost > 0) {
2709
- this._sessionCostUsd += generateResult.analytics.cost;
2710
- }
2711
- this.scheduleGenerateMemoryStorage(options, originalPrompt, generateResult);
2712
- // Set completion span attributes
2713
- generateSpan.setAttribute("neurolink.output_length", generateResult.content?.length || 0);
2714
- generateSpan.setAttribute("neurolink.tokens.input", generateResult.usage?.input || 0);
2715
- generateSpan.setAttribute("neurolink.tokens.output", generateResult.usage?.output || 0);
2716
- generateSpan.setAttribute("neurolink.finish_reason", generateResult.finishReason || "unknown");
2717
- generateSpan.setAttribute("neurolink.result_provider", generateResult.provider || "unknown");
2718
- generateSpan.setAttribute("neurolink.result_model", generateResult.model || "unknown");
2719
- // NL-007: Expose retry count in OTel span
2720
- generateSpan.setAttribute("generate.retry_count", generateResult.retries?.count || 0);
2721
- generateSpan.setStatus({ code: SpanStatusCode.OK });
2722
- return generateResult;
2723
- });
2470
+ return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan));
2471
+ }
2472
+ async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan) {
2473
+ return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan));
2474
+ }
2475
    /**
     * Top-level driver for a single generate() call: prepares options, handles
     * early-exit paths (workflow / PPT), runs the standard generation flow, and
     * keeps the OpenTelemetry span status in sync with the outcome.
     *
     * @param optionsOrPrompt - Either a raw prompt string or a generate options object.
     * @param generateSpan - The active "neurolink.generate" span for this request.
     * @returns The generate result from the early-exit or standard path.
     * @throws Re-throws any error from preparation or generation after marking the span.
     */
    async executeGenerateRequest(optionsOrPrompt, generateSpan) {
        try {
            const { options, originalPrompt } = await this.prepareGenerateRequest(optionsOrPrompt, generateSpan);
            // Workflow and PPT requests bypass the standard text-generation path.
            const earlyResult = await this.maybeHandleEarlyGenerateResult(options, generateSpan);
            if (earlyResult) {
                generateSpan.setStatus({ code: SpanStatusCode.OK });
                return earlyResult;
            }
            // Langfuse observability context is scoped to the standard generation call only.
            const result = await this.setLangfuseContextFromOptions(options, () => this.runStandardGenerateRequest(options, originalPrompt, generateSpan));
            generateSpan.setStatus({ code: SpanStatusCode.OK });
            return result;
        }
        catch (error) {
            generateSpan.setStatus({
                code: SpanStatusCode.ERROR,
                message: error instanceof Error ? error.message : String(error),
            });
            // Emit a failure generation:end event so metrics listeners still record it.
            this.emitGenerateErrorEvent(optionsOrPrompt, error);
            throw error;
        }
        finally {
            // The per-request tool-cache override must not leak into the next request.
            this._disableToolCacheForCurrentRequest = false;
            generateSpan.end();
        }
    }
2500
+ async prepareGenerateRequest(optionsOrPrompt, generateSpan) {
2501
+ const originalPrompt = this._extractOriginalPrompt(optionsOrPrompt);
2502
+ const options = typeof optionsOrPrompt === "string"
2503
+ ? { input: { text: optionsOrPrompt } }
2504
+ : { ...optionsOrPrompt };
2505
+ options.model = resolveModel(options.model, this.modelAliasConfig);
2506
+ this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
2507
+ generateSpan.setAttribute("neurolink.provider", options.provider || "default");
2508
+ generateSpan.setAttribute("neurolink.model", options.model || "default");
2509
+ generateSpan.setAttribute("neurolink.input_length", typeof optionsOrPrompt === "string"
2510
+ ? optionsOrPrompt.length
2511
+ : options.input?.text?.length || 0);
2512
+ generateSpan.setAttribute("neurolink.has_tools", !!(options.tools && Object.keys(options.tools).length > 0));
2513
+ this.assertInputText(options.input?.text, "Input text is required and must be a non-empty string");
2514
+ this.enforceSessionBudget(options.maxBudgetUsd);
2515
+ this.applyGenerateLifecycleMiddleware(options);
2516
+ await this.applyAuthenticatedRequestContext(options);
2517
+ return { options, originalPrompt };
2518
+ }
2519
+ async maybeHandleEarlyGenerateResult(options, generateSpan) {
2520
+ if (options.workflow || options.workflowConfig) {
2521
+ return this.generateWithWorkflow(options);
2522
+ }
2523
+ if (options.output?.mode !== "ppt") {
2524
+ return null;
2525
+ }
2526
+ const pptResult = await this.generateWithPPT(options);
2527
+ generateSpan.setAttribute("neurolink.output_length", pptResult.content?.length ?? 0);
2528
+ if (pptResult.analytics) {
2529
+ generateSpan.setAttribute("neurolink.tokens.input", pptResult.analytics.tokenUsage?.input ?? 0);
2530
+ generateSpan.setAttribute("neurolink.tokens.output", pptResult.analytics.tokenUsage?.output ?? 0);
2531
+ generateSpan.setAttribute("neurolink.cost", pptResult.analytics.cost ?? 0);
2532
+ }
2533
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
2534
+ return pptResult;
2535
+ }
2536
    /**
     * Runs the standard (non-workflow, non-PPT) generation flow: orchestration,
     * lifecycle events, factory-config validation, RAG/memory augmentation,
     * internal option building, the text generation call, and result finalization.
     */
    async runStandardGenerateRequest(options, originalPrompt, generateSpan) {
        const startTime = Date.now();
        // Orchestration may pick a provider/model when the caller left them unset.
        await this.maybeApplyGenerateOrchestration(options);
        this.emitter.emit("generation:start", {
            provider: options.provider || "auto",
            timestamp: startTime,
        });
        this.emitter.emit("response:start");
        this.emitter.emit("message", `Starting ${options.provider || "auto"} text generation...`);
        const factoryResult = processFactoryOptions(options);
        if (factoryResult.hasFactoryConfig && options.factoryConfig) {
            const validation = validateFactoryConfig(options.factoryConfig);
            if (!validation.isValid) {
                // Invalid factory config is logged but does not abort generation.
                logger.warn("Invalid factory configuration detected", {
                    errors: validation.errors,
                });
            }
        }
        // Injects RAG tooling and retrieves user memory into the prompt as needed.
        await this.prepareGenerateAugmentations(options);
        const textOptions = await this.buildGenerateTextOptions(options, originalPrompt, factoryResult);
        const textResult = await this.generateTextInternal(textOptions);
        return this.finalizeGenerateRequestResult({
            generateSpan,
            options,
            textOptions,
            textResult,
            factoryResult,
            originalPrompt,
            startTime,
        });
    }
2567
+ async maybeApplyGenerateOrchestration(options) {
2568
+ if (!this.enableOrchestration || options.provider || options.model) {
2569
+ return;
2570
+ }
2571
+ try {
2572
+ const orchestratedOptions = await this.applyOrchestration(options);
2573
+ logger.debug("Orchestration applied", {
2574
+ originalProvider: options.provider || "auto",
2575
+ orchestratedProvider: orchestratedOptions.provider,
2576
+ orchestratedModel: orchestratedOptions.model,
2577
+ prompt: options.input.text.substring(0, 100),
2578
+ });
2579
+ Object.assign(options, orchestratedOptions);
2580
+ if (orchestratedOptions.model) {
2581
+ options.model = resolveModel(options.model, this.modelAliasConfig);
2582
+ }
2583
+ }
2584
+ catch (error) {
2585
+ logger.warn("Orchestration failed, continuing with original options", {
2586
+ error: error instanceof Error ? error.message : String(error),
2587
+ originalProvider: options.provider || "auto",
2588
+ });
2589
+ }
2590
+ }
2591
    /**
     * Applies pre-generation augmentations to the request options in place:
     * (1) builds and injects a RAG search tool plus a system-prompt directive
     *     when RAG files were supplied, and
     * (2) replaces the input text with a memory-augmented version when memory
     *     reads are enabled for this user.
     * Both steps are best-effort: failures are logged and generation continues.
     */
    async prepareGenerateAugmentations(options) {
        if (options.rag?.files?.length) {
            try {
                // Lazy import keeps the RAG subsystem out of the default load path.
                const { prepareRAGTool } = await import("./rag/ragIntegration.js");
                const ragResult = await prepareRAGTool(options.rag, options.provider);
                if (!options.tools) {
                    options.tools = {};
                }
                options.tools[ragResult.toolName] =
                    ragResult.tool;
                // Steer the model to prefer the local knowledge base over web search.
                options.systemPrompt =
                    (options.systemPrompt || "") +
                        [
                            `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
                            `${ragResult.filesLoaded} loaded document(s) containing ${ragResult.chunksIndexed} indexed chunks.`,
                            `ALWAYS use the "${ragResult.toolName}" tool FIRST to answer the user's question before using any other tools.`,
                            `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
                            `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
                        ].join(" ");
                logger.info("[RAG] Tool injected into generate()", {
                    toolName: ragResult.toolName,
                    filesLoaded: ragResult.filesLoaded,
                    chunksIndexed: ragResult.chunksIndexed,
                });
            }
            catch (error) {
                logger.warn("[RAG] Failed to prepare RAG tool, continuing without RAG", {
                    error: error instanceof Error ? error.message : String(error),
                });
            }
        }
        // Memory retrieval requires both an opted-in memory config and a userId.
        if (!this.shouldReadMemory(options.memory, options.context?.userId) ||
            !options.context?.userId) {
            return;
        }
        try {
            options.input.text = await this.retrieveMemory(options.input.text, options.context.userId, options.memory?.additionalUsers);
            logger.debug("Memory retrieval successful (generate)");
        }
        catch (error) {
            logger.warn("Memory retrieval failed (generate):", error);
        }
    }
2634
    /**
     * Translates the public generate options into internal TextGenerationOptions:
     * copies all pass-through fields, maps top-level sessionId/userId into the
     * context object, applies factory enhancement, attaches conversation-memory
     * config, and runs domain tool detection (which may rewrite the prompt).
     */
    async buildGenerateTextOptions(options, originalPrompt, factoryResult) {
        const baseOptions = {
            prompt: options.input.text,
            provider: options.provider,
            model: options.model,
            temperature: options.temperature,
            maxTokens: options.maxTokens,
            systemPrompt: options.systemPrompt,
            schema: options.schema,
            output: options.output,
            tools: options.tools, // Includes RAG tools when a rag config was provided.
            disableTools: options.disableTools,
            toolFilter: options.toolFilter,
            excludeTools: options.excludeTools,
            maxSteps: options.maxSteps,
            toolChoice: options.toolChoice,
            prepareStep: options.prepareStep,
            enableAnalytics: options.enableAnalytics,
            enableEvaluation: options.enableEvaluation,
            context: options.context,
            evaluationDomain: options.evaluationDomain,
            toolUsageContext: options.toolUsageContext,
            input: options.input, // Preserves text/images/content arrays for multimodal support.
            region: options.region,
            tts: options.tts,
            fileRegistry: this.fileRegistry,
            abortSignal: options.abortSignal,
            skipToolPromptInjection: options.skipToolPromptInjection,
            middleware: options.middleware,
            conversationMessages: options.conversationMessages,
        };
        // Convenience mapping: top-level sessionId/userId are copied into context
        // only when context does not already carry them.
        const extraContext = options;
        if (extraContext.sessionId || extraContext.userId) {
            baseOptions.context = {
                ...baseOptions.context,
                ...(extraContext.sessionId && !baseOptions.context?.sessionId
                    ? { sessionId: extraContext.sessionId }
                    : {}),
                ...(extraContext.userId && !baseOptions.context?.userId
                    ? { userId: extraContext.userId }
                    : {}),
            };
        }
        const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
        if (this.conversationMemory) {
            textOptions.conversationMemoryConfig = this.conversationMemory.config;
            // Original prompt is kept for context summarization.
            textOptions.originalPrompt = originalPrompt;
        }
        // Domain-specific tools may execute up front and fold results into the prompt.
        const { toolResults, enhancedPrompt } = await this.detectAndExecuteTools(textOptions.prompt || options.input.text, factoryResult.domainType);
        if (enhancedPrompt !== textOptions.prompt) {
            textOptions.prompt = enhancedPrompt;
            logger.debug("Enhanced prompt with tool results", {
                originalLength: options.input.text.length,
                enhancedLength: enhancedPrompt.length,
                toolResults: toolResults.length,
            });
        }
        return textOptions;
    }
2693
    /**
     * Emits completion events, converts the internal text result into the public
     * GenerateResult shape, accumulates session cost for budget tracking,
     * schedules non-blocking memory storage, and records final span attributes.
     */
    finalizeGenerateRequestResult(params) {
        const { generateSpan, options, textOptions, textResult, factoryResult, originalPrompt, startTime, } = params;
        this.emitter.emit("generation:end", {
            provider: textResult.provider,
            responseTime: Date.now() - startTime,
            toolsUsed: textResult.toolsUsed,
            timestamp: Date.now(),
            result: textResult,
            prompt: options.input?.text || options.prompt,
            temperature: textOptions.temperature,
            maxTokens: textOptions.maxTokens,
        });
        // Bedrock-compatible completion events.
        this.emitter.emit("response:end", textResult.content || "");
        this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
        const generateResult = {
            content: textResult.content,
            finishReason: textResult.finishReason,
            provider: textResult.provider,
            model: textResult.model,
            usage: textResult.usage
                ? {
                    input: textResult.usage.input || 0,
                    output: textResult.usage.output || 0,
                    total: textResult.usage.total || 0,
                }
                : undefined,
            responseTime: textResult.responseTime,
            toolsUsed: textResult.toolsUsed,
            toolExecutions: transformToolExecutions(textResult.toolExecutions),
            enhancedWithTools: textResult.enhancedWithTools,
            availableTools: transformAvailableTools(textResult.availableTools),
            analytics: textResult.analytics,
            // Image generation models return their output here.
            imageOutput: textResult.imageOutput,
            // Evaluation fields are defaulted so consumers always see a full shape.
            evaluation: textResult.evaluation
                ? {
                    ...textResult.evaluation,
                    isOffTopic: textResult.evaluation.isOffTopic ?? false,
                    alertSeverity: textResult.evaluation.alertSeverity ?? "none",
                    reasoning: textResult.evaluation.reasoning ?? "No evaluation provided",
                    evaluationModel: textResult.evaluation.evaluationModel ?? "unknown",
                    evaluationTime: textResult.evaluation.evaluationTime ?? Date.now(),
                    evaluationDomain: textResult.evaluation.evaluationDomain ??
                        textOptions.evaluationDomain ??
                        factoryResult.domainType,
                }
                : undefined,
            audio: textResult.audio,
            video: textResult.video,
            ppt: textResult.ppt,
            // Retry metadata is only attached when the generation path reported retries.
            ...(textResult.retries && { retries: textResult.retries }),
        };
        // Accumulate session cost for budget tracking.
        if (generateResult.analytics?.cost && generateResult.analytics.cost > 0) {
            this._sessionCostUsd += generateResult.analytics.cost;
        }
        this.scheduleGenerateMemoryStorage(options, originalPrompt, generateResult);
        generateSpan.setAttribute("neurolink.output_length", generateResult.content?.length || 0);
        generateSpan.setAttribute("neurolink.tokens.input", generateResult.usage?.input || 0);
        generateSpan.setAttribute("neurolink.tokens.output", generateResult.usage?.output || 0);
        generateSpan.setAttribute("neurolink.finish_reason", generateResult.finishReason || "unknown");
        generateSpan.setAttribute("neurolink.result_provider", generateResult.provider || "unknown");
        generateSpan.setAttribute("neurolink.result_model", generateResult.model || "unknown");
        generateSpan.setAttribute("generate.retry_count", generateResult.retries?.count || 0);
        generateSpan.setStatus({ code: SpanStatusCode.OK });
        return generateResult;
    }
2758
+ emitGenerateErrorEvent(optionsOrPrompt, error) {
2759
+ const errProvider = typeof optionsOrPrompt === "object"
2760
+ ? optionsOrPrompt.provider || "unknown"
2761
+ : "unknown";
2762
+ const errModel = typeof optionsOrPrompt === "object"
2763
+ ? optionsOrPrompt.model || "unknown"
2764
+ : "unknown";
2765
+ try {
2766
+ this.emitter.emit("generation:end", {
2767
+ provider: errProvider,
2768
+ model: errModel,
2769
+ responseTime: 0,
2770
+ error: error instanceof Error ? error.message : String(error),
2771
+ success: false,
2772
+ });
2773
+ }
2774
+ catch (emitError) {
2775
+ void emitError;
2776
+ }
2759
2777
  }
2760
2778
  /**
2761
2779
  * Schedule non-blocking memory storage after generate completes.
@@ -3084,253 +3102,247 @@ Current user's request: ${currentInput}`;
3084
3102
  * 5. Store conversation turn for future context
3085
3103
  */
3086
3104
  async generateTextInternal(options) {
3087
- return tracers.sdk.startActiveSpan("neurolink.generateTextInternal", { kind: SpanKind.INTERNAL }, async (internalSpan) => {
3088
- try {
3089
- const generateInternalId = `generate-internal-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3090
- const existingRequestId = options.context?.requestId;
3091
- const requestId = typeof existingRequestId === "string" && existingRequestId
3092
- ? existingRequestId
3093
- : `req-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3094
- options.context = { ...options.context, requestId };
3095
- const generateInternalStartTime = Date.now();
3096
- const generateInternalHrTimeStart = process.hrtime.bigint();
3097
- const functionTag = "NeuroLink.generateTextInternal";
3098
- // Set span attributes for internal generation
3099
- internalSpan.setAttribute("neurolink.request_id", requestId);
3100
- internalSpan.setAttribute("neurolink.has_conversation_memory", !!this.conversationMemory);
3101
- internalSpan.setAttribute("neurolink.provider", options.provider || "auto");
3102
- internalSpan.setAttribute("neurolink.model", options.model || "default");
3103
- this.logGenerateTextInternalStart(generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, options, functionTag);
3104
- this.emitGenerationStartEvents(options);
3105
- try {
3106
- await this.initializeConversationMemoryForGeneration(generateInternalId, generateInternalStartTime, generateInternalHrTimeStart);
3107
- const mcpResult = await this.attemptMCPGeneration(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag);
3108
- if (mcpResult) {
3109
- logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS (MCP path)`, {
3110
- provider: mcpResult.provider,
3111
- model: mcpResult.model,
3112
- responseTimeMs: Date.now() - generateInternalStartTime,
3113
- tokensUsed: mcpResult.usage?.total || 0,
3114
- toolsUsed: mcpResult.toolsUsed?.length || 0,
3115
- ...(mcpResult.usage?.cacheCreationTokens !== undefined && {
3116
- cacheCreationTokens: mcpResult.usage.cacheCreationTokens,
3117
- }),
3118
- ...(mcpResult.usage?.cacheReadTokens !== undefined && {
3119
- cacheReadTokens: mcpResult.usage.cacheReadTokens,
3120
- }),
3121
- ...(mcpResult.usage?.cacheSavingsPercent !== undefined && {
3122
- cacheSavingsPercent: mcpResult.usage.cacheSavingsPercent,
3123
- }),
3124
- });
3125
- {
3126
- const memStoreStart = Date.now();
3127
- try {
3128
- await storeConversationTurn(this.conversationMemory, options, mcpResult, new Date(generateInternalStartTime), requestId);
3129
- this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "mcp" }, Date.now() - memStoreStart, SpanStatus.OK);
3130
- }
3131
- catch (memErr) {
3132
- this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "mcp" }, Date.now() - memStoreStart, SpanStatus.ERROR, memErr instanceof Error ? memErr.message : String(memErr));
3133
- }
3134
- }
3135
- this.emitter.emit("response:end", mcpResult.content || "");
3136
- internalSpan.setAttribute("neurolink.path", "mcp");
3137
- internalSpan.setAttribute("neurolink.tokens.input", mcpResult.usage?.input || 0);
3138
- internalSpan.setAttribute("neurolink.tokens.output", mcpResult.usage?.output || 0);
3139
- internalSpan.setAttribute("neurolink.result_provider", mcpResult.provider || "unknown");
3140
- internalSpan.setStatus({ code: SpanStatusCode.OK });
3141
- return mcpResult;
3142
- }
3143
- if (options.abortSignal?.aborted) {
3144
- throw new DOMException("The operation was aborted", "AbortError");
3145
- }
3146
- // Save original messages for smart overflow recovery (Solution 6)
3147
- // directProviderGeneration may compact messages; if provider still rejects,
3148
- // the catch block needs the originals for a more effective retry
3149
- if (this.conversationMemory) {
3150
- const originalMessages = await getConversationMessages(this.conversationMemory, options);
3151
- options._originalConversationMessages = originalMessages
3152
- ? [...originalMessages]
3153
- : undefined;
3154
- }
3155
- const directResult = await this.directProviderGeneration(options);
3156
- logger.debug(`[${functionTag}] Direct generation successful`);
3157
- logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS`, {
3158
- provider: directResult.provider,
3159
- model: directResult.model,
3160
- responseTimeMs: Date.now() - generateInternalStartTime,
3161
- tokensUsed: directResult.usage?.total || 0,
3162
- toolsUsed: directResult.toolsUsed?.length || 0,
3163
- ...(directResult.usage?.cacheCreationTokens !== undefined && {
3164
- cacheCreationTokens: directResult.usage.cacheCreationTokens,
3165
- }),
3166
- ...(directResult.usage?.cacheReadTokens !== undefined && {
3167
- cacheReadTokens: directResult.usage.cacheReadTokens,
3168
- }),
3169
- ...(directResult.usage?.cacheSavingsPercent !== undefined && {
3170
- cacheSavingsPercent: directResult.usage.cacheSavingsPercent,
3171
- }),
3172
- });
3173
- {
3174
- const memStoreStart = Date.now();
3175
- try {
3176
- await storeConversationTurn(this.conversationMemory, options, directResult, new Date(generateInternalStartTime), requestId);
3177
- this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "direct" }, Date.now() - memStoreStart, SpanStatus.OK);
3178
- }
3179
- catch (memErr) {
3180
- this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "direct" }, Date.now() - memStoreStart, SpanStatus.ERROR, memErr instanceof Error ? memErr.message : String(memErr));
3181
- }
3182
- }
3183
- this.emitter.emit("response:end", directResult.content || "");
3184
- this.emitter.emit("message", `Text generation completed successfully`);
3185
- internalSpan.setAttribute("neurolink.path", "direct");
3186
- internalSpan.setAttribute("neurolink.tokens.input", directResult.usage?.input || 0);
3187
- internalSpan.setAttribute("neurolink.tokens.output", directResult.usage?.output || 0);
3188
- internalSpan.setAttribute("neurolink.result_provider", directResult.provider || "unknown");
3189
- internalSpan.setStatus({ code: SpanStatusCode.OK });
3190
- return directResult;
3191
- }
3192
- catch (error) {
3193
- // Check if this is a context overflow error - attempt recovery
3194
- if (isContextOverflowError(error) && this.conversationMemory) {
3195
- logger.warn(`[${functionTag}] Context overflow detected by provider, attempting smart recovery`, {
3196
- error: error instanceof Error ? error.message : String(error),
3197
- overflowProvider: getContextOverflowProvider(error),
3198
- });
3199
- try {
3200
- // IMPROVEMENT 1: Extract actual token count from provider error if available
3201
- const actualOverflow = parseProviderOverflowDetails(error);
3202
- // IMPROVEMENT 2: Use ORIGINAL messages (not already-compacted ones)
3203
- const originalMessages = options._originalConversationMessages ??
3204
- (await getConversationMessages(this.conversationMemory, options));
3205
- // IMPROVEMENT 3: Calculate precise reduction target
3206
- const recoveryBudget = checkContextBudget({
3207
- provider: options.provider || "openai",
3208
- model: options.model,
3209
- maxTokens: options.maxTokens,
3210
- currentPrompt: options.prompt,
3211
- systemPrompt: options.systemPrompt,
3212
- });
3213
- // Use provider's reported token count if available (more accurate than our estimate)
3214
- const actualTokens = actualOverflow?.actualTokens ??
3215
- recoveryBudget.estimatedInputTokens;
3216
- const budgetTokens = actualOverflow?.budgetTokens ??
3217
- recoveryBudget.availableInputTokens;
3218
- // Target = 70% of budget (aggressive safety margin for recovery)
3219
- const compactionTarget = Math.floor(budgetTokens * 0.7);
3220
- // IMPROVEMENT 4: Calculate adaptive truncation fraction from actual numbers
3221
- const requiredReduction = actualTokens > 0
3222
- ? (actualTokens - compactionTarget) / actualTokens
3223
- : 0.5;
3224
- const compactor = new ContextCompactor({
3225
- enableSummarize: false, // Skip LLM call for recovery (speed)
3226
- enablePrune: true,
3227
- enableDeduplicate: true,
3228
- enableTruncate: true,
3229
- truncationFraction: Math.min(0.9, requiredReduction + 0.15),
3230
- });
3231
- const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
3232
- if (compactionResult.compacted) {
3233
- const repairedResult = repairToolPairs(compactionResult.messages);
3234
- // IMPROVEMENT 5: Verify BEFORE retrying
3235
- const verifyBudget = checkContextBudget({
3236
- provider: options.provider || "openai",
3237
- model: options.model,
3238
- maxTokens: options.maxTokens,
3239
- systemPrompt: options.systemPrompt,
3240
- currentPrompt: options.prompt,
3241
- conversationMessages: repairedResult.messages,
3242
- });
3243
- if (!verifyBudget.withinBudget) {
3244
- logger.error(`[${functionTag}] Recovery compaction insufficient, aborting retry`, {
3245
- estimatedTokens: verifyBudget.estimatedInputTokens,
3246
- availableTokens: verifyBudget.availableInputTokens,
3247
- });
3248
- throw new ContextBudgetExceededError(`Context overflow recovery failed. Provider rejected at ~${actualTokens} tokens, ` +
3249
- `recovery compaction achieved ${compactionResult.tokensAfter} tokens ` +
3250
- `but budget is ${budgetTokens} tokens.`, {
3251
- estimatedTokens: compactionResult.tokensAfter,
3252
- availableTokens: budgetTokens,
3253
- stagesUsed: compactionResult.stagesUsed,
3254
- breakdown: verifyBudget.breakdown,
3255
- });
3256
- }
3257
- logger.info(`[${functionTag}] Smart recovery verified, retrying generation`, {
3258
- tokensSaved: compactionResult.tokensSaved,
3259
- compactionTarget,
3260
- verifiedTokens: verifyBudget.estimatedInputTokens,
3261
- verifiedBudget: verifyBudget.availableInputTokens,
3262
- });
3263
- // Single verified retry
3264
- return await this.directProviderGeneration({
3265
- ...options,
3266
- conversationMessages: repairedResult.messages,
3267
- });
3268
- }
3269
- }
3270
- catch (retryError) {
3271
- // If the retry error is our own ContextBudgetExceededError, re-throw it
3272
- if (retryError instanceof ContextBudgetExceededError) {
3273
- throw retryError;
3274
- }
3275
- logger.error(`[${functionTag}] Recovery attempt failed`, {
3276
- error: retryError instanceof Error
3277
- ? retryError.message
3278
- : String(retryError),
3279
- });
3280
- }
3281
- }
3282
- // If the generation was aborted (e.g., coding task short-circuit via AbortController),
3283
- // still store the conversation turn so that:
3284
- // 1. The Redis conversation entry is created (if first turn)
3285
- // 2. setImmediate triggers generateConversationTitle() for the session
3286
- // 3. The caller's syncTitleFromRedis() can find the SDK-generated title
3287
- if (isAbortError(error)) {
3288
- logger.info(`[${functionTag}] Generation aborted — storing conversation turn for title generation`, {
3289
- hasMemory: !!this.conversationMemory,
3290
- memoryType: this.conversationMemory?.constructor?.name || "NONE",
3291
- sessionId: options.context?.sessionId ||
3292
- "unknown",
3293
- });
3294
- try {
3295
- const abortedResult = {
3296
- content: "[generation was interrupted]",
3297
- provider: options.provider || "unknown",
3298
- model: options.model || "unknown",
3299
- responseTime: Date.now() - generateInternalStartTime,
3300
- };
3301
- await withTimeout(storeConversationTurn(this.conversationMemory, options, abortedResult, new Date(generateInternalStartTime), requestId), 5000);
3302
- }
3303
- catch (storeError) {
3304
- logger.warn(`[${functionTag}] Failed to store conversation turn after abort`, {
3305
- error: storeError instanceof Error
3306
- ? storeError.message
3307
- : String(storeError),
3308
- });
3309
- }
3310
- }
3311
- else {
3312
- logger.error(`[${functionTag}] All generation methods failed`, {
3313
- error: error instanceof Error ? error.message : String(error),
3314
- });
3315
- }
3316
- this.emitter.emit("response:end", "");
3317
- this.emitter.emit("error", error instanceof Error ? error : new Error(String(error)));
3318
- throw error;
3319
- }
3320
- }
3321
- catch (spanError) {
3322
- internalSpan.setStatus({
3323
- code: SpanStatusCode.ERROR,
3324
- message: spanError instanceof Error
3325
- ? spanError.message
3326
- : String(spanError),
3105
+ return tracers.sdk.startActiveSpan("neurolink.generateTextInternal", { kind: SpanKind.INTERNAL }, (internalSpan) => this.executeGenerateTextInternalWithSpan(options, internalSpan));
3106
+ }
3107
+ async executeGenerateTextInternalWithSpan(options, internalSpan) {
3108
+ try {
3109
+ const context = this.initializeGenerateTextInternalContext(options);
3110
+ internalSpan.setAttribute("neurolink.request_id", context.requestId);
3111
+ internalSpan.setAttribute("neurolink.has_conversation_memory", !!this.conversationMemory);
3112
+ internalSpan.setAttribute("neurolink.provider", options.provider || "auto");
3113
+ internalSpan.setAttribute("neurolink.model", options.model || "default");
3114
+ this.logGenerateTextInternalStart(context.generateInternalId, context.generateInternalStartTime, context.generateInternalHrTimeStart, options, context.functionTag);
3115
+ this.emitGenerationStartEvents(options);
3116
+ return await this.runGenerateTextInternalFlow(options, internalSpan, context);
3117
+ }
3118
+ catch (error) {
3119
+ internalSpan.setStatus({
3120
+ code: SpanStatusCode.ERROR,
3121
+ message: error instanceof Error ? error.message : String(error),
3122
+ });
3123
+ throw error;
3124
+ }
3125
+ finally {
3126
+ internalSpan.end();
3127
+ }
3128
+ }
3129
+ initializeGenerateTextInternalContext(options) {
3130
+ const generateInternalId = `generate-internal-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3131
+ const existingRequestId = options.context?.requestId;
3132
+ const requestId = typeof existingRequestId === "string" && existingRequestId
3133
+ ? existingRequestId
3134
+ : `req-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3135
+ options.context = { ...options.context, requestId };
3136
+ return {
3137
+ generateInternalId,
3138
+ generateInternalStartTime: Date.now(),
3139
+ generateInternalHrTimeStart: process.hrtime.bigint(),
3140
+ functionTag: "NeuroLink.generateTextInternal",
3141
+ requestId,
3142
+ };
3143
+ }
3144
+ async runGenerateTextInternalFlow(options, internalSpan, context) {
3145
+ try {
3146
+ await this.initializeConversationMemoryForGeneration(context.generateInternalId, context.generateInternalStartTime, context.generateInternalHrTimeStart);
3147
+ const mcpResult = await this.attemptMCPGeneration(options, context.generateInternalId, context.generateInternalStartTime, context.generateInternalHrTimeStart, context.functionTag);
3148
+ if (mcpResult) {
3149
+ return this.finalizeGenerateTextInternalResult({
3150
+ path: "mcp",
3151
+ result: mcpResult,
3152
+ options,
3153
+ internalSpan,
3154
+ requestId: context.requestId,
3155
+ startTime: context.generateInternalStartTime,
3327
3156
  });
3328
- throw spanError;
3329
3157
  }
3330
- finally {
3331
- internalSpan.end();
3158
+ if (options.abortSignal?.aborted) {
3159
+ throw new DOMException("The operation was aborted", "AbortError");
3160
+ }
3161
+ await this.captureOriginalConversationMessagesForRecovery(options);
3162
+ const directResult = await this.directProviderGeneration(options);
3163
+ logger.debug(`[${context.functionTag}] Direct generation successful`);
3164
+ return this.finalizeGenerateTextInternalResult({
3165
+ path: "direct",
3166
+ result: directResult,
3167
+ options,
3168
+ internalSpan,
3169
+ requestId: context.requestId,
3170
+ startTime: context.generateInternalStartTime,
3171
+ });
3172
+ }
3173
+ catch (error) {
3174
+ const recoveredResult = await this.handleGenerateTextInternalFailure(options, context, error);
3175
+ if (recoveredResult) {
3176
+ return recoveredResult;
3332
3177
  }
3178
+ throw error;
3179
+ }
3180
+ }
3181
+ async captureOriginalConversationMessagesForRecovery(options) {
3182
+ if (!this.conversationMemory) {
3183
+ return;
3184
+ }
3185
+ const originalMessages = await getConversationMessages(this.conversationMemory, options);
3186
+ options._originalConversationMessages = originalMessages
3187
+ ? [...originalMessages]
3188
+ : undefined;
3189
+ }
3190
+ async finalizeGenerateTextInternalResult(params) {
3191
+ const { path, result, options, internalSpan, requestId, startTime } = params;
3192
+ logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS${path === "mcp" ? " (MCP path)" : ""}`, {
3193
+ provider: result.provider,
3194
+ model: result.model,
3195
+ responseTimeMs: Date.now() - startTime,
3196
+ tokensUsed: result.usage?.total || 0,
3197
+ toolsUsed: result.toolsUsed?.length || 0,
3198
+ ...(result.usage?.cacheCreationTokens !== undefined && {
3199
+ cacheCreationTokens: result.usage.cacheCreationTokens,
3200
+ }),
3201
+ ...(result.usage?.cacheReadTokens !== undefined && {
3202
+ cacheReadTokens: result.usage.cacheReadTokens,
3203
+ }),
3204
+ ...(result.usage?.cacheSavingsPercent !== undefined && {
3205
+ cacheSavingsPercent: result.usage.cacheSavingsPercent,
3206
+ }),
3333
3207
  });
3208
+ const memStoreStart = Date.now();
3209
+ try {
3210
+ await storeConversationTurn(this.conversationMemory, options, result, new Date(startTime), requestId);
3211
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": path }, Date.now() - memStoreStart, SpanStatus.OK);
3212
+ }
3213
+ catch (memoryError) {
3214
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": path }, Date.now() - memStoreStart, SpanStatus.ERROR, memoryError instanceof Error
3215
+ ? memoryError.message
3216
+ : String(memoryError));
3217
+ }
3218
+ this.emitter.emit("response:end", result.content || "");
3219
+ if (path === "direct") {
3220
+ this.emitter.emit("message", "Text generation completed successfully");
3221
+ }
3222
+ internalSpan.setAttribute("neurolink.path", path);
3223
+ internalSpan.setAttribute("neurolink.tokens.input", result.usage?.input || 0);
3224
+ internalSpan.setAttribute("neurolink.tokens.output", result.usage?.output || 0);
3225
+ internalSpan.setAttribute("neurolink.result_provider", result.provider || "unknown");
3226
+ internalSpan.setStatus({ code: SpanStatusCode.OK });
3227
+ return result;
3228
+ }
3229
+ async handleGenerateTextInternalFailure(options, context, error) {
3230
+ const recoveredResult = await this.tryRecoverGenerateTextOverflow(options, context.functionTag, error);
3231
+ if (recoveredResult) {
3232
+ return recoveredResult;
3233
+ }
3234
+ if (isAbortError(error)) {
3235
+ logger.info(`[${context.functionTag}] Generation aborted — storing conversation turn for title generation`, {
3236
+ hasMemory: !!this.conversationMemory,
3237
+ memoryType: this.conversationMemory?.constructor?.name || "NONE",
3238
+ sessionId: options.context?.sessionId ||
3239
+ "unknown",
3240
+ });
3241
+ try {
3242
+ const abortedResult = {
3243
+ content: "[generation was interrupted]",
3244
+ provider: options.provider || "unknown",
3245
+ model: options.model || "unknown",
3246
+ responseTime: Date.now() - context.generateInternalStartTime,
3247
+ };
3248
+ await withTimeout(storeConversationTurn(this.conversationMemory, options, abortedResult, new Date(context.generateInternalStartTime), context.requestId), 5000);
3249
+ }
3250
+ catch (storeError) {
3251
+ logger.warn(`[${context.functionTag}] Failed to store conversation turn after abort`, {
3252
+ error: storeError instanceof Error
3253
+ ? storeError.message
3254
+ : String(storeError),
3255
+ });
3256
+ }
3257
+ }
3258
+ else {
3259
+ logger.error(`[${context.functionTag}] All generation methods failed`, {
3260
+ error: error instanceof Error ? error.message : String(error),
3261
+ });
3262
+ }
3263
+ this.emitter.emit("response:end", "");
3264
+ this.emitter.emit("error", error instanceof Error ? error : new Error(String(error)));
3265
+ return null;
3266
+ }
3267
+ async tryRecoverGenerateTextOverflow(options, functionTag, error) {
3268
+ if (!isContextOverflowError(error) || !this.conversationMemory) {
3269
+ return null;
3270
+ }
3271
+ logger.warn(`[${functionTag}] Context overflow detected by provider, attempting smart recovery`, {
3272
+ error: error instanceof Error ? error.message : String(error),
3273
+ overflowProvider: getContextOverflowProvider(error),
3274
+ });
3275
+ try {
3276
+ const actualOverflow = parseProviderOverflowDetails(error);
3277
+ const originalMessages = options._originalConversationMessages ??
3278
+ (await getConversationMessages(this.conversationMemory, options));
3279
+ const recoveryBudget = checkContextBudget({
3280
+ provider: options.provider || "openai",
3281
+ model: options.model,
3282
+ maxTokens: options.maxTokens,
3283
+ currentPrompt: options.prompt,
3284
+ systemPrompt: options.systemPrompt,
3285
+ });
3286
+ const actualTokens = actualOverflow?.actualTokens ?? recoveryBudget.estimatedInputTokens;
3287
+ const budgetTokens = actualOverflow?.budgetTokens ?? recoveryBudget.availableInputTokens;
3288
+ const compactionTarget = Math.floor(budgetTokens * 0.7);
3289
+ const requiredReduction = actualTokens > 0
3290
+ ? (actualTokens - compactionTarget) / actualTokens
3291
+ : 0.5;
3292
+ const compactor = new ContextCompactor({
3293
+ enableSummarize: false,
3294
+ enablePrune: true,
3295
+ enableDeduplicate: true,
3296
+ enableTruncate: true,
3297
+ truncationFraction: Math.min(0.9, requiredReduction + 0.15),
3298
+ });
3299
+ const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
3300
+ if (!compactionResult.compacted) {
3301
+ return null;
3302
+ }
3303
+ const repairedResult = repairToolPairs(compactionResult.messages);
3304
+ const verifyBudget = checkContextBudget({
3305
+ provider: options.provider || "openai",
3306
+ model: options.model,
3307
+ maxTokens: options.maxTokens,
3308
+ systemPrompt: options.systemPrompt,
3309
+ currentPrompt: options.prompt,
3310
+ conversationMessages: repairedResult.messages,
3311
+ });
3312
+ if (!verifyBudget.withinBudget) {
3313
+ logger.error(`[${functionTag}] Recovery compaction insufficient, aborting retry`, {
3314
+ estimatedTokens: verifyBudget.estimatedInputTokens,
3315
+ availableTokens: verifyBudget.availableInputTokens,
3316
+ });
3317
+ throw new ContextBudgetExceededError(`Context overflow recovery failed. Provider rejected at ~${actualTokens} tokens, ` +
3318
+ `recovery compaction achieved ${compactionResult.tokensAfter} tokens ` +
3319
+ `but budget is ${budgetTokens} tokens.`, {
3320
+ estimatedTokens: compactionResult.tokensAfter,
3321
+ availableTokens: budgetTokens,
3322
+ stagesUsed: compactionResult.stagesUsed,
3323
+ breakdown: verifyBudget.breakdown,
3324
+ });
3325
+ }
3326
+ logger.info(`[${functionTag}] Smart recovery verified, retrying generation`, {
3327
+ tokensSaved: compactionResult.tokensSaved,
3328
+ compactionTarget,
3329
+ verifiedTokens: verifyBudget.estimatedInputTokens,
3330
+ verifiedBudget: verifyBudget.availableInputTokens,
3331
+ });
3332
+ return this.directProviderGeneration({
3333
+ ...options,
3334
+ conversationMessages: repairedResult.messages,
3335
+ });
3336
+ }
3337
+ catch (retryError) {
3338
+ if (retryError instanceof ContextBudgetExceededError) {
3339
+ throw retryError;
3340
+ }
3341
+ logger.error(`[${functionTag}] Recovery attempt failed`, {
3342
+ error: retryError instanceof Error ? retryError.message : String(retryError),
3343
+ });
3344
+ return null;
3345
+ }
3334
3346
  }
3335
3347
  /**
3336
3348
  * Log generateTextInternal start with comprehensive analysis
@@ -3529,292 +3541,21 @@ Current user's request: ${currentInput}`;
3529
3541
  const tryMCPHrTimeStart = process.hrtime.bigint();
3530
3542
  const functionTag = "NeuroLink.tryMCPGeneration";
3531
3543
  try {
3532
- // Initialize MCP if needed
3533
- await this.initializeMCP();
3534
- if (!this.mcpInitialized) {
3535
- logger.warn(`[NeuroLink] ⚠️ LOG_POINT_T004_MCP_NOT_AVAILABLE`, {
3536
- logPoint: "T004_MCP_NOT_AVAILABLE",
3537
- tryMCPId,
3538
- timestamp: new Date().toISOString(),
3539
- elapsedMs: Date.now() - tryMCPStartTime,
3540
- elapsedNs: (process.hrtime.bigint() - tryMCPHrTimeStart).toString(),
3541
- mcpInitialized: this.mcpInitialized,
3542
- mcpComponents: {
3543
- hasExternalServerManager: !!this.externalServerManager,
3544
- hasToolRegistry: !!this.toolRegistry,
3545
- hasProviderRegistry: !!AIProviderFactory,
3546
- },
3547
- fallbackReason: "MCP_NOT_INITIALIZED",
3548
- message: "MCP not available - returning null for fallback to direct generation",
3549
- });
3550
- return null; // Skip MCP if not available
3551
- }
3552
- // Context creation removed - was never used
3553
- // Determine provider
3554
- const providerName = options.provider === "auto" || !options.provider
3555
- ? await getBestProvider()
3556
- : options.provider;
3557
- // Get available tools
3558
- let availableTools = await this.getAllAvailableTools();
3559
- // NL-001: Filter out tools with OPEN circuit breakers
3560
- const { tools: circuitBreakerFilteredTools, unavailableTools } = this.toolRegistry.getAvailableTools(this.toolCircuitBreakers);
3561
- // Intersect: keep only tools that pass both getAllAvailableTools and circuit breaker filtering
3562
- const cbFilteredNames = new Set(circuitBreakerFilteredTools.map((t) => t.name));
3563
- availableTools = availableTools.filter((t) => cbFilteredNames.has(t.name));
3564
- // Apply per-call tool filtering for system prompt tool descriptions
3565
- availableTools = this.applyToolInfoFiltering(availableTools, options);
3566
- const targetTool = availableTools.find((t) => t.name.includes("SuccessRateSRByTime") ||
3567
- t.name.includes("juspay-analytics"));
3568
- logger.debug("Available tools for AI prompt generation", {
3569
- toolsCount: availableTools.length,
3570
- toolNames: availableTools.map((t) => t.name),
3571
- unavailableToolsCount: unavailableTools.length,
3572
- unavailableTools: unavailableTools,
3573
- hasTargetTool: !!targetTool,
3574
- targetToolDetails: targetTool
3575
- ? {
3576
- name: targetTool.name,
3577
- description: targetTool.description,
3578
- server: targetTool.server,
3579
- }
3580
- : null,
3581
- });
3582
- // NL-001: Inject system note about unavailable tools
3583
- let circuitBreakerNote = "";
3584
- if (unavailableTools.length > 0) {
3585
- circuitBreakerNote = `\n\nNOTE: The following tools are temporarily unavailable due to repeated failures: ${unavailableTools.join(", ")}. Do not attempt to call these tools.`;
3586
- }
3587
- // Create tool-aware system prompt (skip if skipToolPromptInjection is true)
3588
- const enhancedSystemPrompt = options.skipToolPromptInjection
3589
- ? (options.systemPrompt || "") + circuitBreakerNote
3590
- : this.createToolAwareSystemPrompt(options.systemPrompt, availableTools) + circuitBreakerNote;
3591
- logger.debug("Tool-aware system prompt created", {
3592
- requestId,
3593
- originalPromptLength: options.systemPrompt?.length || 0,
3594
- enhancedPromptLength: enhancedSystemPrompt.length,
3595
- skippedToolInjection: !!options.skipToolPromptInjection,
3596
- enhancedPromptPreview: enhancedSystemPrompt.substring(0, 80) + "...",
3597
- });
3598
- logger.debug("[Observability] System prompt metadata", {
3599
- requestId,
3600
- systemPromptLength: enhancedSystemPrompt.length,
3601
- systemPromptHash: enhancedSystemPrompt.length > 0
3602
- ? `sha256:${enhancedSystemPrompt.slice(0, 8)}...`
3603
- : "empty",
3604
- hasCustomSystemPrompt: !!options.systemPrompt,
3605
- });
3606
- // Get conversation messages for context
3607
- let conversationMessages = await getConversationMessages(this.conversationMemory, options);
3608
- if (logger.shouldLog("debug")) {
3609
- try {
3610
- logger.debug("[Observability] Conversation history summary", {
3611
- requestId,
3612
- messageCount: conversationMessages?.length || 0,
3613
- messages: conversationMessages?.map((msg, i) => {
3614
- let contentLength;
3615
- if (typeof msg.content === "string") {
3616
- contentLength = msg.content.length;
3617
- }
3618
- else {
3619
- try {
3620
- contentLength = JSON.stringify(msg.content).length;
3621
- }
3622
- catch {
3623
- contentLength = 0;
3624
- }
3625
- }
3626
- return {
3627
- index: i,
3628
- role: msg.role,
3629
- contentLength,
3630
- contentPreview: typeof msg.content === "string"
3631
- ? msg.content.substring(0, 200)
3632
- : "[multimodal]",
3633
- };
3634
- }),
3635
- });
3636
- }
3637
- catch {
3638
- // Ignore serialization errors in debug logging
3639
- }
3640
- }
3641
- logger.debug("[Observability] Available tools for LLM", {
3642
- requestId,
3643
- toolCount: availableTools?.length || 0,
3644
- toolNames: availableTools?.map((t) => t.name) || [],
3645
- });
3646
- // Pre-generation budget check
3647
- const budgetResult = checkContextBudget({
3648
- provider: providerName,
3649
- model: options.model,
3650
- maxTokens: options.maxTokens,
3651
- systemPrompt: enhancedSystemPrompt,
3652
- conversationMessages: conversationMessages,
3653
- currentPrompt: options.prompt,
3654
- toolDefinitions: availableTools,
3655
- });
3656
- logger.info("[TokenBudget] Token breakdown", {
3657
- requestId,
3658
- system: budgetResult.breakdown?.systemPrompt || 0,
3659
- history: budgetResult.breakdown?.conversationHistory || 0,
3660
- tools: budgetResult.breakdown?.toolDefinitions || 0,
3661
- currentPrompt: budgetResult.breakdown?.currentPrompt || 0,
3662
- files: budgetResult.breakdown?.fileAttachments || 0,
3663
- total: budgetResult.estimatedInputTokens,
3664
- budget: budgetResult.availableInputTokens,
3665
- usagePercent: Math.round(budgetResult.usageRatio * 1000) / 10,
3666
- conversationMessageCount: conversationMessages?.length || 0,
3667
- shouldCompact: budgetResult.shouldCompact,
3668
- });
3669
- const messageCount = conversationMessages?.length || 0;
3670
- const compactionSessionId = this.getCompactionSessionId(options);
3671
- if (budgetResult.shouldCompact &&
3672
- this.conversationMemory &&
3673
- messageCount >
3674
- (this.lastCompactionMessageCount.get(compactionSessionId) ?? 0)) {
3675
- logger.info("[NeuroLink] Context budget exceeded, triggering auto-compaction", {
3676
- usageRatio: budgetResult.usageRatio,
3677
- estimatedTokens: budgetResult.estimatedInputTokens,
3678
- availableTokens: budgetResult.availableInputTokens,
3679
- });
3680
- const compactor = new ContextCompactor({
3681
- provider: providerName,
3682
- summarizationProvider: this.conversationMemoryConfig?.conversationMemory
3683
- ?.summarizationProvider,
3684
- summarizationModel: this.conversationMemoryConfig?.conversationMemory
3685
- ?.summarizationModel,
3686
- });
3687
- const compactionResult = await compactor.compact(conversationMessages, budgetResult.availableInputTokens, this.conversationMemoryConfig?.conversationMemory, requestId);
3688
- if (compactionResult.compacted) {
3689
- const repairedResult = repairToolPairs(compactionResult.messages);
3690
- conversationMessages = repairedResult.messages;
3691
- this.lastCompactionMessageCount.set(compactionSessionId, conversationMessages.length);
3692
- logger.info("[NeuroLink] Context compacted successfully", {
3693
- stagesUsed: compactionResult.stagesUsed,
3694
- tokensSaved: compactionResult.tokensSaved,
3695
- });
3696
- }
3697
- // POST-COMPACTION BUDGET RE-CHECK (BUG-003 fix)
3698
- const postCompactBudget = checkContextBudget({
3699
- provider: providerName,
3700
- model: options.model,
3701
- maxTokens: options.maxTokens,
3702
- systemPrompt: enhancedSystemPrompt,
3703
- conversationMessages: conversationMessages,
3704
- currentPrompt: options.prompt,
3705
- toolDefinitions: availableTools,
3706
- });
3707
- if (!postCompactBudget.withinBudget) {
3708
- const overageRatio = postCompactBudget.usageRatio - 1.0;
3709
- logger.warn("[NeuroLink] Post-compaction still over budget, attempting emergency content truncation", {
3710
- requestId,
3711
- estimatedTokens: postCompactBudget.estimatedInputTokens,
3712
- availableTokens: postCompactBudget.availableInputTokens,
3713
- overagePercent: Math.round(overageRatio * 100),
3714
- stagesUsedInCompaction: compactionResult.stagesUsed,
3715
- });
3716
- // Emergency: truncate the content of the longest messages
3717
- conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
3718
- // Final check after emergency truncation
3719
- const finalBudget = checkContextBudget({
3720
- provider: providerName,
3721
- model: options.model,
3722
- maxTokens: options.maxTokens,
3723
- systemPrompt: enhancedSystemPrompt,
3724
- conversationMessages: conversationMessages,
3725
- currentPrompt: options.prompt,
3726
- toolDefinitions: availableTools,
3727
- });
3728
- if (!finalBudget.withinBudget) {
3729
- throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
3730
- `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
3731
- `Budget: ${finalBudget.availableInputTokens} tokens. ` +
3732
- `Conversation is too large to fit in the model's context window.`, {
3733
- estimatedTokens: finalBudget.estimatedInputTokens,
3734
- availableTokens: finalBudget.availableInputTokens,
3735
- stagesUsed: compactionResult.stagesUsed,
3736
- breakdown: finalBudget.breakdown,
3737
- });
3738
- }
3739
- }
3544
+ const generationContext = await this.prepareMCPGenerationContext(options, requestId, tryMCPId, tryMCPStartTime, tryMCPHrTimeStart);
3545
+ if (!generationContext) {
3546
+ return null;
3740
3547
  }
3741
- // Create provider and generate (with confidence that context fits)
3742
- const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
3743
- this, // Pass SDK instance
3744
- options.region);
3745
- // Propagate trace context for parent-child span hierarchy
3746
- provider.setTraceContext(this._metricsTraceContext);
3747
- // ADD: Emit connection events for all providers (Bedrock-compatible)
3748
- this.emitter.emit("connected");
3749
- this.emitter.emit("message", `${providerName} provider initialized successfully`);
3750
- // Enable tool execution for the provider using BaseProvider method
3751
- provider.setupToolExecutor({
3752
- customTools: this.getCustomTools(),
3753
- executeTool: (toolName, params) => this.executeTool(toolName, params, {
3754
- disableToolCache: options.disableToolCache,
3755
- }),
3756
- }, functionTag);
3757
- logger.debug("[Observability] User input to LLM", {
3548
+ const conversationMessages = await this.ensureMCPGenerationBudget(options, requestId, generationContext.providerName, generationContext.enhancedSystemPrompt, generationContext.availableTools, generationContext.conversationMessages);
3549
+ return this.generateWithMCPProvider({
3550
+ options,
3758
3551
  requestId,
3759
- promptPreview: options.prompt?.substring(0, 200),
3760
- promptLength: options.prompt?.length || 0,
3761
- model: options.model,
3762
- maxTokens: options.maxTokens,
3763
- temperature: options.temperature,
3764
- maxSteps: options.maxSteps,
3765
- skipToolPromptInjection: options.skipToolPromptInjection,
3766
- });
3767
- const result = await provider.generate({
3768
- ...options,
3769
- systemPrompt: enhancedSystemPrompt,
3770
- conversationMessages, // Inject conversation history
3771
- });
3772
- const responseTime = Date.now() - tryMCPStartTime;
3773
- // Enhanced result validation - consider tool executions as valid results
3774
- const hasContent = result && result.content && result.content.trim().length > 0;
3775
- const hasToolExecutions = result && result.toolExecutions && result.toolExecutions.length > 0;
3776
- // Log detailed result analysis for debugging
3777
- mcpLogger.debug(`[${functionTag}] Result validation:`, {
3778
- hasResult: !!result,
3779
- hasContent,
3780
- hasToolExecutions,
3781
- contentLength: result?.content?.length || 0,
3782
- toolExecutionsCount: result?.toolExecutions?.length || 0,
3783
- toolsUsedCount: result?.toolsUsed?.length || 0,
3784
- });
3785
- // Accept result if it has content OR successful tool executions
3786
- if (!hasContent && !hasToolExecutions) {
3787
- mcpLogger.debug(`[${functionTag}] Result rejected: no content and no tool executions`);
3788
- return null; // Let caller fall back to direct generation
3789
- }
3790
- // Transform tool executions with enhanced preservation
3791
- const transformedToolExecutions = transformToolExecutionsForMCP(result.toolExecutions);
3792
- // Log transformation results
3793
- mcpLogger.debug(`[${functionTag}] Tool execution transformation:`, {
3794
- originalCount: result?.toolExecutions?.length || 0,
3795
- transformedCount: transformedToolExecutions.length,
3796
- transformedTools: transformedToolExecutions.map((te) => te.toolName),
3552
+ functionTag,
3553
+ tryMCPStartTime,
3554
+ providerName: generationContext.providerName,
3555
+ availableTools: generationContext.availableTools,
3556
+ enhancedSystemPrompt: generationContext.enhancedSystemPrompt,
3557
+ conversationMessages,
3797
3558
  });
3798
- // Return enhanced result with preserved tool information
3799
- return {
3800
- content: result.content || "", // Ensure content is never undefined
3801
- provider: providerName,
3802
- model: result.model,
3803
- usage: result.usage,
3804
- responseTime,
3805
- finishReason: result.finishReason,
3806
- toolsUsed: result.toolsUsed || [],
3807
- toolExecutions: transformedToolExecutions,
3808
- enhancedWithTools: Boolean(hasToolExecutions), // Mark as enhanced if tools were actually used
3809
- availableTools: transformToolsForMCP(transformToolsToExpectedFormat(availableTools)),
3810
- audio: result.audio,
3811
- video: result.video,
3812
- ppt: result.ppt,
3813
- imageOutput: result.imageOutput,
3814
- // Include analytics and evaluation from BaseProvider
3815
- analytics: result.analytics,
3816
- evaluation: result.evaluation,
3817
- };
3818
3559
  }
3819
3560
  catch (error) {
3820
3561
  // Immediately propagate AbortError — never swallow aborted requests
@@ -3843,6 +3584,299 @@ Current user's request: ${currentInput}`;
3843
3584
  return null; // Let caller fall back
3844
3585
  }
3845
3586
  }
3587
+ async prepareMCPGenerationContext(options, requestId, tryMCPId, tryMCPStartTime, tryMCPHrTimeStart) {
3588
+ await this.initializeMCP();
3589
+ if (!this.mcpInitialized) {
3590
+ logger.warn(`[NeuroLink] ⚠️ LOG_POINT_T004_MCP_NOT_AVAILABLE`, {
3591
+ logPoint: "T004_MCP_NOT_AVAILABLE",
3592
+ tryMCPId,
3593
+ timestamp: new Date().toISOString(),
3594
+ elapsedMs: Date.now() - tryMCPStartTime,
3595
+ elapsedNs: (process.hrtime.bigint() - tryMCPHrTimeStart).toString(),
3596
+ mcpInitialized: this.mcpInitialized,
3597
+ mcpComponents: {
3598
+ hasExternalServerManager: !!this.externalServerManager,
3599
+ hasToolRegistry: !!this.toolRegistry,
3600
+ hasProviderRegistry: !!AIProviderFactory,
3601
+ },
3602
+ fallbackReason: "MCP_NOT_INITIALIZED",
3603
+ message: "MCP not available - returning null for fallback to direct generation",
3604
+ });
3605
+ return null;
3606
+ }
3607
+ const providerName = options.provider === "auto" || !options.provider
3608
+ ? await getBestProvider()
3609
+ : options.provider;
3610
+ let availableTools = await this.getAllAvailableTools();
3611
+ const { tools: circuitBreakerFilteredTools, unavailableTools } = this.toolRegistry.getAvailableTools(this.toolCircuitBreakers);
3612
+ const cbFilteredNames = new Set(circuitBreakerFilteredTools.map((tool) => tool.name));
3613
+ availableTools = availableTools.filter((tool) => cbFilteredNames.has(tool.name));
3614
+ availableTools = this.applyToolInfoFiltering(availableTools, options);
3615
+ const targetTool = availableTools.find((tool) => tool.name.includes("SuccessRateSRByTime") ||
3616
+ tool.name.includes("juspay-analytics"));
3617
+ logger.debug("Available tools for AI prompt generation", {
3618
+ toolsCount: availableTools.length,
3619
+ toolNames: availableTools.map((tool) => tool.name),
3620
+ unavailableToolsCount: unavailableTools.length,
3621
+ unavailableTools,
3622
+ hasTargetTool: !!targetTool,
3623
+ targetToolDetails: targetTool
3624
+ ? {
3625
+ name: targetTool.name,
3626
+ description: targetTool.description,
3627
+ server: targetTool.server,
3628
+ }
3629
+ : null,
3630
+ });
3631
+ const circuitBreakerNote = unavailableTools.length > 0
3632
+ ? `\n\nNOTE: The following tools are temporarily unavailable due to repeated failures: ${unavailableTools.join(", ")}. Do not attempt to call these tools.`
3633
+ : "";
3634
+ const enhancedSystemPrompt = options.skipToolPromptInjection
3635
+ ? (options.systemPrompt || "") + circuitBreakerNote
3636
+ : this.createToolAwareSystemPrompt(options.systemPrompt, availableTools) +
3637
+ circuitBreakerNote;
3638
+ logger.debug("Tool-aware system prompt created", {
3639
+ requestId,
3640
+ originalPromptLength: options.systemPrompt?.length || 0,
3641
+ enhancedPromptLength: enhancedSystemPrompt.length,
3642
+ skippedToolInjection: !!options.skipToolPromptInjection,
3643
+ enhancedPromptPreview: enhancedSystemPrompt.substring(0, 80) + "...",
3644
+ });
3645
+ logger.debug("[Observability] System prompt metadata", {
3646
+ requestId,
3647
+ systemPromptLength: enhancedSystemPrompt.length,
3648
+ systemPromptHash: enhancedSystemPrompt.length > 0
3649
+ ? `sha256:${enhancedSystemPrompt.slice(0, 8)}...`
3650
+ : "empty",
3651
+ hasCustomSystemPrompt: !!options.systemPrompt,
3652
+ });
3653
+ const conversationMessages = (await getConversationMessages(this.conversationMemory, options));
3654
+ this.logMCPConversationSummary(requestId, conversationMessages);
3655
+ logger.debug("[Observability] Available tools for LLM", {
3656
+ requestId,
3657
+ toolCount: availableTools.length,
3658
+ toolNames: availableTools.map((tool) => tool.name),
3659
+ });
3660
+ return {
3661
+ providerName,
3662
+ availableTools,
3663
+ enhancedSystemPrompt,
3664
+ conversationMessages,
3665
+ };
3666
+ }
3667
+ logMCPConversationSummary(requestId, conversationMessages) {
3668
+ if (!logger.shouldLog("debug")) {
3669
+ return;
3670
+ }
3671
+ try {
3672
+ logger.debug("[Observability] Conversation history summary", {
3673
+ requestId,
3674
+ messageCount: conversationMessages.length,
3675
+ messages: conversationMessages.map((message, index) => {
3676
+ let contentLength;
3677
+ if (typeof message.content === "string") {
3678
+ contentLength = message.content.length;
3679
+ }
3680
+ else {
3681
+ try {
3682
+ contentLength = JSON.stringify(message.content).length;
3683
+ }
3684
+ catch {
3685
+ contentLength = 0;
3686
+ }
3687
+ }
3688
+ return {
3689
+ index,
3690
+ role: message.role,
3691
+ contentLength,
3692
+ contentPreview: typeof message.content === "string"
3693
+ ? message.content.substring(0, 200)
3694
+ : "[multimodal]",
3695
+ };
3696
+ }),
3697
+ });
3698
+ }
3699
+ catch {
3700
+ // Ignore serialization errors in debug logging
3701
+ }
3702
+ }
3703
+ async ensureMCPGenerationBudget(options, requestId, providerName, enhancedSystemPrompt, availableTools, conversationMessages) {
3704
+ const budgetResult = checkContextBudget({
3705
+ provider: providerName,
3706
+ model: options.model,
3707
+ maxTokens: options.maxTokens,
3708
+ systemPrompt: enhancedSystemPrompt,
3709
+ conversationMessages: conversationMessages,
3710
+ currentPrompt: options.prompt,
3711
+ toolDefinitions: availableTools,
3712
+ });
3713
+ logger.info("[TokenBudget] Token breakdown", {
3714
+ requestId,
3715
+ system: budgetResult.breakdown?.systemPrompt || 0,
3716
+ history: budgetResult.breakdown?.conversationHistory || 0,
3717
+ tools: budgetResult.breakdown?.toolDefinitions || 0,
3718
+ currentPrompt: budgetResult.breakdown?.currentPrompt || 0,
3719
+ files: budgetResult.breakdown?.fileAttachments || 0,
3720
+ total: budgetResult.estimatedInputTokens,
3721
+ budget: budgetResult.availableInputTokens,
3722
+ usagePercent: Math.round(budgetResult.usageRatio * 1000) / 10,
3723
+ conversationMessageCount: conversationMessages.length,
3724
+ shouldCompact: budgetResult.shouldCompact,
3725
+ });
3726
+ const compactionSessionId = this.getCompactionSessionId(options);
3727
+ const lastCompactionCount = this.lastCompactionMessageCount.get(compactionSessionId) ?? 0;
3728
+ if (!budgetResult.shouldCompact ||
3729
+ !this.conversationMemory ||
3730
+ conversationMessages.length <= lastCompactionCount) {
3731
+ return conversationMessages;
3732
+ }
3733
+ return this.compactMCPConversationForBudget({
3734
+ options,
3735
+ requestId,
3736
+ providerName,
3737
+ enhancedSystemPrompt,
3738
+ availableTools,
3739
+ conversationMessages,
3740
+ availableInputTokens: budgetResult.availableInputTokens,
3741
+ usageRatio: budgetResult.usageRatio,
3742
+ estimatedInputTokens: budgetResult.estimatedInputTokens,
3743
+ compactionSessionId,
3744
+ });
3745
+ }
3746
+ async compactMCPConversationForBudget(context) {
3747
+ const { options, requestId, providerName, enhancedSystemPrompt, availableTools, conversationMessages, availableInputTokens, usageRatio, estimatedInputTokens, compactionSessionId, } = context;
3748
+ logger.info("[NeuroLink] Context budget exceeded, triggering auto-compaction", {
3749
+ usageRatio,
3750
+ estimatedTokens: estimatedInputTokens,
3751
+ availableTokens: availableInputTokens,
3752
+ });
3753
+ const compactor = new ContextCompactor({
3754
+ provider: providerName,
3755
+ summarizationProvider: this.conversationMemoryConfig?.conversationMemory
3756
+ ?.summarizationProvider,
3757
+ summarizationModel: this.conversationMemoryConfig?.conversationMemory?.summarizationModel,
3758
+ });
3759
+ const compactionResult = await compactor.compact(conversationMessages, availableInputTokens, this.conversationMemoryConfig?.conversationMemory, requestId);
3760
+ let compactedMessages = conversationMessages;
3761
+ if (compactionResult.compacted) {
3762
+ const repairedResult = repairToolPairs(compactionResult.messages);
3763
+ compactedMessages = repairedResult.messages;
3764
+ this.lastCompactionMessageCount.set(compactionSessionId, compactedMessages.length);
3765
+ logger.info("[NeuroLink] Context compacted successfully", {
3766
+ stagesUsed: compactionResult.stagesUsed,
3767
+ tokensSaved: compactionResult.tokensSaved,
3768
+ });
3769
+ }
3770
+ const postCompactBudget = checkContextBudget({
3771
+ provider: providerName,
3772
+ model: options.model,
3773
+ maxTokens: options.maxTokens,
3774
+ systemPrompt: enhancedSystemPrompt,
3775
+ conversationMessages: compactedMessages,
3776
+ currentPrompt: options.prompt,
3777
+ toolDefinitions: availableTools,
3778
+ });
3779
+ if (postCompactBudget.withinBudget) {
3780
+ return compactedMessages;
3781
+ }
3782
+ const overageRatio = postCompactBudget.usageRatio - 1.0;
3783
+ logger.warn("[NeuroLink] Post-compaction still over budget, attempting emergency content truncation", {
3784
+ requestId,
3785
+ estimatedTokens: postCompactBudget.estimatedInputTokens,
3786
+ availableTokens: postCompactBudget.availableInputTokens,
3787
+ overagePercent: Math.round(overageRatio * 100),
3788
+ stagesUsedInCompaction: compactionResult.stagesUsed,
3789
+ });
3790
+ compactedMessages = emergencyContentTruncation(compactedMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
3791
+ const finalBudget = checkContextBudget({
3792
+ provider: providerName,
3793
+ model: options.model,
3794
+ maxTokens: options.maxTokens,
3795
+ systemPrompt: enhancedSystemPrompt,
3796
+ conversationMessages: compactedMessages,
3797
+ currentPrompt: options.prompt,
3798
+ toolDefinitions: availableTools,
3799
+ });
3800
+ if (!finalBudget.withinBudget) {
3801
+ throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
3802
+ `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
3803
+ `Budget: ${finalBudget.availableInputTokens} tokens. ` +
3804
+ `Conversation is too large to fit in the model's context window.`, {
3805
+ estimatedTokens: finalBudget.estimatedInputTokens,
3806
+ availableTokens: finalBudget.availableInputTokens,
3807
+ stagesUsed: compactionResult.stagesUsed,
3808
+ breakdown: finalBudget.breakdown,
3809
+ });
3810
+ }
3811
+ return compactedMessages;
3812
+ }
3813
+ async generateWithMCPProvider(context) {
3814
+ const { options, requestId, functionTag, tryMCPStartTime, providerName, availableTools, enhancedSystemPrompt, conversationMessages, } = context;
3815
+ const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, this, options.region);
3816
+ provider.setTraceContext(this._metricsTraceContext);
3817
+ this.emitter.emit("connected");
3818
+ this.emitter.emit("message", `${providerName} provider initialized successfully`);
3819
+ provider.setupToolExecutor({
3820
+ customTools: this.getCustomTools(),
3821
+ executeTool: (toolName, params) => this.executeTool(toolName, params, {
3822
+ disableToolCache: options.disableToolCache,
3823
+ }),
3824
+ }, functionTag);
3825
+ logger.debug("[Observability] User input to LLM", {
3826
+ requestId,
3827
+ promptPreview: options.prompt?.substring(0, 200),
3828
+ promptLength: options.prompt?.length || 0,
3829
+ model: options.model,
3830
+ maxTokens: options.maxTokens,
3831
+ temperature: options.temperature,
3832
+ maxSteps: options.maxSteps,
3833
+ skipToolPromptInjection: options.skipToolPromptInjection,
3834
+ });
3835
+ const result = await provider.generate({
3836
+ ...options,
3837
+ systemPrompt: enhancedSystemPrompt,
3838
+ conversationMessages,
3839
+ });
3840
+ const responseTime = Date.now() - tryMCPStartTime;
3841
+ const hasContent = !!(result?.content && result.content.trim().length > 0);
3842
+ const hasToolExecutions = !!(result?.toolExecutions && result.toolExecutions.length > 0);
3843
+ mcpLogger.debug(`[${functionTag}] Result validation:`, {
3844
+ hasResult: !!result,
3845
+ hasContent,
3846
+ hasToolExecutions,
3847
+ contentLength: result?.content?.length || 0,
3848
+ toolExecutionsCount: result?.toolExecutions?.length || 0,
3849
+ toolsUsedCount: result?.toolsUsed?.length || 0,
3850
+ });
3851
+ if (!hasContent && !hasToolExecutions) {
3852
+ mcpLogger.debug(`[${functionTag}] Result rejected: no content and no tool executions`);
3853
+ return null;
3854
+ }
3855
+ const transformedToolExecutions = transformToolExecutionsForMCP(result.toolExecutions);
3856
+ mcpLogger.debug(`[${functionTag}] Tool execution transformation:`, {
3857
+ originalCount: result?.toolExecutions?.length || 0,
3858
+ transformedCount: transformedToolExecutions.length,
3859
+ transformedTools: transformedToolExecutions.map((te) => te.toolName),
3860
+ });
3861
+ return {
3862
+ content: result.content || "",
3863
+ provider: providerName,
3864
+ model: result.model,
3865
+ usage: result.usage,
3866
+ responseTime,
3867
+ finishReason: result.finishReason,
3868
+ toolsUsed: result.toolsUsed || [],
3869
+ toolExecutions: transformedToolExecutions,
3870
+ enhancedWithTools: Boolean(hasToolExecutions),
3871
+ availableTools: transformToolsForMCP(transformToolsToExpectedFormat(availableTools)),
3872
+ audio: result.audio,
3873
+ video: result.video,
3874
+ ppt: result.ppt,
3875
+ imageOutput: result.imageOutput,
3876
+ analytics: result.analytics,
3877
+ evaluation: result.evaluation,
3878
+ };
3879
+ }
3846
3880
  /**
3847
3881
  * Direct provider generation (no MCP, no recursion)
3848
3882
  */
@@ -4210,360 +4244,285 @@ Current user's request: ${currentInput}`;
4210
4244
  * @throws {Error} When conversation memory operations fail (if enabled)
4211
4245
  */
4212
4246
  async stream(options) {
4213
- // Shallow-copy caller's object to avoid mutating their original reference
4214
- options = { ...options };
4215
- // Set metrics trace context for parent-child span linking
4216
- const metricsTraceId = crypto.randomUUID().replace(/-/g, "");
4217
- const metricsParentSpanId = crypto
4218
- .randomUUID()
4219
- .replace(/-/g, "")
4220
- .substring(0, 16);
4221
- // Scope trace context to this request via AsyncLocalStorage
4222
- // so concurrent generate/stream calls don't race.
4223
- return metricsTraceContextStorage.run({ traceId: metricsTraceId, parentSpanId: metricsParentSpanId }, async () => {
4224
- // Manual span lifecycle: the span must stay open until the stream is fully consumed,
4225
- // NOT when the StreamResult object is returned. withSpan would end the span too early
4226
- // because streaming results resolve lazily via the async generator.
4227
- const streamSpan = tracers.sdk.startSpan("neurolink.stream", {
4228
- kind: SpanKind.INTERNAL,
4229
- attributes: {
4230
- [ATTR.NL_PROVIDER]: options.provider || "default",
4231
- [ATTR.GEN_AI_MODEL]: options.model || "default",
4232
- [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
4233
- [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
4234
- [ATTR.NL_STREAM_MODE]: true,
4235
- },
4247
+ return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeStreamRequest({ ...options }));
4248
+ }
4249
+ async executeStreamRequest(options) {
4250
+ const streamSpan = tracers.sdk.startSpan("neurolink.stream", {
4251
+ kind: SpanKind.INTERNAL,
4252
+ attributes: {
4253
+ [ATTR.NL_PROVIDER]: options.provider || "default",
4254
+ [ATTR.GEN_AI_MODEL]: options.model || "default",
4255
+ [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
4256
+ [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
4257
+ [ATTR.NL_STREAM_MODE]: true,
4258
+ },
4259
+ });
4260
+ const spanStartTime = Date.now();
4261
+ this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
4262
+ try {
4263
+ options.model = resolveModel(options.model, this.modelAliasConfig);
4264
+ const startTime = Date.now();
4265
+ const hrTimeStart = process.hrtime.bigint();
4266
+ const streamId = `neurolink-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
4267
+ const originalPrompt = options.input.text;
4268
+ options.fileRegistry = this.fileRegistry;
4269
+ await this.validateStreamRequestOptions(options, startTime);
4270
+ const workflowResult = await this.maybeHandleWorkflowStreamRequest({
4271
+ options,
4272
+ startTime,
4273
+ streamSpan,
4274
+ spanStartTime,
4275
+ });
4276
+ if (workflowResult) {
4277
+ return workflowResult;
4278
+ }
4279
+ return this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
4280
+ options,
4281
+ streamSpan,
4282
+ spanStartTime,
4283
+ startTime,
4284
+ hrTimeStart,
4285
+ streamId,
4286
+ originalPrompt,
4287
+ }));
4288
+ }
4289
+ catch (error) {
4290
+ streamSpan.setStatus({
4291
+ code: SpanStatusCode.ERROR,
4292
+ message: error instanceof Error ? error.message : String(error),
4236
4293
  });
4237
- const spanStartTime = Date.now();
4238
- // MCP Enhancement: propagate disableToolCache to tool execution
4239
- this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
4294
+ if (error instanceof Error) {
4295
+ streamSpan.recordException(error);
4296
+ }
4297
+ streamSpan.end();
4298
+ throw error;
4299
+ }
4300
+ }
4301
+ async validateStreamRequestOptions(options, startTime) {
4302
+ await this.validateStreamInput(options);
4303
+ this.enforceSessionBudget(options.maxBudgetUsd);
4304
+ await this.applyAuthenticatedRequestContext(options);
4305
+ this.emitStreamStartEvents(options, startTime);
4306
+ this.applyStreamLifecycleMiddleware(options);
4307
+ }
4308
+ async maybeHandleWorkflowStreamRequest(params) {
4309
+ if (!params.options.workflow && !params.options.workflowConfig) {
4310
+ return null;
4311
+ }
4312
+ const result = await this.streamWithWorkflow(params.options, params.startTime);
4313
+ const originalWorkflowStream = result.stream;
4314
+ const self = this;
4315
+ result.stream = (async function* () {
4240
4316
  try {
4241
- // NL-004: Resolve model aliases/deprecations before processing
4242
- options.model = resolveModel(options.model, this.modelAliasConfig);
4243
- const startTime = Date.now();
4244
- const hrTimeStart = process.hrtime.bigint();
4245
- const streamId = `neurolink-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
4246
- const originalPrompt = options.input.text; // Store the original prompt for memory storage
4247
- // Inject file registry for lazy on-demand file processing
4248
- options.fileRegistry = this.fileRegistry;
4249
- await this.validateStreamInput(options);
4250
- // Check budget limit before making API call
4251
- if (options.maxBudgetUsd !== undefined &&
4252
- options.maxBudgetUsd > 0 &&
4253
- this._sessionCostUsd >= options.maxBudgetUsd) {
4254
- throw new NeuroLinkError({
4255
- code: "SESSION_BUDGET_EXCEEDED",
4256
- message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
4257
- category: ErrorCategory.VALIDATION,
4258
- severity: ErrorSeverity.HIGH,
4259
- retriable: false,
4260
- context: {
4261
- spent: this._sessionCostUsd,
4262
- limit: options.maxBudgetUsd,
4263
- },
4264
- });
4317
+ for await (const chunk of originalWorkflowStream) {
4318
+ yield chunk;
4265
4319
  }
4266
- // Handle per-call auth token validation
4267
- if (options.auth?.token) {
4268
- const { AuthError } = await import("./auth/errors.js");
4269
- await this.ensureAuthProvider();
4270
- if (!this.authProvider) {
4271
- throw AuthError.create("PROVIDER_ERROR", "No auth provider configured. Set auth in constructor or via setAuthProvider() before using auth: { token }.");
4272
- }
4273
- let authResult;
4274
- try {
4275
- authResult = await withTimeout(this.authProvider.authenticateToken(options.auth.token), 5000, AuthError.create("PROVIDER_ERROR", "Auth token validation timed out after 5000ms"));
4276
- }
4277
- catch (err) {
4278
- // Rethrow auth errors as-is; wrap anything else
4279
- if (err instanceof Error &&
4280
- "feature" in err &&
4281
- err.feature === "Auth") {
4282
- throw err;
4320
+ params.streamSpan.setStatus({ code: SpanStatusCode.OK });
4321
+ }
4322
+ catch (error) {
4323
+ params.streamSpan.setStatus({
4324
+ code: SpanStatusCode.ERROR,
4325
+ message: error instanceof Error ? error.message : String(error),
4326
+ });
4327
+ throw error;
4328
+ }
4329
+ finally {
4330
+ self._disableToolCacheForCurrentRequest = false;
4331
+ params.streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - params.spanStartTime);
4332
+ params.streamSpan.end();
4333
+ }
4334
+ })();
4335
+ return result;
4336
+ }
4337
+ async runStandardStreamRequest(params) {
4338
+ const { options, streamSpan, spanStartTime, startTime, hrTimeStart, streamId, originalPrompt, } = params;
4339
+ try {
4340
+ const { enhancedOptions, factoryResult } = await this.prepareStreamOptions(options, streamId, startTime, hrTimeStart);
4341
+ const { stream: mcpStream, provider: providerName, usage: streamUsage, model: streamModel, finishReason: streamFinishReason, toolCalls: streamToolCalls, toolResults: streamToolResults, analytics: streamAnalytics, } = await this.createMCPStream(enhancedOptions);
4342
+ const streamState = {
4343
+ finishReason: streamFinishReason ?? "stop",
4344
+ toolCalls: streamToolCalls,
4345
+ toolResults: streamToolResults,
4346
+ };
4347
+ streamSpan.setAttribute(ATTR.NL_PROVIDER, providerName || "unknown");
4348
+ let accumulatedContent = "";
4349
+ let chunkCount = 0;
4350
+ const { eventSequence, cleanup: cleanupListeners } = this.setupStreamEventListeners();
4351
+ const metadata = {
4352
+ fallbackAttempted: false,
4353
+ guardrailsBlocked: false,
4354
+ error: undefined,
4355
+ fallbackProvider: undefined,
4356
+ fallbackModel: undefined,
4357
+ };
4358
+ const self = this;
4359
+ const streamStartTime = Date.now();
4360
+ const sessionId = enhancedOptions.context
4361
+ ?.sessionId;
4362
+ const processedStream = (async function* () {
4363
+ let streamError;
4364
+ try {
4365
+ for await (const chunk of mcpStream) {
4366
+ chunkCount++;
4367
+ if (chunk &&
4368
+ "content" in chunk &&
4369
+ typeof chunk.content === "string") {
4370
+ accumulatedContent += chunk.content;
4371
+ self.emitter.emit("response:chunk", chunk.content);
4372
+ self.emitter.emit("stream:chunk", {
4373
+ type: "stream:chunk",
4374
+ content: chunk.content,
4375
+ metadata: {
4376
+ chunkIndex: chunkCount,
4377
+ totalLength: accumulatedContent.length,
4378
+ },
4379
+ timestamp: Date.now(),
4380
+ });
4283
4381
  }
4284
- throw AuthError.create("PROVIDER_ERROR", `Auth token validation failed: ${err instanceof Error ? err.message : String(err)}`);
4285
- }
4286
- if (!authResult.valid) {
4287
- throw AuthError.create("INVALID_TOKEN", authResult.error || "Token validation failed");
4288
- }
4289
- // Fail closed: token valid but no user identity is a provider bug
4290
- if (!authResult.user) {
4291
- throw AuthError.create("INVALID_TOKEN", "Token validated but no user identity returned");
4382
+ yield chunk;
4292
4383
  }
4293
- if (!authResult.user.id) {
4294
- throw AuthError.create("INVALID_TOKEN", "Token validated but user identity missing required 'id' field");
4384
+ if (chunkCount === 0 &&
4385
+ !metadata.fallbackAttempted &&
4386
+ !enhancedOptions.disableInternalFallback &&
4387
+ streamState.toolCalls.length === 0 &&
4388
+ streamState.toolResults.length === 0) {
4389
+ yield* self.handleStreamFallback(metadata, streamState, originalPrompt, enhancedOptions, providerName, (content) => {
4390
+ accumulatedContent += content;
4391
+ });
4295
4392
  }
4296
- // Merge validated user into context
4297
- options.context = {
4298
- ...(options.context || {}),
4299
- userId: authResult.user.id,
4300
- userEmail: authResult.user.email,
4301
- userRoles: authResult.user.roles,
4302
- };
4303
- }
4304
- // Handle pre-validated requestContext
4305
- if (options.requestContext) {
4306
- // When auth token was validated, token-derived identity fields
4307
- // MUST take precedence over requestContext to prevent privilege escalation.
4308
- const tokenDerivedFields = options.auth?.token && this.authProvider
4309
- ? {
4310
- userId: options.context?.userId,
4311
- userEmail: options.context?.userEmail,
4312
- userRoles: options.context?.userRoles,
4313
- }
4314
- : {};
4315
- options.context = {
4316
- ...(options.context || {}),
4317
- ...options.requestContext,
4318
- ...tokenDerivedFields,
4319
- };
4320
- }
4321
- this.emitStreamStartEvents(options, startTime);
4322
- // Auto-inject lifecycle middleware when callbacks are provided
4323
- // (must happen before workflow early return so that path gets middleware too)
4324
- if (options.onFinish || options.onError || options.onChunk) {
4325
- options.middleware = {
4326
- ...options.middleware,
4327
- middlewareConfig: {
4328
- ...options.middleware?.middlewareConfig,
4329
- lifecycle: {
4330
- ...options.middleware?.middlewareConfig?.lifecycle,
4331
- enabled: true,
4332
- config: {
4333
- ...options.middleware?.middlewareConfig?.lifecycle?.config,
4334
- onFinish: options.onFinish,
4335
- onError: options.onError,
4336
- onChunk: options.onChunk,
4337
- },
4338
- },
4339
- },
4340
- };
4341
- }
4342
- // Check if workflow is requested
4343
- if (options.workflow || options.workflowConfig) {
4344
- const result = await this.streamWithWorkflow(options, startTime);
4345
- // Wrap the workflow stream so the span stays open until fully consumed
4346
- const originalWorkflowStream = result.stream;
4347
- const selfWorkflow = this;
4348
- result.stream = (async function* () {
4393
+ let resolvedUsage = streamUsage;
4394
+ if (!resolvedUsage && streamAnalytics) {
4349
4395
  try {
4350
- for await (const chunk of originalWorkflowStream) {
4351
- yield chunk;
4396
+ const resolved = await Promise.resolve(streamAnalytics);
4397
+ if (resolved?.tokenUsage) {
4398
+ resolvedUsage = resolved.tokenUsage;
4352
4399
  }
4353
- streamSpan.setStatus({ code: SpanStatusCode.OK });
4354
4400
  }
4355
- catch (error) {
4356
- streamSpan.setStatus({
4357
- code: SpanStatusCode.ERROR,
4358
- message: error instanceof Error ? error.message : String(error),
4359
- });
4360
- throw error;
4361
- }
4362
- finally {
4363
- selfWorkflow._disableToolCacheForCurrentRequest = false;
4364
- streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
4365
- streamSpan.end();
4401
+ catch {
4402
+ // non-blocking
4366
4403
  }
4367
- })();
4368
- return result;
4404
+ }
4405
+ self.emitter.emit("stream:complete", {
4406
+ type: "stream:complete",
4407
+ content: accumulatedContent,
4408
+ provider: metadata.fallbackProvider ?? providerName,
4409
+ model: metadata.fallbackModel ?? streamModel ?? enhancedOptions.model,
4410
+ prompt: enhancedOptions.input?.text ||
4411
+ enhancedOptions.prompt,
4412
+ metadata: {
4413
+ chunkCount,
4414
+ totalLength: accumulatedContent.length,
4415
+ durationMs: Date.now() - streamStartTime,
4416
+ sessionId,
4417
+ usage: resolvedUsage,
4418
+ ...(metadata.fallbackAttempted && {
4419
+ primaryProvider: providerName,
4420
+ primaryModel: enhancedOptions.model,
4421
+ fallback: true,
4422
+ }),
4423
+ },
4424
+ timestamp: Date.now(),
4425
+ });
4369
4426
  }
4370
- // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
4371
- return await this.setLangfuseContextFromOptions(options, async () => {
4372
- try {
4373
- // Prepare options: init memory, MCP, orchestration, Ollama auto-disable, tool detection
4374
- const { enhancedOptions, factoryResult } = await this.prepareStreamOptions(options, streamId, startTime, hrTimeStart);
4375
- const { stream: mcpStream, provider: providerName, usage: streamUsage, model: streamModel, analytics: streamAnalytics, } = await this.createMCPStream(enhancedOptions);
4376
- // Update span with resolved provider name
4377
- streamSpan.setAttribute(ATTR.NL_PROVIDER, providerName || "unknown");
4378
- let accumulatedContent = "";
4379
- let chunkCount = 0;
4380
- // Set up event capture listeners
4381
- const { eventSequence, cleanup: cleanupListeners } = this.setupStreamEventListeners();
4382
- const metadata = {
4383
- fallbackAttempted: false,
4384
- guardrailsBlocked: false,
4385
- error: undefined,
4386
- fallbackProvider: undefined,
4387
- fallbackModel: undefined,
4388
- };
4389
- const self = this;
4390
- const streamStartTime = Date.now();
4391
- const sessionId = enhancedOptions.context?.sessionId;
4392
- const processedStream = (async function* () {
4393
- let streamError;
4394
- try {
4395
- for await (const chunk of mcpStream) {
4396
- chunkCount++;
4397
- if (chunk &&
4398
- "content" in chunk &&
4399
- typeof chunk.content === "string") {
4400
- accumulatedContent += chunk.content;
4401
- self.emitter.emit("response:chunk", chunk.content);
4402
- // Emit stream:chunk event (Observability Solution 8)
4403
- self.emitter.emit("stream:chunk", {
4404
- type: "stream:chunk",
4405
- content: chunk.content,
4406
- metadata: {
4407
- chunkIndex: chunkCount,
4408
- totalLength: accumulatedContent.length,
4409
- },
4410
- timestamp: Date.now(),
4411
- });
4412
- }
4413
- yield chunk;
4414
- }
4415
- if (chunkCount === 0 && !metadata.fallbackAttempted) {
4416
- yield* self.handleStreamFallback(metadata, originalPrompt, enhancedOptions, providerName, accumulatedContent, (content) => {
4417
- accumulatedContent += content;
4418
- });
4419
- }
4420
- // Emit stream:complete event (Observability Solution 8)
4421
- // When fallback took over, attribute the completion to the
4422
- // fallback provider so downstream telemetry reflects reality.
4423
- const effectiveProvider = metadata.fallbackProvider ?? providerName;
4424
- const effectiveModel = metadata.fallbackModel ??
4425
- streamModel ??
4426
- enhancedOptions.model;
4427
- // Resolve analytics promise to get final token usage
4428
- let resolvedUsage = streamUsage;
4429
- if (!resolvedUsage && streamAnalytics) {
4430
- try {
4431
- const resolved = await Promise.resolve(streamAnalytics);
4432
- if (resolved?.tokenUsage) {
4433
- resolvedUsage = resolved.tokenUsage;
4434
- }
4435
- }
4436
- catch {
4437
- /* non-blocking */
4438
- }
4439
- }
4440
- self.emitter.emit("stream:complete", {
4441
- type: "stream:complete",
4442
- content: accumulatedContent,
4443
- provider: effectiveProvider,
4444
- model: effectiveModel,
4445
- prompt: enhancedOptions.input?.text ||
4446
- enhancedOptions.prompt,
4447
- metadata: {
4448
- chunkCount,
4449
- totalLength: accumulatedContent.length,
4450
- durationMs: Date.now() - streamStartTime,
4451
- sessionId,
4452
- usage: resolvedUsage,
4453
- ...(metadata.fallbackAttempted && {
4454
- primaryProvider: providerName,
4455
- primaryModel: enhancedOptions.model,
4456
- fallback: true,
4457
- }),
4458
- },
4459
- timestamp: Date.now(),
4460
- });
4461
- }
4462
- catch (error) {
4463
- streamError = error;
4464
- // Emit stream:error event (Observability Solution 8)
4465
- self.emitter.emit("stream:error", {
4466
- type: "stream:error",
4467
- content: error instanceof Error ? error.message : String(error),
4468
- provider: providerName,
4469
- model: enhancedOptions.model,
4470
- metadata: {
4471
- chunkCount,
4472
- totalLength: accumulatedContent.length,
4473
- durationMs: Date.now() - streamStartTime,
4474
- errorName: error instanceof Error ? error.name : "UnknownError",
4475
- sessionId,
4476
- },
4477
- timestamp: Date.now(),
4478
- });
4479
- throw error;
4480
- }
4481
- finally {
4482
- self._disableToolCacheForCurrentRequest = false;
4483
- cleanupListeners();
4484
- // Finalize span now that the stream is fully consumed
4485
- streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
4486
- streamSpan.setAttribute(ATTR.NL_OUTPUT_LENGTH, accumulatedContent.length);
4487
- // When fallback took over, the primary provider's span must
4488
- // reflect that it failed — never mark it as successful.
4489
- const primaryFailed = !!(metadata.error || streamError);
4490
- streamSpan.setAttribute(ATTR.GEN_AI_FINISH_REASON, primaryFailed ? "error" : "stop");
4491
- if (metadata.fallbackAttempted) {
4492
- streamSpan.setAttribute("neurolink.fallback_triggered", true);
4493
- if (metadata.fallbackProvider) {
4494
- streamSpan.setAttribute("neurolink.fallback_provider", metadata.fallbackProvider);
4495
- }
4496
- }
4497
- if (primaryFailed) {
4498
- streamSpan.setStatus({
4499
- code: SpanStatusCode.ERROR,
4500
- message: metadata.error ||
4501
- (streamError instanceof Error
4502
- ? streamError.message
4503
- : String(streamError)),
4504
- });
4505
- }
4506
- else {
4507
- streamSpan.setStatus({ code: SpanStatusCode.OK });
4508
- }
4509
- streamSpan.end();
4510
- if (accumulatedContent.trim()) {
4511
- logger.info(`[NeuroLink.stream] stream() - COMPLETE SUCCESS`, {
4512
- provider: providerName,
4513
- model: enhancedOptions.model,
4514
- responseTimeMs: Date.now() - startTime,
4515
- contentLength: accumulatedContent.length,
4516
- fallback: metadata.fallbackAttempted,
4517
- });
4518
- }
4519
- await self.storeStreamConversationMemory({
4520
- enhancedOptions,
4521
- providerName,
4522
- originalPrompt,
4523
- accumulatedContent,
4524
- startTime,
4525
- eventSequence,
4526
- });
4527
- }
4528
- })();
4529
- const streamResult = await this.processStreamResult(processedStream, enhancedOptions, factoryResult);
4530
- const responseTime = Date.now() - startTime;
4531
- // Accumulate session cost for budget tracking
4532
- if (streamResult.analytics?.cost &&
4533
- streamResult.analytics.cost > 0) {
4534
- this._sessionCostUsd += streamResult.analytics.cost;
4427
+ catch (error) {
4428
+ streamError = error;
4429
+ self.emitter.emit("stream:error", {
4430
+ type: "stream:error",
4431
+ content: error instanceof Error ? error.message : String(error),
4432
+ provider: providerName,
4433
+ model: enhancedOptions.model,
4434
+ metadata: {
4435
+ chunkCount,
4436
+ totalLength: accumulatedContent.length,
4437
+ durationMs: Date.now() - streamStartTime,
4438
+ errorName: error instanceof Error ? error.name : "UnknownError",
4439
+ sessionId,
4440
+ },
4441
+ timestamp: Date.now(),
4442
+ });
4443
+ throw error;
4444
+ }
4445
+ finally {
4446
+ self._disableToolCacheForCurrentRequest = false;
4447
+ cleanupListeners();
4448
+ streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
4449
+ streamSpan.setAttribute(ATTR.NL_OUTPUT_LENGTH, accumulatedContent.length);
4450
+ const primaryFailed = !!(metadata.error || streamError);
4451
+ streamSpan.setAttribute(ATTR.GEN_AI_FINISH_REASON, primaryFailed ? "error" : "stop");
4452
+ if (metadata.fallbackAttempted) {
4453
+ streamSpan.setAttribute("neurolink.fallback_triggered", true);
4454
+ if (metadata.fallbackProvider) {
4455
+ streamSpan.setAttribute("neurolink.fallback_provider", metadata.fallbackProvider);
4535
4456
  }
4536
- this.emitStreamEndEvents(streamResult);
4537
- return this.createStreamResponse(streamResult, processedStream, {
4538
- providerName,
4539
- options,
4540
- startTime,
4541
- responseTime,
4542
- streamId,
4543
- fallback: metadata.fallbackAttempted,
4544
- guardrailsBlocked: metadata.guardrailsBlocked,
4545
- error: metadata.error,
4546
- events: eventSequence,
4457
+ }
4458
+ if (primaryFailed) {
4459
+ streamSpan.setStatus({
4460
+ code: SpanStatusCode.ERROR,
4461
+ message: metadata.error ||
4462
+ (streamError instanceof Error
4463
+ ? streamError.message
4464
+ : String(streamError)),
4547
4465
  });
4548
4466
  }
4549
- catch (error) {
4550
- return this.handleStreamError(error, options, startTime, streamId, undefined, undefined);
4467
+ else {
4468
+ streamSpan.setStatus({ code: SpanStatusCode.OK });
4551
4469
  }
4552
- });
4553
- }
4554
- catch (error) {
4555
- // End span on error before re-throwing
4556
- streamSpan.setStatus({
4557
- code: SpanStatusCode.ERROR,
4558
- message: error instanceof Error ? error.message : String(error),
4559
- });
4560
- if (error instanceof Error) {
4561
- streamSpan.recordException(error);
4470
+ streamSpan.end();
4471
+ if (accumulatedContent.trim()) {
4472
+ logger.info(`[NeuroLink.stream] stream() - COMPLETE SUCCESS`, {
4473
+ provider: providerName,
4474
+ model: enhancedOptions.model,
4475
+ responseTimeMs: Date.now() - startTime,
4476
+ contentLength: accumulatedContent.length,
4477
+ fallback: metadata.fallbackAttempted,
4478
+ });
4479
+ }
4480
+ await self.storeStreamConversationMemory({
4481
+ enhancedOptions,
4482
+ providerName,
4483
+ originalPrompt,
4484
+ accumulatedContent,
4485
+ startTime,
4486
+ eventSequence,
4487
+ });
4562
4488
  }
4563
- streamSpan.end();
4489
+ })();
4490
+ const streamResult = await this.processStreamResult(processedStream, enhancedOptions, factoryResult);
4491
+ streamResult.finishReason =
4492
+ streamState.finishReason || streamResult.finishReason;
4493
+ streamResult.toolCalls = streamState.toolCalls;
4494
+ streamResult.toolResults = streamState.toolResults;
4495
+ if (!streamResult.usage) {
4496
+ streamResult.usage = streamUsage;
4497
+ }
4498
+ if (!streamResult.analytics) {
4499
+ streamResult.analytics =
4500
+ streamAnalytics instanceof Promise
4501
+ ? await streamAnalytics
4502
+ : streamAnalytics;
4503
+ }
4504
+ if (streamResult.analytics?.cost && streamResult.analytics.cost > 0) {
4505
+ this._sessionCostUsd += streamResult.analytics.cost;
4506
+ }
4507
+ this.emitStreamEndEvents(streamResult);
4508
+ return this.createStreamResponse(streamResult, processedStream, {
4509
+ providerName,
4510
+ options,
4511
+ startTime,
4512
+ responseTime: Date.now() - startTime,
4513
+ streamId,
4514
+ fallback: metadata.fallbackAttempted,
4515
+ guardrailsBlocked: metadata.guardrailsBlocked,
4516
+ error: metadata.error,
4517
+ events: eventSequence,
4518
+ });
4519
+ }
4520
+ catch (error) {
4521
+ if (options.disableInternalFallback) {
4564
4522
  throw error;
4565
4523
  }
4566
- }); // end metricsTraceContextStorage.run
4524
+ return this.handleStreamError(error, options, startTime, streamId, undefined, undefined);
4525
+ }
4567
4526
  }
4568
4527
  /**
4569
4528
  * Prepare stream options: initialize memory, MCP, retrieval, orchestration,
@@ -4745,7 +4704,7 @@ Current user's request: ${currentInput}`;
4745
4704
  * Handle fallback when the primary stream returns 0 chunks.
4746
4705
  * Yields chunks from a fallback provider and updates metadata accordingly.
4747
4706
  */
4748
- async *handleStreamFallback(metadata, originalPrompt, enhancedOptions, providerName, _accumulatedContent, appendContent) {
4707
+ async *handleStreamFallback(metadata, streamState, originalPrompt, enhancedOptions, providerName, appendContent) {
4749
4708
  metadata.fallbackAttempted = true;
4750
4709
  const errorMsg = "Stream completed with 0 chunks (possible guardrails block)";
4751
4710
  metadata.error = errorMsg;
@@ -4803,6 +4762,14 @@ Current user's request: ${currentInput}`;
4803
4762
  model: fallbackRoute.model,
4804
4763
  conversationMessages,
4805
4764
  });
4765
+ const fallbackToolCalls = fallbackResult.toolCalls ?? [];
4766
+ const fallbackToolResults = fallbackResult.toolResults ?? [];
4767
+ if (fallbackToolCalls.length > 0 || fallbackToolResults.length > 0) {
4768
+ streamState.toolCalls = fallbackToolCalls;
4769
+ streamState.toolResults = fallbackToolResults;
4770
+ streamState.finishReason =
4771
+ fallbackResult.finishReason ?? streamState.finishReason;
4772
+ }
4806
4773
  let fallbackChunkCount = 0;
4807
4774
  for await (const fallbackChunk of fallbackResult.stream) {
4808
4775
  fallbackChunkCount++;
@@ -4814,7 +4781,9 @@ Current user's request: ${currentInput}`;
4814
4781
  }
4815
4782
  yield fallbackChunk;
4816
4783
  }
4817
- if (fallbackChunkCount === 0) {
4784
+ if (fallbackChunkCount === 0 &&
4785
+ fallbackToolCalls.length === 0 &&
4786
+ fallbackToolResults.length === 0) {
4818
4787
  throw new Error(`Fallback provider ${fallbackRoute.provider} also returned 0 chunks`);
4819
4788
  }
4820
4789
  // Fallback succeeded - likely guardrails blocked primary
@@ -5059,6 +5028,9 @@ Current user's request: ${currentInput}`;
5059
5028
  provider: providerName,
5060
5029
  usage: streamResult.usage,
5061
5030
  model: streamResult.model || options.model,
5031
+ finishReason: streamResult.finishReason,
5032
+ toolCalls: streamResult.toolCalls ?? [],
5033
+ toolResults: streamResult.toolResults ?? [],
5062
5034
  analytics: streamResult.analytics,
5063
5035
  };
5064
5036
  }
@@ -5601,7 +5573,8 @@ Current user's request: ${currentInput}`;
5601
5573
  // (direct executeTool() or AI SDK generateText() tool calling).
5602
5574
  if (options?.timeout !== undefined &&
5603
5575
  options.timeout > 0 &&
5604
- Number.isFinite(options.timeout)) {
5576
+ Number.isFinite(options.timeout) &&
5577
+ typeof convertedTool.execute === "function") {
5605
5578
  const originalExecute = convertedTool.execute;
5606
5579
  const toolTimeout = options.timeout;
5607
5580
  const toolName = name;
@@ -5623,7 +5596,7 @@ Current user's request: ${currentInput}`;
5623
5596
  new Promise((_, reject) => {
5624
5597
  composedSignal.addEventListener("abort", () => {
5625
5598
  if (timeoutSignal.aborted) {
5626
- reject(new Error(`Tool '${toolName}' timed out after ${toolTimeout}ms (configured at registration)`));
5599
+ reject(ErrorFactory.toolTimeout(toolName, toolTimeout));
5627
5600
  }
5628
5601
  else {
5629
5602
  reject(new DOMException("The operation was aborted", "AbortError"));
@@ -5968,401 +5941,346 @@ Current user's request: ${currentInput}`;
5968
5941
  * @returns Tool execution result
5969
5942
  */
5970
5943
  async executeTool(toolName, params = {}, options) {
5971
- const functionTag = "NeuroLink.executeTool";
5972
- const executionStartTime = Date.now();
5973
- // === MCP ENHANCEMENT: RequestBatcher — batch programmatic tool calls ===
5974
- // LIMITATION: When the request batcher is enabled, per-tool timeout and retry
5975
- // settings (from registration options or call-site options) are NOT applied.
5976
- // The batcher uses its own hardcoded defaults for timeout and retry behavior.
5977
- // Use `bypassBatcher: true` to ensure per-tool timeout/retry is respected.
5978
- // Additionally, note that executeToolInternal's safe-tool retry logic may still
5979
- // trigger even when maxRetries is set to 0, since it operates independently.
5980
5944
  if (this.mcpToolBatcher && !options?.bypassBatcher) {
5981
5945
  return this.mcpToolBatcher.execute(toolName, params);
5982
5946
  }
5983
- // Determine tool type for span attributes
5984
- const externalTools = this.externalServerManager.getAllTools();
5985
- const externalTool = externalTools.find((tool) => tool.name === toolName);
5947
+ const executionContext = this.createToolExecutionContext(toolName, params, options);
5948
+ return tracers.mcp.startActiveSpan("neurolink.tool.execute", {
5949
+ attributes: {
5950
+ "tool.name": toolName,
5951
+ "tool.type": executionContext.toolType,
5952
+ "tool.input_size": executionContext.inputSize,
5953
+ "tool.input_preview": executionContext.truncatedInput,
5954
+ },
5955
+ }, (toolSpan) => this.executeToolWithSpan(toolName, params, options, executionContext, toolSpan));
5956
+ }
5957
+ createToolExecutionContext(toolName, params, options) {
5958
+ const externalTool = this.externalServerManager
5959
+ .getAllTools()
5960
+ .find((tool) => tool.name === toolName);
5986
5961
  const toolType = externalTool
5987
5962
  ? "mcp"
5988
5963
  : this.getCustomTools().has(toolName)
5989
5964
  ? "custom"
5990
5965
  : "external";
5991
- // Compute truncated input size for the span
5992
5966
  const inputStr = typeof params === "string"
5993
5967
  ? params
5994
5968
  : params
5995
5969
  ? JSON.stringify(params)
5996
5970
  : "";
5997
- const inputSize = inputStr.length;
5998
- const truncatedInput = inputStr.length > 2048 ? inputStr.substring(0, 2048) : inputStr;
5999
- return tracers.mcp.startActiveSpan("neurolink.tool.execute", {
6000
- attributes: {
6001
- "tool.name": toolName,
6002
- "tool.type": toolType,
6003
- "tool.input_size": inputSize,
6004
- "tool.input_preview": truncatedInput,
5971
+ return {
5972
+ functionTag: "NeuroLink.executeTool",
5973
+ executionStartTime: Date.now(),
5974
+ externalTool,
5975
+ toolType,
5976
+ inputSize: inputStr.length,
5977
+ truncatedInput: inputStr.length > 2048 ? inputStr.substring(0, 2048) : inputStr,
5978
+ options,
5979
+ };
5980
+ }
5981
+ async executeToolWithSpan(toolName, params, options, executionContext, toolSpan) {
5982
+ try {
5983
+ const prepared = await this.prepareToolExecutionState(toolName, params, options, executionContext);
5984
+ return await this.runPreparedToolExecution(toolName, params, prepared, executionContext, toolSpan);
5985
+ }
5986
+ catch (outerError) {
5987
+ if (!(outerError instanceof NeuroLinkError)) {
5988
+ const errMsg = outerError instanceof Error ? outerError.message : String(outerError);
5989
+ toolSpan.recordException(outerError instanceof Error ? outerError : new Error(errMsg));
5990
+ toolSpan.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
5991
+ }
5992
+ throw outerError;
5993
+ }
5994
+ finally {
5995
+ toolSpan.end();
5996
+ }
5997
+ }
5998
+ async prepareToolExecutionState(toolName, params, options, executionContext) {
5999
+ logger.debug(`[${executionContext.functionTag}] Tool execution requested:`, {
6000
+ toolName,
6001
+ params: isNonNullObject(params)
6002
+ ? transformParamsForLogging(params)
6003
+ : params,
6004
+ hasExternalManager: !!this.externalServerManager,
6005
+ });
6006
+ logger.debug(`Tool execution detailed analysis`, {
6007
+ toolName,
6008
+ executionStartTime: executionContext.executionStartTime,
6009
+ paramsAnalysis: {
6010
+ type: typeof params,
6011
+ isNull: params === null,
6012
+ isUndefined: params === undefined,
6013
+ isEmpty: params &&
6014
+ typeof params === "object" &&
6015
+ Object.keys(params).length === 0,
6016
+ keys: params && typeof params === "object"
6017
+ ? Object.keys(params)
6018
+ : "NOT_OBJECT",
6019
+ keysLength: params && typeof params === "object"
6020
+ ? Object.keys(params).length
6021
+ : 0,
6005
6022
  },
6006
- }, async (toolSpan) => {
6007
- try {
6008
- // Debug: Log tool execution attempt
6009
- logger.debug(`[${functionTag}] Tool execution requested:`, {
6010
- toolName,
6011
- params: isNonNullObject(params)
6012
- ? transformParamsForLogging(params)
6013
- : params,
6014
- hasExternalManager: !!this.externalServerManager,
6015
- });
6016
- // 🔧 PARAMETER TRACE: Log tool execution details for debugging
6017
- logger.debug(`Tool execution detailed analysis`, {
6018
- toolName,
6019
- executionStartTime,
6020
- paramsAnalysis: {
6021
- type: typeof params,
6022
- isNull: params === null,
6023
- isUndefined: params === undefined,
6024
- isEmpty: params &&
6025
- typeof params === "object" &&
6026
- Object.keys(params).length === 0,
6027
- keys: params && typeof params === "object"
6028
- ? Object.keys(params)
6029
- : "NOT_OBJECT",
6030
- keysLength: params && typeof params === "object"
6031
- ? Object.keys(params).length
6032
- : 0,
6023
+ isTargetTool: toolName === "juspay-analytics_SuccessRateSRByTime",
6024
+ options,
6025
+ hasExternalManager: !!this.externalServerManager,
6026
+ });
6027
+ this.emitter.emit("tool:start", {
6028
+ toolName,
6029
+ timestamp: executionContext.executionStartTime,
6030
+ input: params,
6031
+ });
6032
+ const toolInfo = this.toolRegistry.getToolInfo(toolName);
6033
+ const finalOptions = {
6034
+ timeout: options?.timeout ??
6035
+ toolInfo?.tool?.timeoutMs ??
6036
+ TOOL_TIMEOUTS.EXECUTION_DEFAULT_MS,
6037
+ maxRetries: options?.maxRetries ??
6038
+ toolInfo?.tool?.maxRetries ??
6039
+ RETRY_ATTEMPTS.DEFAULT,
6040
+ retryDelayMs: options?.retryDelayMs || RETRY_DELAYS.BASE_MS,
6041
+ authContext: options?.authContext,
6042
+ disableToolCache: options?.disableToolCache,
6043
+ };
6044
+ const { MemoryManager } = await import("./utils/performance.js");
6045
+ const startMemory = MemoryManager.getMemoryUsageMB();
6046
+ const breakerServerId = executionContext.externalTool?.serverId ||
6047
+ toolInfo?.tool?.serverId ||
6048
+ "unknown";
6049
+ const breakerKey = `${breakerServerId}.${toolName}`;
6050
+ let circuitBreaker = this.toolCircuitBreakers.get(breakerKey);
6051
+ if (!circuitBreaker) {
6052
+ circuitBreaker = new CircuitBreaker(CIRCUIT_BREAKER.FAILURE_THRESHOLD, CIRCUIT_BREAKER_RESET_MS);
6053
+ this.toolCircuitBreakers.set(breakerKey, circuitBreaker);
6054
+ }
6055
+ let metrics = this.toolExecutionMetrics.get(toolName);
6056
+ if (!metrics) {
6057
+ metrics = {
6058
+ totalExecutions: 0,
6059
+ successfulExecutions: 0,
6060
+ failedExecutions: 0,
6061
+ averageExecutionTime: 0,
6062
+ lastExecutionTime: 0,
6063
+ errorCategories: {},
6064
+ };
6065
+ this.toolExecutionMetrics.set(toolName, metrics);
6066
+ }
6067
+ metrics.totalExecutions++;
6068
+ return {
6069
+ finalOptions,
6070
+ startMemory,
6071
+ circuitBreaker,
6072
+ breakerKey,
6073
+ metrics,
6074
+ };
6075
+ }
6076
+ async runPreparedToolExecution(toolName, params, prepared, executionContext, toolSpan) {
6077
+ try {
6078
+ mcpLogger.debug(`[${executionContext.functionTag}] Executing tool: ${toolName}`, {
6079
+ toolName,
6080
+ params,
6081
+ options: prepared.finalOptions,
6082
+ circuitBreakerState: prepared.circuitBreaker.getState(),
6083
+ });
6084
+ const result = await prepared.circuitBreaker.execute(async () => {
6085
+ return withRetry(async () => withTimeout(this.executeToolInternal(toolName, params, prepared.finalOptions), prepared.finalOptions.timeout, ErrorFactory.toolTimeout(toolName, prepared.finalOptions.timeout)), {
6086
+ maxAttempts: prepared.finalOptions.maxRetries + 1,
6087
+ delayMs: prepared.finalOptions.retryDelayMs,
6088
+ isRetriable: isRetriableError,
6089
+ onRetry: (attempt, error) => {
6090
+ mcpLogger.warn(`[${executionContext.functionTag}] Retrying tool execution (attempt ${attempt})`, {
6091
+ toolName,
6092
+ error: error.message,
6093
+ attempt,
6094
+ });
6033
6095
  },
6034
- isTargetTool: toolName === "juspay-analytics_SuccessRateSRByTime",
6035
- options,
6036
- hasExternalManager: !!this.externalServerManager,
6037
6096
  });
6038
- // Emit tool start event (NeuroLink format - keep existing)
6039
- this.emitter.emit("tool:start", {
6040
- toolName,
6041
- timestamp: executionStartTime,
6042
- input: params, // Enhanced: add input parameters
6097
+ });
6098
+ return await this.handleSuccessfulToolExecution(toolName, result, prepared, executionContext, toolSpan);
6099
+ }
6100
+ catch (error) {
6101
+ return this.handleFailedToolExecution(toolName, params, error, prepared, executionContext, toolSpan);
6102
+ }
6103
+ }
6104
+ async handleSuccessfulToolExecution(toolName, result, prepared, executionContext, toolSpan) {
6105
+ const executionTime = Date.now() - executionContext.executionStartTime;
6106
+ prepared.metrics.successfulExecutions++;
6107
+ prepared.metrics.lastExecutionTime = executionTime;
6108
+ prepared.metrics.averageExecutionTime =
6109
+ (prepared.metrics.averageExecutionTime *
6110
+ (prepared.metrics.successfulExecutions - 1) +
6111
+ executionTime) /
6112
+ prepared.metrics.successfulExecutions;
6113
+ const { MemoryManager } = await import("./utils/performance.js");
6114
+ const endMemory = MemoryManager.getMemoryUsageMB();
6115
+ const memoryDelta = endMemory.heapUsed - prepared.startMemory.heapUsed;
6116
+ if (memoryDelta > 20) {
6117
+ mcpLogger.warn(`Tool '${toolName}' used excessive memory: ${memoryDelta}MB`, {
6118
+ toolName,
6119
+ memoryDelta,
6120
+ executionTime,
6121
+ });
6122
+ }
6123
+ mcpLogger.debug(`[${executionContext.functionTag}] Tool executed successfully`, {
6124
+ toolName,
6125
+ executionTime,
6126
+ memoryDelta,
6127
+ circuitBreakerState: prepared.circuitBreaker.getState(),
6128
+ });
6129
+ const resultObj = result && typeof result === "object"
6130
+ ? result
6131
+ : undefined;
6132
+ const isToolError = (resultObj && "isError" in resultObj && resultObj.isError === true) ||
6133
+ (resultObj && "success" in resultObj && resultObj.success === false);
6134
+ if (isToolError) {
6135
+ try {
6136
+ await prepared.circuitBreaker.execute(async () => {
6137
+ throw new Error(`Tool ${toolName} returned isError:true`);
6043
6138
  });
6044
- // NL-004: Use composite key (serverId.toolName) to avoid cross-server collisions
6045
- // Fetch toolInfo early so per-tool timeout is available for finalOptions
6046
- const toolInfo = this.toolRegistry.getToolInfo(toolName);
6047
- // Set default options — per-tool values from registration take precedence over global defaults.
6048
- // When not explicitly set at registration, global defaults are preserved for backward compatibility.
6049
- const registeredTimeout = toolInfo?.tool?.timeoutMs;
6050
- const registeredMaxRetries = toolInfo?.tool?.maxRetries;
6051
- const finalOptions = {
6052
- timeout: options?.timeout ??
6053
- registeredTimeout ??
6054
- TOOL_TIMEOUTS.EXECUTION_DEFAULT_MS,
6055
- maxRetries: options?.maxRetries ??
6056
- registeredMaxRetries ??
6057
- RETRY_ATTEMPTS.DEFAULT,
6058
- retryDelayMs: options?.retryDelayMs || RETRY_DELAYS.BASE_MS,
6059
- authContext: options?.authContext,
6060
- disableToolCache: options?.disableToolCache,
6061
- };
6062
- // Track memory usage for tool execution
6063
- const { MemoryManager } = await import("./utils/performance.js");
6064
- const startMemory = MemoryManager.getMemoryUsageMB();
6065
- const breakerServerId = externalTool?.serverId || toolInfo?.tool?.serverId || "unknown";
6066
- const breakerKey = `${breakerServerId}.${toolName}`;
6067
- // Get or create circuit breaker for this tool
6068
- if (!this.toolCircuitBreakers.has(breakerKey)) {
6069
- this.toolCircuitBreakers.set(breakerKey, new CircuitBreaker(CIRCUIT_BREAKER.FAILURE_THRESHOLD, CIRCUIT_BREAKER_RESET_MS));
6070
- }
6071
- const circuitBreaker = this.toolCircuitBreakers.get(breakerKey);
6072
- // Initialize metrics for this tool if not exists
6073
- if (!this.toolExecutionMetrics.has(toolName)) {
6074
- this.toolExecutionMetrics.set(toolName, {
6075
- totalExecutions: 0,
6076
- successfulExecutions: 0,
6077
- failedExecutions: 0,
6078
- averageExecutionTime: 0,
6079
- lastExecutionTime: 0,
6080
- errorCategories: {},
6081
- });
6082
- }
6083
- const metrics = this.toolExecutionMetrics.get(toolName);
6084
- if (metrics) {
6085
- metrics.totalExecutions++;
6086
- }
6087
- try {
6088
- mcpLogger.debug(`[${functionTag}] Executing tool: ${toolName}`, {
6089
- toolName,
6090
- params,
6091
- options: finalOptions,
6092
- circuitBreakerState: circuitBreaker?.getState(),
6093
- });
6094
- // Execute with circuit breaker, timeout, and retry logic
6095
- if (!circuitBreaker) {
6096
- throw new Error(`Circuit breaker not initialized for tool: ${toolName}`);
6097
- }
6098
- const result = await circuitBreaker.execute(async () => {
6099
- return await withRetry(async () => {
6100
- return await withTimeout(this.executeToolInternal(toolName, params, finalOptions), finalOptions.timeout, ErrorFactory.toolTimeout(toolName, finalOptions.timeout));
6101
- }, {
6102
- maxAttempts: finalOptions.maxRetries + 1, // +1 for initial attempt
6103
- delayMs: finalOptions.retryDelayMs,
6104
- isRetriable: isRetriableError,
6105
- onRetry: (attempt, error) => {
6106
- mcpLogger.warn(`[${functionTag}] Retrying tool execution (attempt ${attempt})`, {
6107
- toolName,
6108
- error: error.message,
6109
- attempt,
6110
- });
6111
- },
6112
- });
6113
- });
6114
- // Update success metrics
6115
- const executionTime = Date.now() - executionStartTime;
6116
- if (metrics) {
6117
- metrics.successfulExecutions++;
6118
- metrics.lastExecutionTime = executionTime;
6119
- metrics.averageExecutionTime =
6120
- (metrics.averageExecutionTime *
6121
- (metrics.successfulExecutions - 1) +
6122
- executionTime) /
6123
- metrics.successfulExecutions;
6124
- }
6125
- // Track memory usage
6126
- const endMemory = MemoryManager.getMemoryUsageMB();
6127
- const memoryDelta = endMemory.heapUsed - startMemory.heapUsed;
6128
- if (memoryDelta > 20) {
6129
- mcpLogger.warn(`Tool '${toolName}' used excessive memory: ${memoryDelta}MB`, {
6130
- toolName,
6131
- memoryDelta,
6132
- executionTime,
6133
- });
6134
- }
6135
- mcpLogger.debug(`[${functionTag}] Tool executed successfully`, {
6136
- toolName,
6137
- executionTime,
6138
- memoryDelta,
6139
- circuitBreakerState: circuitBreaker?.getState(),
6140
- });
6141
- // Set span success attributes
6142
- // Check if result has isError flag (MCP tool error result)
6143
- // Also detect toolRegistry-wrapped errors that return { success: false }
6144
- const resultObj = result && typeof result === "object"
6145
- ? result
6146
- : undefined;
6147
- const isToolError = (resultObj &&
6148
- "isError" in resultObj &&
6149
- resultObj.isError === true) ||
6150
- (resultObj &&
6151
- "success" in resultObj &&
6152
- resultObj.success === false);
6153
- // NL-001: Count isError:true results as circuit breaker failures
6154
- // This ensures tools that return error results (not just thrown errors) are tracked
6155
- // TODO(NL-009): This records a failure AFTER the circuit breaker already recorded
6156
- // success inside `circuitBreaker.execute()`. The correct fix is to check `isToolError`
6157
- // inside the execute callback and throw before returning, so the breaker never sees
6158
- // success. Deferred because moving the check inside the callback requires restructuring
6159
- // the retry/timeout wrapper chain and is high-risk for a hot-path change.
6160
- if (isToolError && circuitBreaker) {
6161
- // Record a failure by executing a rejected promise through the breaker
6162
- try {
6163
- await circuitBreaker.execute(async () => {
6164
- throw new Error(`Tool ${toolName} returned isError:true`);
6165
- });
6166
- }
6167
- catch {
6168
- // Expected — we intentionally triggered the failure recording
6169
- }
6170
- mcpLogger.debug(`[${functionTag}] Circuit breaker failure recorded for isError result`, {
6171
- toolName,
6172
- circuitBreakerState: circuitBreaker.getState(),
6173
- circuitBreakerFailures: circuitBreaker.getFailureCount(),
6174
- });
6175
- }
6176
- // NL-002 + NL-003: Format and capture MCP error results
6177
- if (isToolError) {
6178
- const resultObj = result;
6179
- const contentArr = resultObj.content;
6180
- const errorText = contentArr
6181
- ?.filter((c) => c.type === "text" && c.text)
6182
- .map((c) => c.text)
6183
- .join(" ") ||
6184
- (typeof resultObj.error === "string"
6185
- ? resultObj.error
6186
- : "Unknown error");
6187
- const errorCategory = classifyMcpErrorMessage(errorText);
6188
- const prefix = `[TOOL_ERROR: ${toolName} failed (${errorCategory})] `;
6189
- // NL-002: Clone content array to avoid mutating shared objects, then prefix error
6190
- if (contentArr && Array.isArray(contentArr)) {
6191
- const clonedContent = contentArr.map((c) => ({ ...c }));
6192
- for (const content of clonedContent) {
6193
- if (content.type === "text" && content.text) {
6194
- content.text = prefix + content.text;
6195
- break; // Only prefix the first text content
6196
- }
6197
- }
6198
- resultObj.content = clonedContent;
6199
- }
6200
- // NL-003: Capture error details in span attributes for telemetry
6201
- toolSpan.setAttribute("tool.error.message", errorText.substring(0, 500));
6202
- toolSpan.setAttribute("tool.error.category", errorCategory);
6203
- toolSpan.setStatus({
6204
- code: SpanStatusCode.ERROR,
6205
- message: `MCP tool returned isError: ${errorText.substring(0, 200)}`,
6206
- });
6207
- if (metrics) {
6208
- metrics.failedExecutions++;
6209
- const prevSuccessful = metrics.successfulExecutions;
6210
- metrics.successfulExecutions = Math.max(0, metrics.successfulExecutions - 1);
6211
- // Recompute averageExecutionTime: back out this execution's duration
6212
- // which was incorrectly included as a success
6213
- if (prevSuccessful > 1) {
6214
- metrics.averageExecutionTime =
6215
- (metrics.averageExecutionTime * prevSuccessful -
6216
- executionTime) /
6217
- (prevSuccessful - 1);
6218
- }
6219
- else {
6220
- // No remaining successful executions, reset to 0
6221
- metrics.averageExecutionTime = 0;
6222
- }
6223
- const mappedCategory = mcpCategoryToErrorCategory(errorCategory);
6224
- metrics.errorCategories[mappedCategory] =
6225
- (metrics.errorCategories[mappedCategory] || 0) + 1;
6226
- }
6227
- }
6228
- // Emit tool end event AFTER isError check so success flag is correct
6229
- this.emitToolEndEvent(toolName, executionStartTime, !isToolError, result);
6230
- toolSpan.setAttribute("tool.result.status", isToolError ? "error" : "success");
6231
- toolSpan.setAttribute("tool.duration_ms", executionTime);
6232
- return result;
6233
- }
6234
- catch (error) {
6235
- // Update failure metrics
6236
- if (metrics) {
6237
- metrics.failedExecutions++;
6238
- }
6239
- const executionTime = Date.now() - executionStartTime;
6240
- // Circuit breaker open: return a structured non-retryable isError result
6241
- // so the AI model understands the tool is temporarily unavailable.
6242
- // Log at warn (not error) since this is expected circuit breaker behavior.
6243
- if (error instanceof CircuitBreakerOpenError) {
6244
- mcpLogger.warn(`[${functionTag}] Tool blocked by circuit breaker: ${toolName}`, {
6245
- toolName,
6246
- breakerState: error.breakerState,
6247
- retryAfter: error.retryAfter,
6248
- retryAfterMs: error.retryAfterMs,
6249
- failureCount: error.failureCount,
6250
- executionTime,
6251
- });
6252
- if (metrics) {
6253
- const category = ErrorCategory.EXECUTION;
6254
- metrics.errorCategories[category] =
6255
- (metrics.errorCategories[category] || 0) + 1;
6256
- }
6257
- // Emit tool end event for circuit breaker open
6258
- this.emitToolEndEvent(toolName, executionStartTime, false, undefined);
6259
- toolSpan.setAttribute("tool.result.status", "circuit_breaker_open");
6260
- toolSpan.setAttribute("tool.duration_ms", executionTime);
6261
- toolSpan.setAttribute("tool.circuit_breaker.state", error.breakerState);
6262
- toolSpan.setAttribute("tool.circuit_breaker.retry_after_ms", error.retryAfterMs);
6263
- toolSpan.setAttribute("tool.circuit_breaker.failure_count", error.failureCount);
6264
- toolSpan.setStatus({
6265
- code: SpanStatusCode.ERROR,
6266
- message: `Circuit breaker open for ${toolName}: ${error.message}`,
6267
- });
6268
- // Return an isError tool result so the AI can inform the user
6269
- // instead of throwing, which would cause a generic retry
6270
- return {
6271
- isError: true,
6272
- content: [
6273
- {
6274
- type: "text",
6275
- text: `TOOL TEMPORARILY UNAVAILABLE: "${toolName}" has been disabled after ` +
6276
- `${error.failureCount} failures. ` +
6277
- `This is a circuit breaker protection — do NOT retry this tool. ` +
6278
- `It will become available again after ${Math.ceil(error.retryAfterMs / 1000)} seconds ` +
6279
- `(at ${error.retryAfter}). ` +
6280
- `Instead, inform the user that the operation failed and suggest trying again later.`,
6281
- },
6282
- ],
6283
- };
6284
- }
6285
- // Create structured error
6286
- let structuredError;
6287
- if (error instanceof NeuroLinkError) {
6288
- structuredError = error;
6289
- }
6290
- else if (error instanceof Error) {
6291
- // Categorize the error based on the message
6292
- if (error.message.includes("timeout")) {
6293
- structuredError = ErrorFactory.toolTimeout(toolName, finalOptions.timeout);
6294
- }
6295
- else if (error.message.includes("not found")) {
6296
- const availableTools = await this.getAllAvailableTools();
6297
- structuredError = ErrorFactory.toolNotFound(toolName, extractToolNames(availableTools.map((t) => ({ name: t.name }))));
6298
- }
6299
- else if (error.message.includes("validation") ||
6300
- error.message.includes("parameter")) {
6301
- structuredError = ErrorFactory.invalidParameters(toolName, error, params);
6302
- }
6303
- else if (error.message.includes("network") ||
6304
- error.message.includes("connection")) {
6305
- structuredError = ErrorFactory.networkError(toolName, error);
6306
- }
6307
- else {
6308
- structuredError = ErrorFactory.toolExecutionFailed(toolName, error);
6309
- }
6310
- }
6311
- else {
6312
- structuredError = ErrorFactory.toolExecutionFailed(toolName, new Error(String(error)));
6313
- }
6314
- if (metrics) {
6315
- const category = structuredError.category || ErrorCategory.EXECUTION;
6316
- metrics.errorCategories[category] =
6317
- (metrics.errorCategories[category] || 0) + 1;
6139
+ }
6140
+ catch {
6141
+ // Expected intentionally records the failure
6142
+ }
6143
+ mcpLogger.debug(`[${executionContext.functionTag}] Circuit breaker failure recorded for isError result`, {
6144
+ toolName,
6145
+ circuitBreakerState: prepared.circuitBreaker.getState(),
6146
+ circuitBreakerFailures: prepared.circuitBreaker.getFailureCount(),
6147
+ });
6148
+ const contentArr = resultObj?.content;
6149
+ const errorText = contentArr
6150
+ ?.filter((content) => content.type === "text" && content.text)
6151
+ .map((content) => content.text)
6152
+ .join(" ") ||
6153
+ (typeof resultObj?.error === "string"
6154
+ ? resultObj.error
6155
+ : "Unknown error");
6156
+ const errorCategory = classifyMcpErrorMessage(errorText);
6157
+ const prefix = `[TOOL_ERROR: ${toolName} failed (${errorCategory})] `;
6158
+ if (resultObj && Array.isArray(contentArr)) {
6159
+ const clonedContent = contentArr.map((content) => ({ ...content }));
6160
+ for (const content of clonedContent) {
6161
+ if (content.type === "text" && content.text) {
6162
+ content.text = prefix + content.text;
6163
+ break;
6318
6164
  }
6319
- // Emit tool end event BEFORE the error event.
6320
- // Node.js EventEmitter throws on unhandled 'error' events,
6321
- // which would prevent tool:end from being emitted.
6322
- this.emitToolEndEvent(toolName, executionStartTime, false, undefined, structuredError);
6323
- // Centralized error event emission
6324
- this.emitter.emit("error", structuredError);
6325
- // Add execution context to structured error
6326
- structuredError = new NeuroLinkError({
6327
- ...structuredError,
6328
- context: {
6329
- ...structuredError.context,
6330
- executionTime,
6331
- params,
6332
- options: finalOptions,
6333
- circuitBreakerState: circuitBreaker?.getState(),
6334
- circuitBreakerFailures: circuitBreaker?.getFailureCount(),
6335
- metrics: { ...metrics },
6336
- },
6337
- });
6338
- // Log structured error
6339
- logStructuredError(structuredError);
6340
- // Record error on span
6341
- toolSpan.setAttribute("tool.result.status", "error");
6342
- toolSpan.setAttribute("tool.duration_ms", executionTime);
6343
- toolSpan.recordException(structuredError);
6344
- toolSpan.setStatus({
6345
- code: SpanStatusCode.ERROR,
6346
- message: structuredError.message,
6347
- });
6348
- throw structuredError;
6349
6165
  }
6166
+ resultObj.content = clonedContent;
6350
6167
  }
6351
- catch (outerError) {
6352
- // If the error was not already recorded on the span (from inner catch), record it
6353
- if (!(outerError instanceof NeuroLinkError)) {
6354
- const errMsg = outerError instanceof Error
6355
- ? outerError.message
6356
- : String(outerError);
6357
- toolSpan.recordException(outerError instanceof Error ? outerError : new Error(errMsg));
6358
- toolSpan.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
6359
- }
6360
- throw outerError;
6168
+ toolSpan.setAttribute("tool.error.message", errorText.substring(0, 500));
6169
+ toolSpan.setAttribute("tool.error.category", errorCategory);
6170
+ toolSpan.setStatus({
6171
+ code: SpanStatusCode.ERROR,
6172
+ message: `MCP tool returned isError: ${errorText.substring(0, 200)}`,
6173
+ });
6174
+ prepared.metrics.failedExecutions++;
6175
+ const prevSuccessful = prepared.metrics.successfulExecutions;
6176
+ prepared.metrics.successfulExecutions = Math.max(0, prepared.metrics.successfulExecutions - 1);
6177
+ prepared.metrics.averageExecutionTime =
6178
+ prevSuccessful > 1
6179
+ ? (prepared.metrics.averageExecutionTime * prevSuccessful -
6180
+ executionTime) /
6181
+ (prevSuccessful - 1)
6182
+ : 0;
6183
+ const mappedCategory = mcpCategoryToErrorCategory(errorCategory);
6184
+ prepared.metrics.errorCategories[mappedCategory] =
6185
+ (prepared.metrics.errorCategories[mappedCategory] || 0) + 1;
6186
+ }
6187
+ this.emitToolEndEvent(toolName, executionContext.executionStartTime, !isToolError, result);
6188
+ toolSpan.setAttribute("tool.result.status", isToolError ? "error" : "success");
6189
+ toolSpan.setAttribute("tool.duration_ms", executionTime);
6190
+ return result;
6191
+ }
6192
+ async handleFailedToolExecution(toolName, params, error, prepared, executionContext, toolSpan) {
6193
+ prepared.metrics.failedExecutions++;
6194
+ const executionTime = Date.now() - executionContext.executionStartTime;
6195
+ if (error instanceof CircuitBreakerOpenError) {
6196
+ mcpLogger.warn(`[${executionContext.functionTag}] Tool blocked by circuit breaker: ${toolName}`, {
6197
+ toolName,
6198
+ breakerState: error.breakerState,
6199
+ retryAfter: error.retryAfter,
6200
+ retryAfterMs: error.retryAfterMs,
6201
+ failureCount: error.failureCount,
6202
+ executionTime,
6203
+ });
6204
+ prepared.metrics.errorCategories[ErrorCategory.EXECUTION] =
6205
+ (prepared.metrics.errorCategories[ErrorCategory.EXECUTION] || 0) + 1;
6206
+ this.emitToolEndEvent(toolName, executionContext.executionStartTime, false, undefined);
6207
+ toolSpan.setAttribute("tool.result.status", "circuit_breaker_open");
6208
+ toolSpan.setAttribute("tool.duration_ms", executionTime);
6209
+ toolSpan.setAttribute("tool.circuit_breaker.state", error.breakerState);
6210
+ toolSpan.setAttribute("tool.circuit_breaker.retry_after_ms", error.retryAfterMs);
6211
+ toolSpan.setAttribute("tool.circuit_breaker.failure_count", error.failureCount);
6212
+ toolSpan.setStatus({
6213
+ code: SpanStatusCode.ERROR,
6214
+ message: `Circuit breaker open for ${toolName}: ${error.message}`,
6215
+ });
6216
+ return {
6217
+ isError: true,
6218
+ content: [
6219
+ {
6220
+ type: "text",
6221
+ text: `TOOL TEMPORARILY UNAVAILABLE: "${toolName}" has been disabled after ` +
6222
+ `${error.failureCount} failures. ` +
6223
+ `This is a circuit breaker protection — do NOT retry this tool. ` +
6224
+ `It will become available again after ${Math.ceil(error.retryAfterMs / 1000)} seconds ` +
6225
+ `(at ${error.retryAfter}). ` +
6226
+ `Instead, inform the user that the operation failed and suggest trying again later.`,
6227
+ },
6228
+ ],
6229
+ };
6230
+ }
6231
+ let structuredError;
6232
+ if (error instanceof NeuroLinkError) {
6233
+ structuredError = error;
6234
+ }
6235
+ else if (error instanceof Error) {
6236
+ if (error.message.includes("timeout")) {
6237
+ structuredError = ErrorFactory.toolTimeout(toolName, prepared.finalOptions.timeout);
6361
6238
  }
6362
- finally {
6363
- toolSpan.end();
6239
+ else if (error.message.includes("not found")) {
6240
+ const availableTools = await this.getAllAvailableTools();
6241
+ structuredError = ErrorFactory.toolNotFound(toolName, extractToolNames(availableTools.map((tool) => ({ name: tool.name }))));
6242
+ }
6243
+ else if (error.message.includes("validation") ||
6244
+ error.message.includes("parameter")) {
6245
+ structuredError = ErrorFactory.invalidParameters(toolName, error, params);
6364
6246
  }
6247
+ else if (error.message.includes("network") ||
6248
+ error.message.includes("connection")) {
6249
+ structuredError = ErrorFactory.networkError(toolName, error);
6250
+ }
6251
+ else {
6252
+ structuredError = ErrorFactory.toolExecutionFailed(toolName, error);
6253
+ }
6254
+ }
6255
+ else {
6256
+ structuredError = ErrorFactory.toolExecutionFailed(toolName, new Error(String(error)));
6257
+ }
6258
+ const category = structuredError.category || ErrorCategory.EXECUTION;
6259
+ prepared.metrics.errorCategories[category] =
6260
+ (prepared.metrics.errorCategories[category] || 0) + 1;
6261
+ this.emitToolEndEvent(toolName, executionContext.executionStartTime, false, undefined, structuredError);
6262
+ this.emitter.emit("error", structuredError);
6263
+ structuredError = new NeuroLinkError({
6264
+ ...structuredError,
6265
+ context: {
6266
+ ...structuredError.context,
6267
+ executionTime,
6268
+ params,
6269
+ options: prepared.finalOptions,
6270
+ circuitBreakerState: prepared.circuitBreaker.getState(),
6271
+ circuitBreakerFailures: prepared.circuitBreaker.getFailureCount(),
6272
+ metrics: { ...prepared.metrics },
6273
+ },
6365
6274
  });
6275
+ logStructuredError(structuredError);
6276
+ toolSpan.setAttribute("tool.result.status", "error");
6277
+ toolSpan.setAttribute("tool.duration_ms", executionTime);
6278
+ toolSpan.recordException(structuredError);
6279
+ toolSpan.setStatus({
6280
+ code: SpanStatusCode.ERROR,
6281
+ message: structuredError.message,
6282
+ });
6283
+ throw structuredError;
6366
6284
  }
6367
6285
  /**
6368
6286
  * Internal tool execution method with MCP enhancements wired in:
@@ -6379,9 +6297,17 @@ Current user's request: ${currentInput}`;
6379
6297
  !options.disableToolCache &&
6380
6298
  !this._disableToolCacheForCurrentRequest &&
6381
6299
  !toolAnnotations?.destructiveHint;
6300
+ const toolResultCache = this.mcpToolResultCache;
6382
6301
  // === MCP ENHANCEMENT: Cache check (before execution) ===
6383
- if (isCacheEnabled) {
6384
- const cached = this.mcpToolResultCache.getCachedResult(toolName, params);
6302
+ // Scope cache key by auth context to prevent cross-user cache leaks
6303
+ const cacheParams = options.authContext || this.toolExecutionContext
6304
+ ? {
6305
+ __args: params,
6306
+ __ctx: options.authContext ?? this.toolExecutionContext,
6307
+ }
6308
+ : params;
6309
+ if (isCacheEnabled && toolResultCache) {
6310
+ const cached = toolResultCache.getCachedResult(toolName, cacheParams);
6385
6311
  if (cached !== undefined) {
6386
6312
  logger.debug(`[${functionTag}] Cache HIT for tool: ${toolName}`);
6387
6313
  return cached;
@@ -6515,8 +6441,8 @@ Current user's request: ${currentInput}`;
6515
6441
  try {
6516
6442
  const result = await executeWithMiddleware(executeCore);
6517
6443
  // === MCP ENHANCEMENT: Cache store (after successful execution) ===
6518
- if (isCacheEnabled && result !== undefined) {
6519
- this.mcpToolResultCache.cacheResult(toolName, params, result);
6444
+ if (isCacheEnabled && toolResultCache && result !== undefined) {
6445
+ toolResultCache.cacheResult(toolName, cacheParams, result);
6520
6446
  logger.debug(`[${functionTag}] Cached result for tool: ${toolName}`);
6521
6447
  }
6522
6448
  return result;
@@ -6539,8 +6465,8 @@ Current user's request: ${currentInput}`;
6539
6465
  try {
6540
6466
  const retryResult = await executeWithMiddleware(executeCore);
6541
6467
  // Cache the retry result
6542
- if (isCacheEnabled && retryResult !== undefined) {
6543
- this.mcpToolResultCache.cacheResult(toolName, params, retryResult);
6468
+ if (isCacheEnabled && toolResultCache && retryResult !== undefined) {
6469
+ toolResultCache.cacheResult(toolName, cacheParams, retryResult);
6544
6470
  }
6545
6471
  return retryResult;
6546
6472
  }
@@ -8641,28 +8567,33 @@ Current user's request: ${currentInput}`;
8641
8567
  async setAuthProvider(config) {
8642
8568
  // Clear any pending lazy-init promise so it does not race with this call.
8643
8569
  this.authInitPromise = undefined;
8570
+ await this.initializeAuthProviderFromConfig(config);
8571
+ }
8572
+ async initializeAuthProviderFromConfig(config) {
8573
+ let provider;
8574
+ let providerType;
8644
8575
  // Duck-type check: direct MastraAuthProvider instance
8645
8576
  if ("authenticateToken" in config &&
8646
8577
  typeof config.authenticateToken === "function") {
8647
- this.authProvider = config;
8648
- logger.info(`Auth provider set: ${this.authProvider.type}`);
8578
+ provider = config;
8579
+ providerType = provider.type;
8649
8580
  }
8650
8581
  else if ("provider" in config) {
8651
- this.authProvider = config.provider;
8652
- logger.info(`Auth provider set: ${this.authProvider.type}`);
8582
+ provider = config.provider;
8583
+ providerType = provider.type;
8653
8584
  }
8654
8585
  else {
8655
8586
  const typedConfig = config;
8656
8587
  const { AuthProviderFactory } = await import("./auth/AuthProviderFactory.js");
8657
- this.authProvider = await AuthProviderFactory.createProvider(typedConfig.type, typedConfig.config);
8658
- logger.info(`Auth provider created and set: ${typedConfig.type}`);
8659
- }
8660
- if (this.authProvider) {
8661
- this.emitter.emit("auth:provider:set", {
8662
- type: this.authProvider.type,
8663
- timestamp: Date.now(),
8664
- });
8588
+ provider = await AuthProviderFactory.createProvider(typedConfig.type, typedConfig.config);
8589
+ providerType = typedConfig.type;
8665
8590
  }
8591
+ this.authProvider = provider;
8592
+ logger.info(`Auth provider set: ${providerType}`);
8593
+ this.emitter.emit("auth:provider:set", {
8594
+ type: provider.type,
8595
+ timestamp: Date.now(),
8596
+ });
8666
8597
  }
8667
8598
  /**
8668
8599
  * Get the currently configured authentication provider
@@ -8679,14 +8610,18 @@ Current user's request: ${currentInput}`;
8679
8610
  if (this.authProvider || !this.pendingAuthConfig) {
8680
8611
  return;
8681
8612
  }
8613
+ const pendingAuthConfig = this.pendingAuthConfig;
8682
8614
  this.authInitPromise ??= (async () => {
8683
8615
  try {
8684
- await this.setAuthProvider(this.pendingAuthConfig);
8616
+ await this.initializeAuthProviderFromConfig(pendingAuthConfig);
8685
8617
  this.pendingAuthConfig = undefined;
8686
8618
  }
8687
- catch (err) {
8688
- this.authInitPromise = undefined;
8689
- throw err;
8619
+ finally {
8620
+ if (this.authInitPromise &&
8621
+ (this.pendingAuthConfig === undefined ||
8622
+ this.pendingAuthConfig === pendingAuthConfig)) {
8623
+ this.authInitPromise = undefined;
8624
+ }
8690
8625
  }
8691
8626
  })();
8692
8627
  await this.authInitPromise;