@juspay/neurolink 9.42.0 → 9.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/CHANGELOG.md +8 -0
  2. package/dist/auth/anthropicOAuth.js +12 -0
  3. package/dist/browser/neurolink.min.js +335 -334
  4. package/dist/cli/commands/mcp.d.ts +6 -0
  5. package/dist/cli/commands/mcp.js +200 -184
  6. package/dist/cli/commands/proxy.js +560 -518
  7. package/dist/core/baseProvider.d.ts +6 -1
  8. package/dist/core/baseProvider.js +219 -232
  9. package/dist/core/factory.d.ts +3 -0
  10. package/dist/core/factory.js +140 -190
  11. package/dist/core/modules/ToolsManager.d.ts +1 -0
  12. package/dist/core/modules/ToolsManager.js +40 -42
  13. package/dist/core/toolEvents.d.ts +3 -0
  14. package/dist/core/toolEvents.js +7 -0
  15. package/dist/evaluation/pipeline/evaluationPipeline.js +5 -2
  16. package/dist/evaluation/scorers/scorerRegistry.d.ts +3 -0
  17. package/dist/evaluation/scorers/scorerRegistry.js +356 -284
  18. package/dist/lib/auth/anthropicOAuth.js +12 -0
  19. package/dist/lib/core/baseProvider.d.ts +6 -1
  20. package/dist/lib/core/baseProvider.js +219 -232
  21. package/dist/lib/core/factory.d.ts +3 -0
  22. package/dist/lib/core/factory.js +140 -190
  23. package/dist/lib/core/modules/ToolsManager.d.ts +1 -0
  24. package/dist/lib/core/modules/ToolsManager.js +40 -42
  25. package/dist/lib/core/toolEvents.d.ts +3 -0
  26. package/dist/lib/core/toolEvents.js +8 -0
  27. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +5 -2
  28. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +3 -0
  29. package/dist/lib/evaluation/scorers/scorerRegistry.js +356 -284
  30. package/dist/lib/mcp/toolRegistry.d.ts +2 -0
  31. package/dist/lib/mcp/toolRegistry.js +32 -31
  32. package/dist/lib/neurolink.d.ts +38 -0
  33. package/dist/lib/neurolink.js +1890 -1707
  34. package/dist/lib/providers/googleAiStudio.js +0 -5
  35. package/dist/lib/providers/googleNativeGemini3.d.ts +4 -0
  36. package/dist/lib/providers/googleNativeGemini3.js +39 -1
  37. package/dist/lib/providers/googleVertex.d.ts +10 -0
  38. package/dist/lib/providers/googleVertex.js +445 -445
  39. package/dist/lib/providers/litellm.d.ts +1 -0
  40. package/dist/lib/providers/litellm.js +73 -64
  41. package/dist/lib/providers/ollama.js +17 -4
  42. package/dist/lib/providers/openAI.d.ts +2 -0
  43. package/dist/lib/providers/openAI.js +139 -140
  44. package/dist/lib/proxy/claudeFormat.js +14 -5
  45. package/dist/lib/proxy/oauthFetch.js +298 -318
  46. package/dist/lib/proxy/proxyConfig.js +3 -1
  47. package/dist/lib/proxy/proxyFetch.js +250 -222
  48. package/dist/lib/proxy/proxyHealth.d.ts +17 -0
  49. package/dist/lib/proxy/proxyHealth.js +55 -0
  50. package/dist/lib/proxy/requestLogger.js +140 -48
  51. package/dist/lib/proxy/routingPolicy.d.ts +33 -0
  52. package/dist/lib/proxy/routingPolicy.js +255 -0
  53. package/dist/lib/proxy/snapshotPersistence.d.ts +2 -0
  54. package/dist/lib/proxy/snapshotPersistence.js +41 -0
  55. package/dist/lib/proxy/sseInterceptor.js +36 -11
  56. package/dist/lib/server/routes/claudeProxyRoutes.d.ts +2 -1
  57. package/dist/lib/server/routes/claudeProxyRoutes.js +2916 -2377
  58. package/dist/lib/services/server/ai/observability/instrumentation.js +194 -218
  59. package/dist/lib/tasks/backends/bullmqBackend.js +24 -18
  60. package/dist/lib/tasks/store/redisTaskStore.js +42 -17
  61. package/dist/lib/tasks/taskManager.d.ts +2 -0
  62. package/dist/lib/tasks/taskManager.js +100 -5
  63. package/dist/lib/telemetry/telemetryService.js +9 -5
  64. package/dist/lib/types/cli.d.ts +4 -0
  65. package/dist/lib/types/proxyTypes.d.ts +211 -1
  66. package/dist/lib/types/tools.d.ts +18 -0
  67. package/dist/lib/utils/providerHealth.d.ts +1 -0
  68. package/dist/lib/utils/providerHealth.js +46 -31
  69. package/dist/lib/utils/providerUtils.js +11 -22
  70. package/dist/lib/utils/schemaConversion.d.ts +1 -0
  71. package/dist/lib/utils/schemaConversion.js +3 -0
  72. package/dist/mcp/toolRegistry.d.ts +2 -0
  73. package/dist/mcp/toolRegistry.js +32 -31
  74. package/dist/neurolink.d.ts +38 -0
  75. package/dist/neurolink.js +1890 -1707
  76. package/dist/providers/googleAiStudio.js +0 -5
  77. package/dist/providers/googleNativeGemini3.d.ts +4 -0
  78. package/dist/providers/googleNativeGemini3.js +39 -1
  79. package/dist/providers/googleVertex.d.ts +10 -0
  80. package/dist/providers/googleVertex.js +445 -445
  81. package/dist/providers/litellm.d.ts +1 -0
  82. package/dist/providers/litellm.js +73 -64
  83. package/dist/providers/ollama.js +17 -4
  84. package/dist/providers/openAI.d.ts +2 -0
  85. package/dist/providers/openAI.js +139 -140
  86. package/dist/proxy/claudeFormat.js +14 -5
  87. package/dist/proxy/oauthFetch.js +298 -318
  88. package/dist/proxy/proxyConfig.js +3 -1
  89. package/dist/proxy/proxyFetch.js +250 -222
  90. package/dist/proxy/proxyHealth.d.ts +17 -0
  91. package/dist/proxy/proxyHealth.js +54 -0
  92. package/dist/proxy/requestLogger.js +140 -48
  93. package/dist/proxy/routingPolicy.d.ts +33 -0
  94. package/dist/proxy/routingPolicy.js +254 -0
  95. package/dist/proxy/snapshotPersistence.d.ts +2 -0
  96. package/dist/proxy/snapshotPersistence.js +40 -0
  97. package/dist/proxy/sseInterceptor.js +36 -11
  98. package/dist/server/routes/claudeProxyRoutes.d.ts +2 -1
  99. package/dist/server/routes/claudeProxyRoutes.js +2916 -2377
  100. package/dist/services/server/ai/observability/instrumentation.js +194 -218
  101. package/dist/tasks/backends/bullmqBackend.js +24 -18
  102. package/dist/tasks/store/redisTaskStore.js +42 -17
  103. package/dist/tasks/taskManager.d.ts +2 -0
  104. package/dist/tasks/taskManager.js +100 -5
  105. package/dist/telemetry/telemetryService.js +9 -5
  106. package/dist/types/cli.d.ts +4 -0
  107. package/dist/types/proxyTypes.d.ts +211 -1
  108. package/dist/types/tools.d.ts +18 -0
  109. package/dist/utils/providerHealth.d.ts +1 -0
  110. package/dist/utils/providerHealth.js +46 -31
  111. package/dist/utils/providerUtils.js +12 -22
  112. package/dist/utils/schemaConversion.d.ts +1 -0
  113. package/dist/utils/schemaConversion.js +3 -0
  114. package/package.json +3 -2
  115. package/scripts/observability/check-proxy-telemetry.mjs +1 -1
  116. package/scripts/observability/manage-local-openobserve.sh +36 -5
@@ -22,12 +22,13 @@ import pLimit from "p-limit";
22
22
  import { ErrorCategory, ErrorSeverity } from "./constants/enums.js";
23
23
  import { CIRCUIT_BREAKER, CIRCUIT_BREAKER_RESET_MS, MEMORY_THRESHOLDS, NANOSECOND_TO_MS_DIVISOR, PERFORMANCE_THRESHOLDS, PROVIDER_TIMEOUTS, RETRY_ATTEMPTS, RETRY_DELAYS, TOOL_TIMEOUTS, } from "./constants/index.js";
24
24
  import { checkContextBudget } from "./context/budgetChecker.js";
25
- import { ContextCompactor } from "./context/contextCompactor.js";
25
+ import { ContextCompactor, } from "./context/contextCompactor.js";
26
26
  import { emergencyContentTruncation } from "./context/emergencyTruncation.js";
27
27
  import { getContextOverflowProvider, isContextOverflowError, parseProviderOverflowDetails, } from "./context/errorDetection.js";
28
28
  import { ContextBudgetExceededError } from "./context/errors.js";
29
29
  import { repairToolPairs } from "./context/toolPairRepair.js";
30
30
  import { SYSTEM_LIMITS } from "./core/constants.js";
31
+ import { createToolEventPayload } from "./core/toolEvents.js";
31
32
  import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
32
33
  import { AIProviderFactory } from "./core/factory.js";
33
34
  import { ProviderRegistry } from "./factories/providerRegistry.js";
@@ -44,9 +45,9 @@ import { ToolRouter } from "./mcp/routing/index.js";
44
45
  import { directToolsServer } from "./mcp/servers/agent/directToolsServer.js";
45
46
  import { inferAnnotations, isSafeToRetry } from "./mcp/toolAnnotations.js";
46
47
  import { MCPToolRegistry } from "./mcp/toolRegistry.js";
47
- import { initializeHippocampus } from "./memory/hippocampusInitializer.js";
48
+ import { initializeHippocampus, } from "./memory/hippocampusInitializer.js";
48
49
  import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
49
- import { getMetricsAggregator, MetricsAggregator } from "./observability/metricsAggregator.js";
50
+ import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
50
51
  import { SpanStatus, SpanType } from "./observability/types/spanTypes.js";
51
52
  import { SpanSerializer } from "./observability/utils/spanSerializer.js";
52
53
  import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
@@ -56,14 +57,14 @@ import { ATTR } from "./telemetry/attributes.js";
56
57
  import { tracers } from "./telemetry/tracers.js";
57
58
  import { CircuitBreakerOpenError } from "./types/circuitBreakerErrors.js";
58
59
  import { ConversationMemoryError } from "./types/conversation.js";
59
- import { AuthenticationError, AuthorizationError, InvalidModelError } from "./types/errors.js";
60
- import { getConversationMessages, storeConversationTurn } from "./utils/conversationMemory.js";
60
+ import { AuthenticationError, AuthorizationError, InvalidModelError, } from "./types/errors.js";
61
+ import { getConversationMessages, storeConversationTurn, } from "./utils/conversationMemory.js";
61
62
  // Enhanced error handling imports
62
63
  import { CircuitBreaker, ERROR_CODES, ErrorFactory, isAbortError, isRetriableError, logStructuredError, NeuroLinkError, withRetry, withTimeout, } from "./utils/errorHandling.js";
63
64
  // Factory processing imports
64
65
  import { createCleanStreamOptions, enhanceTextGenerationOptions, processFactoryOptions, processStreamingFactoryOptions, validateFactoryConfig, } from "./utils/factoryProcessing.js";
65
66
  import { logger, mcpLogger } from "./utils/logger.js";
66
- import { createCustomToolServerInfo, detectCategory } from "./utils/mcpDefaults.js";
67
+ import { createCustomToolServerInfo, detectCategory, } from "./utils/mcpDefaults.js";
67
68
  import { resolveModel } from "./utils/modelAliasResolver.js";
68
69
  // Import orchestration components
69
70
  import { ModelRouter } from "./utils/modelRouter.js";
@@ -97,7 +98,9 @@ function classifyMcpErrorMessage(text) {
97
98
  lower.includes("access denied")) {
98
99
  return "permission_denied";
99
100
  }
100
- if (lower.includes("timeout") || lower.includes("timed out") || lower.includes("deadline exceeded")) {
101
+ if (lower.includes("timeout") ||
102
+ lower.includes("timed out") ||
103
+ lower.includes("deadline exceeded")) {
101
104
  return "timeout";
102
105
  }
103
106
  if (lower.includes("rate limit") ||
@@ -154,7 +157,11 @@ function isNonRetryableProviderError(error) {
154
157
  // Check for HTTP status codes on error objects (e.g., from Vercel AI SDK)
155
158
  if (error && typeof error === "object") {
156
159
  const err = error;
157
- const status = typeof err.status === "number" ? err.status : typeof err.statusCode === "number" ? err.statusCode : undefined;
160
+ const status = typeof err.status === "number"
161
+ ? err.status
162
+ : typeof err.statusCode === "number"
163
+ ? err.statusCode
164
+ : undefined;
158
165
  if (status && NON_RETRYABLE_HTTP_STATUS_CODES.includes(status)) {
159
166
  return true;
160
167
  }
@@ -200,7 +207,8 @@ export class NeuroLink {
200
207
  lastCompactionMessageCount = new Map();
201
208
  /** Extract sessionId from options context for compaction watermark keying */
202
209
  getCompactionSessionId(options) {
203
- return options.context?.sessionId || "__default__";
210
+ return (options.context
211
+ ?.sessionId || "__default__");
204
212
  }
205
213
  // MCP Enhancement modules - wired into core execution path
206
214
  mcpToolResultCache;
@@ -229,14 +237,13 @@ export class NeuroLink {
229
237
  // Emit tool end event (NeuroLink format - enhanced with result/error)
230
238
  // Serialize error to string for consumer compatibility (event listeners
231
239
  // commonly check `typeof event.error === "string"`).
232
- this.emitter.emit("tool:end", {
233
- toolName,
240
+ this.emitter.emit("tool:end", createToolEventPayload(toolName, {
234
241
  responseTime: Date.now() - startTime,
235
242
  success,
236
243
  timestamp: Date.now(),
237
- result: result, // Enhanced: include actual result
238
- error: error ? error.message : undefined, // Emit as string, not Error object
239
- });
244
+ result,
245
+ error: error ? error.message : undefined,
246
+ }));
240
247
  }
241
248
  // Conversation memory support
242
249
  conversationMemory;
@@ -263,19 +270,28 @@ export class NeuroLink {
263
270
  * Extract and set Langfuse context from options with proper async scoping
264
271
  */
265
272
  async setLangfuseContextFromOptions(options, callback) {
266
- if (options.context && typeof options.context === "object" && options.context !== null) {
273
+ if (options.context &&
274
+ typeof options.context === "object" &&
275
+ options.context !== null) {
267
276
  let callbackExecuted = false;
268
277
  try {
269
278
  const ctx = options.context;
270
279
  // Trigger context scoping if any meaningful Langfuse field is present
271
- if (ctx.userId || ctx.sessionId || ctx.conversationId || ctx.requestId || ctx.traceName || ctx.metadata) {
280
+ if (ctx.userId ||
281
+ ctx.sessionId ||
282
+ ctx.conversationId ||
283
+ ctx.requestId ||
284
+ ctx.traceName ||
285
+ ctx.metadata) {
272
286
  // Build customAttributes from top-level metadata string/number/boolean fields
273
287
  let customAttributes;
274
288
  if (ctx.metadata && typeof ctx.metadata === "object") {
275
289
  const metaObj = ctx.metadata;
276
290
  const attrs = {};
277
291
  for (const [k, v] of Object.entries(metaObj)) {
278
- if (typeof v === "string" || typeof v === "number" || typeof v === "boolean") {
292
+ if (typeof v === "string" ||
293
+ typeof v === "number" ||
294
+ typeof v === "boolean") {
279
295
  attrs[k] = v;
280
296
  }
281
297
  }
@@ -287,10 +303,14 @@ export class NeuroLink {
287
303
  setLangfuseContext({
288
304
  userId: typeof ctx.userId === "string" ? ctx.userId : null,
289
305
  sessionId: typeof ctx.sessionId === "string" ? ctx.sessionId : null,
290
- conversationId: typeof ctx.conversationId === "string" ? ctx.conversationId : null,
306
+ conversationId: typeof ctx.conversationId === "string"
307
+ ? ctx.conversationId
308
+ : null,
291
309
  requestId: typeof ctx.requestId === "string" ? ctx.requestId : null,
292
310
  traceName: typeof ctx.traceName === "string" ? ctx.traceName : null,
293
- metadata: ctx.metadata && typeof ctx.metadata === "object" ? ctx.metadata : null,
311
+ metadata: ctx.metadata && typeof ctx.metadata === "object"
312
+ ? ctx.metadata
313
+ : null,
294
314
  ...(customAttributes !== undefined && { customAttributes }),
295
315
  }, async () => {
296
316
  try {
@@ -319,6 +339,137 @@ export class NeuroLink {
319
339
  }
320
340
  return await callback();
321
341
  }
342
+ createMetricsTraceContext() {
343
+ return {
344
+ traceId: crypto.randomUUID().replace(/-/g, ""),
345
+ parentSpanId: crypto.randomUUID().replace(/-/g, "").substring(0, 16),
346
+ };
347
+ }
348
+ enforceSessionBudget(maxBudgetUsd) {
349
+ if (maxBudgetUsd === undefined ||
350
+ maxBudgetUsd <= 0 ||
351
+ this._sessionCostUsd < maxBudgetUsd) {
352
+ return;
353
+ }
354
+ throw new NeuroLinkError({
355
+ code: "SESSION_BUDGET_EXCEEDED",
356
+ message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${maxBudgetUsd.toFixed(4)} limit`,
357
+ category: ErrorCategory.VALIDATION,
358
+ severity: ErrorSeverity.HIGH,
359
+ retriable: false,
360
+ context: {
361
+ spent: this._sessionCostUsd,
362
+ limit: maxBudgetUsd,
363
+ },
364
+ });
365
+ }
366
+ assertInputText(text, message) {
367
+ if (!text || typeof text !== "string") {
368
+ throw new Error(message);
369
+ }
370
+ }
371
+ async applyAuthenticatedRequestContext(options) {
372
+ if (options.auth?.token) {
373
+ const { AuthError } = await import("./auth/errors.js");
374
+ await this.ensureAuthProvider();
375
+ if (!this.authProvider) {
376
+ throw AuthError.create("PROVIDER_ERROR", "No auth provider configured. Set auth in constructor or via setAuthProvider() before using auth: { token }.");
377
+ }
378
+ let authResult;
379
+ try {
380
+ authResult = await withTimeout(this.authProvider.authenticateToken(options.auth.token), 5000, AuthError.create("PROVIDER_ERROR", "Auth token validation timed out after 5000ms"));
381
+ }
382
+ catch (error) {
383
+ if (error instanceof Error &&
384
+ "feature" in error &&
385
+ error.feature === "Auth") {
386
+ throw error;
387
+ }
388
+ throw AuthError.create("PROVIDER_ERROR", `Auth token validation failed: ${error instanceof Error ? error.message : String(error)}`);
389
+ }
390
+ if (!authResult.valid) {
391
+ throw AuthError.create("INVALID_TOKEN", authResult.error || "Token validation failed");
392
+ }
393
+ if (!authResult.user) {
394
+ throw AuthError.create("INVALID_TOKEN", "Token validated but no user identity returned");
395
+ }
396
+ if (!authResult.user.id) {
397
+ throw AuthError.create("INVALID_TOKEN", "Token validated but user identity missing required 'id' field");
398
+ }
399
+ options.context = {
400
+ ...(options.context || {}),
401
+ userId: authResult.user.id,
402
+ userEmail: authResult.user.email,
403
+ userRoles: authResult.user.roles,
404
+ };
405
+ }
406
+ if (!options.requestContext) {
407
+ return;
408
+ }
409
+ const tokenDerivedFields = options.auth?.token && this.authProvider
410
+ ? {
411
+ userId: options.context?.userId,
412
+ userEmail: options.context?.userEmail,
413
+ userRoles: options.context?.userRoles,
414
+ }
415
+ : {};
416
+ options.context = {
417
+ ...(options.context || {}),
418
+ ...options.requestContext,
419
+ ...tokenDerivedFields,
420
+ };
421
+ }
422
+ applyGenerateLifecycleMiddleware(options) {
423
+ if (!options.onFinish && !options.onError) {
424
+ return;
425
+ }
426
+ options.middleware = {
427
+ ...options.middleware,
428
+ middlewareConfig: {
429
+ ...options.middleware?.middlewareConfig,
430
+ lifecycle: {
431
+ ...options.middleware?.middlewareConfig?.lifecycle,
432
+ enabled: true,
433
+ config: {
434
+ ...options.middleware?.middlewareConfig?.lifecycle?.config,
435
+ ...(options.onFinish !== undefined
436
+ ? { onFinish: options.onFinish }
437
+ : {}),
438
+ ...(options.onError !== undefined
439
+ ? { onError: options.onError }
440
+ : {}),
441
+ },
442
+ },
443
+ },
444
+ };
445
+ }
446
+ applyStreamLifecycleMiddleware(options) {
447
+ if (!options.onFinish && !options.onError && !options.onChunk) {
448
+ return;
449
+ }
450
+ options.middleware = {
451
+ ...options.middleware,
452
+ middlewareConfig: {
453
+ ...options.middleware?.middlewareConfig,
454
+ lifecycle: {
455
+ ...options.middleware?.middlewareConfig?.lifecycle,
456
+ enabled: true,
457
+ config: {
458
+ ...options.middleware?.middlewareConfig?.lifecycle?.config,
459
+ ...(options.onFinish !== undefined
460
+ ? { onFinish: options.onFinish }
461
+ : {}),
462
+ ...(options.onError !== undefined
463
+ ? { onError: options.onError }
464
+ : {}),
465
+ ...(options.onChunk !== undefined
466
+ ? { onChunk: options.onChunk }
467
+ : {}),
468
+ },
469
+ },
470
+ },
471
+ };
472
+ }
322
473
  initializeMemoryConfig() {
323
474
  const memory = this.conversationMemoryConfig?.conversationMemory?.memory;
324
475
  if (!memory?.enabled) {
@@ -424,7 +575,9 @@ export class NeuroLink {
424
575
  logger.setEventEmitter(this.emitter);
425
576
  // Read tool cache duration from environment variables, with a default
426
577
  const cacheDurationEnv = process.env.NEUROLINK_TOOL_CACHE_DURATION;
427
- this.toolCacheDuration = cacheDurationEnv ? parseInt(cacheDurationEnv, 10) : 20000;
578
+ this.toolCacheDuration = cacheDurationEnv
579
+ ? parseInt(cacheDurationEnv, 10)
580
+ : 20000;
428
581
  const constructorStartTime = Date.now();
429
582
  const constructorHrTimeStart = process.hrtime.bigint();
430
583
  const constructorId = `neurolink-constructor-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
@@ -779,7 +932,9 @@ export class NeuroLink {
779
932
  // memory manager supports getSessionRaw.
780
933
  const memConfig = this.conversationMemoryConfig?.conversationMemory;
781
934
  const hasRedisConfig = !!memConfig?.redisConfig ||
782
- (memConfig && "redis" in memConfig && !!memConfig.redis) ||
935
+ (memConfig &&
936
+ "redis" in memConfig &&
937
+ !!memConfig.redis) ||
783
938
  process.env.STORAGE_TYPE === "redis";
784
939
  if (!memConfig?.enabled || !hasRedisConfig) {
785
940
  logger.debug("[NeuroLink] Skipping memory retrieval tools — requires Redis conversation memory");
@@ -810,8 +965,13 @@ export class NeuroLink {
810
965
  messages: [],
811
966
  });
812
967
  // Check if the tool itself reported an error
813
- const hasError = result && typeof result === "object" && "error" in result && !("messages" in result);
814
- const errorMsg = hasError ? result.error : undefined;
968
+ const hasError = result &&
969
+ typeof result === "object" &&
970
+ "error" in result &&
971
+ !("messages" in result);
972
+ const errorMsg = hasError
973
+ ? result.error
974
+ : undefined;
815
975
  return {
816
976
  success: !hasError,
817
977
  data: result,
@@ -888,7 +1048,8 @@ Current user's request: ${currentInput}`;
888
1048
  * Respects both the global memory SDK config and per-call overrides.
889
1049
  */
890
1050
  shouldReadMemory(perCallMemory, userId) {
891
- if (!this.conversationMemoryConfig?.conversationMemory?.memory?.enabled || !userId) {
1051
+ if (!this.conversationMemoryConfig?.conversationMemory?.memory?.enabled ||
1052
+ !userId) {
892
1053
  return false;
893
1054
  }
894
1055
  if (perCallMemory?.enabled === false) {
@@ -904,7 +1065,8 @@ Current user's request: ${currentInput}`;
904
1065
  * Respects both the global memory SDK config and per-call overrides.
905
1066
  */
906
1067
  shouldWriteMemory(perCallMemory, userId, content) {
907
- if (!this.conversationMemoryConfig?.conversationMemory?.memory?.enabled || !userId) {
1068
+ if (!this.conversationMemoryConfig?.conversationMemory?.memory?.enabled ||
1069
+ !userId) {
908
1070
  return false;
909
1071
  }
910
1072
  if (!content?.trim()) {
@@ -978,7 +1140,9 @@ Current user's request: ${currentInput}`;
978
1140
  const writeOps = [client.add(userId, content)];
979
1141
  const writableAdditional = (additionalUsers || []).filter((u) => u.write !== false);
980
1142
  for (const user of writableAdditional) {
981
- const addOptions = user.prompt || user.maxWords ? { prompt: user.prompt, maxWords: user.maxWords } : undefined;
1143
+ const addOptions = user.prompt || user.maxWords
1144
+ ? { prompt: user.prompt, maxWords: user.maxWords }
1145
+ : undefined;
982
1146
  writeOps.push(client.add(user.userId, content, addOptions));
983
1147
  }
984
1148
  await Promise.all(writeOps);
@@ -1137,7 +1301,8 @@ Current user's request: ${currentInput}`;
1137
1301
  try {
1138
1302
  const langfuseConfig = this.observabilityConfig?.langfuse;
1139
1303
  // Check if we should use external provider mode - bypass enabled check
1140
- const useExternalProvider = langfuseConfig?.autoDetectExternalProvider === true || langfuseConfig?.useExternalTracerProvider === true;
1304
+ const useExternalProvider = langfuseConfig?.autoDetectExternalProvider === true ||
1305
+ langfuseConfig?.useExternalTracerProvider === true;
1141
1306
  if (langfuseConfig?.enabled || useExternalProvider) {
1142
1307
  logger.debug(`[NeuroLink] 📊 LOG_POINT_C019_LANGFUSE_INIT_START`, {
1143
1308
  logPoint: "C019_LANGFUSE_INIT_START",
@@ -1152,7 +1317,9 @@ Current user's request: ${currentInput}`;
1152
1317
  initializeOpenTelemetry(langfuseConfig);
1153
1318
  const healthStatus = getLangfuseHealthStatus();
1154
1319
  const langfuseInitDurationNs = process.hrtime.bigint() - langfuseInitStartTime;
1155
- if (healthStatus.initialized && healthStatus.hasProcessor && healthStatus.isHealthy) {
1320
+ if (healthStatus.initialized &&
1321
+ healthStatus.hasProcessor &&
1322
+ healthStatus.isHealthy) {
1156
1323
  logger.debug(`[NeuroLink] ✅ LOG_POINT_C020_LANGFUSE_INIT_SUCCESS`, {
1157
1324
  logPoint: "C020_LANGFUSE_INIT_SUCCESS",
1158
1325
  constructorId,
@@ -1428,7 +1595,9 @@ Current user's request: ${currentInput}`;
1428
1595
  }
1429
1596
  catch (configError) {
1430
1597
  mcpLogger.warn("[NeuroLink] MCP configuration loading failed", {
1431
- error: configError instanceof Error ? configError.message : String(configError),
1598
+ error: configError instanceof Error
1599
+ ? configError.message
1600
+ : String(configError),
1432
1601
  });
1433
1602
  }
1434
1603
  }
@@ -1553,7 +1722,9 @@ Current user's request: ${currentInput}`;
1553
1722
  taskType: classification.type,
1554
1723
  routedProvider: route.provider,
1555
1724
  routedModel: route.model,
1556
- reason: error instanceof Error ? error.message : "Ollama service check failed",
1725
+ reason: error instanceof Error
1726
+ ? error.message
1727
+ : "Ollama service check failed",
1557
1728
  orchestrationTime: `${Date.now() - startTime}ms`,
1558
1729
  });
1559
1730
  return {}; // Return empty object to preserve existing fallback behavior
@@ -1689,7 +1860,9 @@ Current user's request: ${currentInput}`;
1689
1860
  taskType: classification.type,
1690
1861
  routedProvider: route.provider,
1691
1862
  routedModel: route.model,
1692
- reason: error instanceof Error ? error.message : "Ollama service check failed",
1863
+ reason: error instanceof Error
1864
+ ? error.message
1865
+ : "Ollama service check failed",
1693
1866
  orchestrationTime: `${Date.now() - startTime}ms`,
1694
1867
  });
1695
1868
  return {}; // Return empty object to preserve existing fallback behavior
@@ -1740,7 +1913,9 @@ Current user's request: ${currentInput}`;
1740
1913
  const anyOptions = optionsOrPrompt;
1741
1914
  if (anyOptions.messages && anyOptions.messages.length > 0) {
1742
1915
  const lastMessage = anyOptions.messages[anyOptions.messages.length - 1];
1743
- return typeof lastMessage.content === "string" ? lastMessage.content : JSON.stringify(lastMessage.content);
1916
+ return typeof lastMessage.content === "string"
1917
+ ? lastMessage.content
1918
+ : JSON.stringify(lastMessage.content);
1744
1919
  }
1745
1920
  // Handle input.text format
1746
1921
  return optionsOrPrompt.input?.text || "";
@@ -1832,7 +2007,8 @@ Current user's request: ${currentInput}`;
1832
2007
  endpoint: otelConfig.endpoint,
1833
2008
  serviceName: otelConfig.serviceName,
1834
2009
  }
1835
- : isOpenTelemetryInitialized() || process.env.OTEL_EXPORTER_OTLP_ENDPOINT
2010
+ : isOpenTelemetryInitialized() ||
2011
+ process.env.OTEL_EXPORTER_OTLP_ENDPOINT
1836
2012
  ? {
1837
2013
  enabled: isOpenTelemetryInitialized(),
1838
2014
  endpoint: process.env.OTEL_EXPORTER_OTLP_ENDPOINT,
@@ -1974,7 +2150,9 @@ Current user's request: ${currentInput}`;
1974
2150
  const result = data.result;
1975
2151
  const usage = result?.usage;
1976
2152
  const analytics = result?.analytics;
1977
- const provider = data.provider || result?.provider || "unknown";
2153
+ const provider = data.provider ||
2154
+ result?.provider ||
2155
+ "unknown";
1978
2156
  const model = result?.model || "unknown";
1979
2157
  const responseTime = data.responseTime || 0;
1980
2158
  const traceCtx = this._metricsTraceContext;
@@ -1993,7 +2171,9 @@ Current user's request: ${currentInput}`;
1993
2171
  span.parentSpanId = undefined;
1994
2172
  }
1995
2173
  // Mark failed generations with ERROR status so metrics count them correctly
1996
- const spanStatus = data.success === false || data.error ? SpanStatus.ERROR : SpanStatus.OK;
2174
+ const spanStatus = data.success === false || data.error
2175
+ ? SpanStatus.ERROR
2176
+ : SpanStatus.OK;
1997
2177
  span = SpanSerializer.endSpan(span, spanStatus, data.error ? String(data.error) : undefined);
1998
2178
  span.durationMs = responseTime;
1999
2179
  if (usage) {
@@ -2029,7 +2209,9 @@ Current user's request: ${currentInput}`;
2029
2209
  const content = result?.content || result?.text;
2030
2210
  if (content) {
2031
2211
  span = SpanSerializer.updateAttributes(span, {
2032
- output: content.length > 5000 ? content.substring(0, 5000) + "...[truncated]" : content,
2212
+ output: content.length > 5000
2213
+ ? content.substring(0, 5000) + "...[truncated]"
2214
+ : content,
2033
2215
  });
2034
2216
  }
2035
2217
  this.metricsAggregator.recordSpan(span);
@@ -2068,14 +2250,18 @@ Current user's request: ${currentInput}`;
2068
2250
  if (data.prompt) {
2069
2251
  const promptStr = String(data.prompt);
2070
2252
  span = SpanSerializer.updateAttributes(span, {
2071
- input: promptStr.length > 5000 ? promptStr.substring(0, 5000) + "...[truncated]" : promptStr,
2253
+ input: promptStr.length > 5000
2254
+ ? promptStr.substring(0, 5000) + "...[truncated]"
2255
+ : promptStr,
2072
2256
  });
2073
2257
  }
2074
2258
  // Record streamed output (truncated for safety)
2075
2259
  const streamContent = data.content;
2076
2260
  if (streamContent) {
2077
2261
  span = SpanSerializer.updateAttributes(span, {
2078
- output: streamContent.length > 5000 ? streamContent.substring(0, 5000) + "...[truncated]" : streamContent,
2262
+ output: streamContent.length > 5000
2263
+ ? streamContent.substring(0, 5000) + "...[truncated]"
2264
+ : streamContent,
2079
2265
  });
2080
2266
  }
2081
2267
  // Enrich stream span with token usage if available
@@ -2092,7 +2278,8 @@ Current user's request: ${currentInput}`;
2092
2278
  const pricing = tokenTracker.getModelPricing(model);
2093
2279
  if (pricing) {
2094
2280
  const inputCost = ((usage.input || 0) / 1_000_000) * pricing.inputPricePerMillion;
2095
- const outputCost = ((usage.output || 0) / 1_000_000) * pricing.outputPricePerMillion;
2281
+ const outputCost = ((usage.output || 0) / 1_000_000) *
2282
+ pricing.outputPricePerMillion;
2096
2283
  const totalCost = inputCost + outputCost;
2097
2284
  if (totalCost > 0) {
2098
2285
  span = SpanSerializer.enrichWithCost(span, {
@@ -2127,7 +2314,8 @@ Current user's request: ${currentInput}`;
2127
2314
  span = SpanSerializer.endSpan(span, success ? SpanStatus.OK : SpanStatus.ERROR);
2128
2315
  span.durationMs = responseTime;
2129
2316
  if (!success && data.error) {
2130
- span.statusMessage = data.error.message || String(data.error);
2317
+ span.statusMessage =
2318
+ data.error.message || String(data.error);
2131
2319
  }
2132
2320
  if (data.result) {
2133
2321
  try {
@@ -2279,398 +2467,313 @@ Current user's request: ${currentInput}`;
2279
2467
  * @since 1.0.0
2280
2468
  */
2281
2469
  async generate(optionsOrPrompt) {
2282
- return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, async (generateSpan) => {
2283
- // Set metrics trace context for parent-child span linking.
2284
- // The generation span will be the root (no parentSpanId).
2285
- // Tool spans will be children of the root span via rootSpanId.
2286
- const metricsTraceId = crypto.randomUUID().replace(/-/g, "");
2287
- const metricsRootSpanId = crypto.randomUUID().replace(/-/g, "").substring(0, 16);
2288
- // Scope trace context to this request via AsyncLocalStorage
2289
- // so concurrent generate/stream calls don't race.
2290
- return metricsTraceContextStorage.run({ traceId: metricsTraceId, parentSpanId: metricsRootSpanId }, async () => {
2291
- try {
2292
- const originalPrompt = this._extractOriginalPrompt(optionsOrPrompt);
2293
- // Convert string prompt to full options
2294
- // Shallow-copy caller's object to avoid mutating their original reference
2295
- const options = typeof optionsOrPrompt === "string" ? { input: { text: optionsOrPrompt } } : { ...optionsOrPrompt };
2296
- // NL-004: Resolve model aliases/deprecations before processing
2297
- options.model = resolveModel(options.model, this.modelAliasConfig);
2298
- // MCP Enhancement: propagate disableToolCache to tool execution
2299
- this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
2300
- // Set span attributes for observability
2301
- generateSpan.setAttribute("neurolink.provider", options.provider || "default");
2302
- generateSpan.setAttribute("neurolink.model", options.model || "default");
2303
- generateSpan.setAttribute("neurolink.input_length", typeof optionsOrPrompt === "string" ? optionsOrPrompt.length : options.input?.text?.length || 0);
2304
- generateSpan.setAttribute("neurolink.has_tools", !!(options.tools && Object.keys(options.tools).length > 0));
2305
- // Validate prompt
2306
- if (!options.input?.text || typeof options.input.text !== "string") {
2307
- throw new Error("Input text is required and must be a non-empty string");
2308
- }
2309
- // Check budget limit before making API call
2310
- if (options.maxBudgetUsd !== undefined &&
2311
- options.maxBudgetUsd > 0 &&
2312
- this._sessionCostUsd >= options.maxBudgetUsd) {
2313
- throw new NeuroLinkError({
2314
- code: "SESSION_BUDGET_EXCEEDED",
2315
- message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
2316
- category: ErrorCategory.VALIDATION,
2317
- severity: ErrorSeverity.HIGH,
2318
- retriable: false,
2319
- context: {
2320
- spent: this._sessionCostUsd,
2321
- limit: options.maxBudgetUsd,
2322
- },
2323
- });
2324
- }
2325
- // Auto-inject lifecycle middleware when callbacks are provided
2326
- // (must happen before workflow/PPT early returns so those paths get middleware too)
2327
- if (options.onFinish || options.onError) {
2328
- options.middleware = {
2329
- ...options.middleware,
2330
- middlewareConfig: {
2331
- ...options.middleware?.middlewareConfig,
2332
- lifecycle: {
2333
- ...options.middleware?.middlewareConfig?.lifecycle,
2334
- enabled: true,
2335
- config: {
2336
- ...options.middleware?.middlewareConfig?.lifecycle?.config,
2337
- ...(options.onFinish !== undefined ? { onFinish: options.onFinish } : {}),
2338
- ...(options.onError !== undefined ? { onError: options.onError } : {}),
2339
- },
2340
- },
2341
- },
2342
- };
2343
- }
2344
- // Handle per-call auth token validation
2345
- if (options.auth?.token) {
2346
- const { AuthError } = await import("./auth/errors.js");
2347
- await this.ensureAuthProvider();
2348
- if (!this.authProvider) {
2349
- throw AuthError.create("PROVIDER_ERROR", "No auth provider configured. Set auth in constructor or via setAuthProvider() before using auth: { token }.");
2350
- }
2351
- let authResult;
2352
- try {
2353
- authResult = await withTimeout(this.authProvider.authenticateToken(options.auth.token), 5000, AuthError.create("PROVIDER_ERROR", "Auth token validation timed out after 5000ms"));
2354
- }
2355
- catch (err) {
2356
- // Rethrow auth errors as-is; wrap anything else
2357
- if (err instanceof Error && "feature" in err && err.feature === "Auth") {
2358
- throw err;
2359
- }
2360
- throw AuthError.create("PROVIDER_ERROR", `Auth token validation failed: ${err instanceof Error ? err.message : String(err)}`);
2361
- }
2362
- if (!authResult.valid) {
2363
- throw AuthError.create("INVALID_TOKEN", authResult.error || "Token validation failed");
2364
- }
2365
- // Fail closed: token valid but no user identity is a provider bug
2366
- if (!authResult.user) {
2367
- throw AuthError.create("INVALID_TOKEN", "Token validated but no user identity returned");
2368
- }
2369
- if (!authResult.user.id) {
2370
- throw AuthError.create("INVALID_TOKEN", "Token validated but user identity missing required 'id' field");
2371
- }
2372
- // Merge validated user into context
2373
- options.context = {
2374
- ...(options.context || {}),
2375
- userId: authResult.user.id,
2376
- userEmail: authResult.user.email,
2377
- userRoles: authResult.user.roles,
2378
- };
2379
- }
2380
- // Handle pre-validated requestContext
2381
- if (options.requestContext) {
2382
- // When auth token was validated, token-derived identity fields
2383
- // MUST take precedence over requestContext to prevent privilege escalation.
2384
- const tokenDerivedFields = options.auth?.token && this.authProvider
2385
- ? {
2386
- userId: options.context?.userId,
2387
- userEmail: options.context?.userEmail,
2388
- userRoles: options.context?.userRoles,
2389
- }
2390
- : {};
2391
- options.context = {
2392
- ...(options.context || {}),
2393
- ...options.requestContext,
2394
- ...tokenDerivedFields,
2395
- };
2396
- }
2397
- // Check if workflow is requested
2398
- if (options.workflow || options.workflowConfig) {
2399
- return await this.generateWithWorkflow(options);
2400
- }
2401
- // Check if PPT output mode is requested
2402
- if (options.output?.mode === "ppt") {
2403
- const pptResult = await this.generateWithPPT(options);
2404
- generateSpan.setAttribute("neurolink.output_length", pptResult.content?.length ?? 0);
2405
- if (pptResult.analytics) {
2406
- generateSpan.setAttribute("neurolink.tokens.input", pptResult.analytics.tokenUsage?.input ?? 0);
2407
- generateSpan.setAttribute("neurolink.tokens.output", pptResult.analytics.tokenUsage?.output ?? 0);
2408
- generateSpan.setAttribute("neurolink.cost", pptResult.analytics.cost ?? 0);
2409
- }
2410
- generateSpan.setStatus({ code: SpanStatusCode.OK });
2411
- return pptResult;
2412
- }
2413
- // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
2414
- return await this.setLangfuseContextFromOptions(options, async () => {
2415
- const startTime = Date.now();
2416
- // Apply orchestration if enabled and no specific provider/model requested
2417
- if (this.enableOrchestration && !options.provider && !options.model) {
2418
- try {
2419
- const orchestratedOptions = await this.applyOrchestration(options);
2420
- logger.debug("Orchestration applied", {
2421
- originalProvider: options.provider || "auto",
2422
- orchestratedProvider: orchestratedOptions.provider,
2423
- orchestratedModel: orchestratedOptions.model,
2424
- prompt: options.input.text.substring(0, 100),
2425
- });
2426
- // Use orchestrated options
2427
- Object.assign(options, orchestratedOptions);
2428
- // Re-resolve model alias in case orchestration returned an alias
2429
- if (orchestratedOptions.model) {
2430
- options.model = resolveModel(options.model, this.modelAliasConfig);
2431
- }
2432
- }
2433
- catch (error) {
2434
- logger.warn("Orchestration failed, continuing with original options", {
2435
- error: error instanceof Error ? error.message : String(error),
2436
- originalProvider: options.provider || "auto",
2437
- });
2438
- // Continue with original options if orchestration fails
2439
- }
2440
- }
2441
- // Emit generation start event (NeuroLink format - keep existing)
2442
- this.emitter.emit("generation:start", {
2443
- provider: options.provider || "auto",
2444
- timestamp: startTime,
2445
- });
2446
- // ADD: Bedrock-compatible response:start event
2447
- this.emitter.emit("response:start");
2448
- // ADD: Bedrock-compatible message event
2449
- this.emitter.emit("message", `Starting ${options.provider || "auto"} text generation...`);
2450
- // Process factory configuration
2451
- const factoryResult = processFactoryOptions(options);
2452
- // Validate factory configuration if present
2453
- if (factoryResult.hasFactoryConfig && options.factoryConfig) {
2454
- const validation = validateFactoryConfig(options.factoryConfig);
2455
- if (!validation.isValid) {
2456
- logger.warn("Invalid factory configuration detected", {
2457
- errors: validation.errors,
2458
- });
2459
- // Continue with warning rather than throwing - graceful degradation
2460
- }
2461
- }
2462
- // RAG Integration: If rag config is provided, prepare the RAG search tool
2463
- if (options.rag?.files?.length) {
2464
- try {
2465
- const { prepareRAGTool } = await import("./rag/ragIntegration.js");
2466
- const ragResult = await prepareRAGTool(options.rag, options.provider);
2467
- // Inject the RAG tool into the tools record
2468
- if (!options.tools) {
2469
- options.tools = {};
2470
- }
2471
- options.tools[ragResult.toolName] = ragResult.tool;
2472
- // Inject RAG-aware system prompt so the AI uses the RAG tool first
2473
- const ragSystemInstruction = [
2474
- `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
2475
- `${ragResult.filesLoaded} loaded document(s) containing ${ragResult.chunksIndexed} indexed chunks.`,
2476
- `ALWAYS use the "${ragResult.toolName}" tool FIRST to answer the user's question before using any other tools.`,
2477
- `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
2478
- `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
2479
- ].join(" ");
2480
- options.systemPrompt = (options.systemPrompt || "") + ragSystemInstruction;
2481
- logger.info("[RAG] Tool injected into generate()", {
2482
- toolName: ragResult.toolName,
2483
- filesLoaded: ragResult.filesLoaded,
2484
- chunksIndexed: ragResult.chunksIndexed,
2485
- });
2486
- }
2487
- catch (error) {
2488
- logger.warn("[RAG] Failed to prepare RAG tool, continuing without RAG", {
2489
- error: error instanceof Error ? error.message : String(error),
2490
- });
2491
- }
2492
- }
2493
- // Memory retrieval for generate path
2494
- if (this.shouldReadMemory(options.memory, options.context?.userId) && options.context?.userId) {
2495
- try {
2496
- options.input.text = await this.retrieveMemory(options.input.text, options.context.userId, options.memory?.additionalUsers);
2497
- logger.debug("Memory retrieval successful (generate)");
2498
- }
2499
- catch (error) {
2500
- logger.warn("Memory retrieval failed (generate):", error);
2501
- }
2502
- }
2503
- // 🔧 CRITICAL FIX: Convert to TextGenerationOptions while preserving the input object for multimodal support
2504
- const baseOptions = {
2505
- prompt: options.input.text,
2506
- provider: options.provider,
2507
- model: options.model,
2508
- temperature: options.temperature,
2509
- maxTokens: options.maxTokens,
2510
- systemPrompt: options.systemPrompt,
2511
- schema: options.schema,
2512
- output: options.output,
2513
- tools: options.tools, // Includes RAG tools if rag config was provided
2514
- disableTools: options.disableTools,
2515
- toolFilter: options.toolFilter,
2516
- excludeTools: options.excludeTools,
2517
- maxSteps: options.maxSteps,
2518
- toolChoice: options.toolChoice,
2519
- prepareStep: options.prepareStep,
2520
- enableAnalytics: options.enableAnalytics,
2521
- enableEvaluation: options.enableEvaluation,
2522
- context: options.context,
2523
- evaluationDomain: options.evaluationDomain,
2524
- toolUsageContext: options.toolUsageContext,
2525
- input: options.input, // This includes text, images, and content arrays
2526
- region: options.region,
2527
- tts: options.tts,
2528
- fileRegistry: this.fileRegistry,
2529
- abortSignal: options.abortSignal,
2530
- skipToolPromptInjection: options.skipToolPromptInjection,
2531
- middleware: options.middleware,
2532
- // Pass through conversation messages for task continuation and external callers
2533
- conversationMessages: options.conversationMessages,
2534
- };
2535
- // Auto-map top-level sessionId/userId to context for convenience
2536
- // Tests and users may pass sessionId/userId as top-level options
2537
- const extraContext = options;
2538
- if (extraContext.sessionId || extraContext.userId) {
2539
- baseOptions.context = {
2540
- ...baseOptions.context,
2541
- ...(extraContext.sessionId && !baseOptions.context?.sessionId
2542
- ? { sessionId: extraContext.sessionId }
2543
- : {}),
2544
- ...(extraContext.userId && !baseOptions.context?.userId
2545
- ? { userId: extraContext.userId }
2546
- : {}),
2547
- };
2548
- }
2549
- // Apply factory enhancement using centralized utilities
2550
- const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
2551
- // Pass conversation memory config if available
2552
- if (this.conversationMemory) {
2553
- textOptions.conversationMemoryConfig = this.conversationMemory.config;
2554
- // Include original prompt for context summarization
2555
- textOptions.originalPrompt = originalPrompt;
2556
- }
2557
- // Detect and execute domain-specific tools
2558
- const { toolResults, enhancedPrompt } = await this.detectAndExecuteTools(textOptions.prompt || options.input.text, factoryResult.domainType);
2559
- // Update prompt with tool results if available
2560
- if (enhancedPrompt !== textOptions.prompt) {
2561
- textOptions.prompt = enhancedPrompt;
2562
- logger.debug("Enhanced prompt with tool results", {
2563
- originalLength: options.input.text.length,
2564
- enhancedLength: enhancedPrompt.length,
2565
- toolResults: toolResults.length,
2566
- });
2567
- }
2568
- const textResult = await this.generateTextInternal(textOptions);
2569
- // Emit generation completion event (NeuroLink format - enhanced with content)
2570
- this.emitter.emit("generation:end", {
2571
- provider: textResult.provider,
2572
- responseTime: Date.now() - startTime,
2573
- toolsUsed: textResult.toolsUsed,
2574
- timestamp: Date.now(),
2575
- result: textResult, // Enhanced: include full result
2576
- prompt: options.input?.text || options.prompt,
2577
- temperature: textOptions.temperature,
2578
- maxTokens: textOptions.maxTokens,
2579
- });
2580
- // ADD: Bedrock-compatible response:end event with content
2581
- this.emitter.emit("response:end", textResult.content || "");
2582
- // ADD: Bedrock-compatible message event
2583
- this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
2584
- // Convert back to GenerateResult
2585
- const generateResult = {
2586
- content: textResult.content,
2587
- finishReason: textResult.finishReason,
2588
- provider: textResult.provider,
2589
- model: textResult.model,
2590
- usage: textResult.usage
2591
- ? {
2592
- input: textResult.usage.input || 0,
2593
- output: textResult.usage.output || 0,
2594
- total: textResult.usage.total || 0,
2595
- }
2596
- : undefined,
2597
- responseTime: textResult.responseTime,
2598
- toolsUsed: textResult.toolsUsed,
2599
- toolExecutions: transformToolExecutions(textResult.toolExecutions),
2600
- enhancedWithTools: textResult.enhancedWithTools,
2601
- availableTools: transformAvailableTools(textResult.availableTools),
2602
- analytics: textResult.analytics,
2603
- // CRITICAL FIX: Include imageOutput for image generation models
2604
- imageOutput: textResult.imageOutput,
2605
- evaluation: textResult.evaluation
2606
- ? {
2607
- ...textResult.evaluation,
2608
- isOffTopic: textResult.evaluation.isOffTopic ?? false,
2609
- alertSeverity: textResult.evaluation.alertSeverity ?? "none",
2610
- reasoning: textResult.evaluation.reasoning ?? "No evaluation provided",
2611
- evaluationModel: textResult.evaluation.evaluationModel ?? "unknown",
2612
- evaluationTime: textResult.evaluation.evaluationTime ?? Date.now(),
2613
- evaluationDomain: textResult.evaluation.evaluationDomain ??
2614
- textOptions.evaluationDomain ??
2615
- factoryResult.domainType,
2616
- }
2617
- : undefined,
2618
- audio: textResult.audio,
2619
- video: textResult.video,
2620
- ppt: textResult.ppt,
2621
- // NL-007: Copy retry metadata from MCP generation path
2622
- ...(textResult.retries && { retries: textResult.retries }),
2623
- };
2624
- // Accumulate session cost for budget tracking
2625
- if (generateResult.analytics?.cost && generateResult.analytics.cost > 0) {
2626
- this._sessionCostUsd += generateResult.analytics.cost;
2627
- }
2628
- this.scheduleGenerateMemoryStorage(options, originalPrompt, generateResult);
2629
- // Set completion span attributes
2630
- generateSpan.setAttribute("neurolink.output_length", generateResult.content?.length || 0);
2631
- generateSpan.setAttribute("neurolink.tokens.input", generateResult.usage?.input || 0);
2632
- generateSpan.setAttribute("neurolink.tokens.output", generateResult.usage?.output || 0);
2633
- generateSpan.setAttribute("neurolink.finish_reason", generateResult.finishReason || "unknown");
2634
- generateSpan.setAttribute("neurolink.result_provider", generateResult.provider || "unknown");
2635
- generateSpan.setAttribute("neurolink.result_model", generateResult.model || "unknown");
2636
- // NL-007: Expose retry count in OTel span
2637
- generateSpan.setAttribute("generate.retry_count", generateResult.retries?.count || 0);
2638
- generateSpan.setStatus({ code: SpanStatusCode.OK });
2639
- return generateResult;
2640
- });
2470
+ return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan));
2471
+ }
2472
+ async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan) {
2473
+ return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan));
2474
+ }
2475
+ async executeGenerateRequest(optionsOrPrompt, generateSpan) {
2476
+ try {
2477
+ const { options, originalPrompt } = await this.prepareGenerateRequest(optionsOrPrompt, generateSpan);
2478
+ const earlyResult = await this.maybeHandleEarlyGenerateResult(options, generateSpan);
2479
+ if (earlyResult) {
2480
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
2481
+ return earlyResult;
2482
+ }
2483
+ const result = await this.setLangfuseContextFromOptions(options, () => this.runStandardGenerateRequest(options, originalPrompt, generateSpan));
2484
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
2485
+ return result;
2486
+ }
2487
+ catch (error) {
2488
+ generateSpan.setStatus({
2489
+ code: SpanStatusCode.ERROR,
2490
+ message: error instanceof Error ? error.message : String(error),
2491
+ });
2492
+ this.emitGenerateErrorEvent(optionsOrPrompt, error);
2493
+ throw error;
2494
+ }
2495
+ finally {
2496
+ this._disableToolCacheForCurrentRequest = false;
2497
+ generateSpan.end();
2498
+ }
2499
+ }
2500
+ async prepareGenerateRequest(optionsOrPrompt, generateSpan) {
2501
+ const originalPrompt = this._extractOriginalPrompt(optionsOrPrompt);
2502
+ const options = typeof optionsOrPrompt === "string"
2503
+ ? { input: { text: optionsOrPrompt } }
2504
+ : { ...optionsOrPrompt };
2505
+ options.model = resolveModel(options.model, this.modelAliasConfig);
2506
+ this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
2507
+ generateSpan.setAttribute("neurolink.provider", options.provider || "default");
2508
+ generateSpan.setAttribute("neurolink.model", options.model || "default");
2509
+ generateSpan.setAttribute("neurolink.input_length", typeof optionsOrPrompt === "string"
2510
+ ? optionsOrPrompt.length
2511
+ : options.input?.text?.length || 0);
2512
+ generateSpan.setAttribute("neurolink.has_tools", !!(options.tools && Object.keys(options.tools).length > 0));
2513
+ this.assertInputText(options.input?.text, "Input text is required and must be a non-empty string");
2514
+ this.enforceSessionBudget(options.maxBudgetUsd);
2515
+ this.applyGenerateLifecycleMiddleware(options);
2516
+ await this.applyAuthenticatedRequestContext(options);
2517
+ return { options, originalPrompt };
2518
+ }
2519
+ async maybeHandleEarlyGenerateResult(options, generateSpan) {
2520
+ if (options.workflow || options.workflowConfig) {
2521
+ return this.generateWithWorkflow(options);
2522
+ }
2523
+ if (options.output?.mode !== "ppt") {
2524
+ return null;
2525
+ }
2526
+ const pptResult = await this.generateWithPPT(options);
2527
+ generateSpan.setAttribute("neurolink.output_length", pptResult.content?.length ?? 0);
2528
+ if (pptResult.analytics) {
2529
+ generateSpan.setAttribute("neurolink.tokens.input", pptResult.analytics.tokenUsage?.input ?? 0);
2530
+ generateSpan.setAttribute("neurolink.tokens.output", pptResult.analytics.tokenUsage?.output ?? 0);
2531
+ generateSpan.setAttribute("neurolink.cost", pptResult.analytics.cost ?? 0);
2532
+ }
2533
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
2534
+ return pptResult;
2535
+ }
2536
+ async runStandardGenerateRequest(options, originalPrompt, generateSpan) {
2537
+ const startTime = Date.now();
2538
+ await this.maybeApplyGenerateOrchestration(options);
2539
+ this.emitter.emit("generation:start", {
2540
+ provider: options.provider || "auto",
2541
+ timestamp: startTime,
2542
+ });
2543
+ this.emitter.emit("response:start");
2544
+ this.emitter.emit("message", `Starting ${options.provider || "auto"} text generation...`);
2545
+ const factoryResult = processFactoryOptions(options);
2546
+ if (factoryResult.hasFactoryConfig && options.factoryConfig) {
2547
+ const validation = validateFactoryConfig(options.factoryConfig);
2548
+ if (!validation.isValid) {
2549
+ logger.warn("Invalid factory configuration detected", {
2550
+ errors: validation.errors,
2551
+ });
2552
+ }
2553
+ }
2554
+ await this.prepareGenerateAugmentations(options);
2555
+ const textOptions = await this.buildGenerateTextOptions(options, originalPrompt, factoryResult);
2556
+ const textResult = await this.generateTextInternal(textOptions);
2557
+ return this.finalizeGenerateRequestResult({
2558
+ generateSpan,
2559
+ options,
2560
+ textOptions,
2561
+ textResult,
2562
+ factoryResult,
2563
+ originalPrompt,
2564
+ startTime,
2565
+ });
2566
+ }
2567
+ async maybeApplyGenerateOrchestration(options) {
2568
+ if (!this.enableOrchestration || options.provider || options.model) {
2569
+ return;
2570
+ }
2571
+ try {
2572
+ const orchestratedOptions = await this.applyOrchestration(options);
2573
+ logger.debug("Orchestration applied", {
2574
+ originalProvider: options.provider || "auto",
2575
+ orchestratedProvider: orchestratedOptions.provider,
2576
+ orchestratedModel: orchestratedOptions.model,
2577
+ prompt: options.input.text.substring(0, 100),
2578
+ });
2579
+ Object.assign(options, orchestratedOptions);
2580
+ if (orchestratedOptions.model) {
2581
+ options.model = resolveModel(options.model, this.modelAliasConfig);
2582
+ }
2583
+ }
2584
+ catch (error) {
2585
+ logger.warn("Orchestration failed, continuing with original options", {
2586
+ error: error instanceof Error ? error.message : String(error),
2587
+ originalProvider: options.provider || "auto",
2588
+ });
2589
+ }
2590
+ }
2591
+ async prepareGenerateAugmentations(options) {
2592
+ if (options.rag?.files?.length) {
2593
+ try {
2594
+ const { prepareRAGTool } = await import("./rag/ragIntegration.js");
2595
+ const ragResult = await prepareRAGTool(options.rag, options.provider);
2596
+ if (!options.tools) {
2597
+ options.tools = {};
2641
2598
  }
2642
- catch (error) {
2643
- generateSpan.setStatus({
2644
- code: SpanStatusCode.ERROR,
2645
- message: error instanceof Error ? error.message : String(error),
2646
- });
2647
- // Emit generation:end on error so metrics listeners still record the failure.
2648
- // Note: variables declared inside try blocks are not accessible in error
2649
- // handlers, so we extract what we can from the original input.
2650
- const errProvider = typeof optionsOrPrompt === "object"
2651
- ? optionsOrPrompt.provider || "unknown"
2652
- : "unknown";
2653
- const errModel = typeof optionsOrPrompt === "object" ? optionsOrPrompt.model || "unknown" : "unknown";
2654
- try {
2655
- this.emitter.emit("generation:end", {
2656
- provider: errProvider,
2657
- model: errModel,
2658
- responseTime: 0,
2659
- error: error instanceof Error ? error.message : String(error),
2660
- success: false,
2661
- });
2662
- }
2663
- catch (emitError) {
2664
- void emitError; // non-blocking — error event emission is best-effort
2665
- }
2666
- throw error;
2599
+ options.tools[ragResult.toolName] =
2600
+ ragResult.tool;
2601
+ options.systemPrompt =
2602
+ (options.systemPrompt || "") +
2603
+ [
2604
+ `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
2605
+ `${ragResult.filesLoaded} loaded document(s) containing ${ragResult.chunksIndexed} indexed chunks.`,
2606
+ `ALWAYS use the "${ragResult.toolName}" tool FIRST to answer the user's question before using any other tools.`,
2607
+ `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
2608
+ `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
2609
+ ].join(" ");
2610
+ logger.info("[RAG] Tool injected into generate()", {
2611
+ toolName: ragResult.toolName,
2612
+ filesLoaded: ragResult.filesLoaded,
2613
+ chunksIndexed: ragResult.chunksIndexed,
2614
+ });
2615
+ }
2616
+ catch (error) {
2617
+ logger.warn("[RAG] Failed to prepare RAG tool, continuing without RAG", {
2618
+ error: error instanceof Error ? error.message : String(error),
2619
+ });
2620
+ }
2621
+ }
2622
+ if (!this.shouldReadMemory(options.memory, options.context?.userId) ||
2623
+ !options.context?.userId) {
2624
+ return;
2625
+ }
2626
+ try {
2627
+ options.input.text = await this.retrieveMemory(options.input.text, options.context.userId, options.memory?.additionalUsers);
2628
+ logger.debug("Memory retrieval successful (generate)");
2629
+ }
2630
+ catch (error) {
2631
+ logger.warn("Memory retrieval failed (generate):", error);
2632
+ }
2633
+ }
2634
+ async buildGenerateTextOptions(options, originalPrompt, factoryResult) {
2635
+ const baseOptions = {
2636
+ prompt: options.input.text,
2637
+ provider: options.provider,
2638
+ model: options.model,
2639
+ temperature: options.temperature,
2640
+ maxTokens: options.maxTokens,
2641
+ systemPrompt: options.systemPrompt,
2642
+ schema: options.schema,
2643
+ output: options.output,
2644
+ tools: options.tools,
2645
+ disableTools: options.disableTools,
2646
+ toolFilter: options.toolFilter,
2647
+ excludeTools: options.excludeTools,
2648
+ maxSteps: options.maxSteps,
2649
+ toolChoice: options.toolChoice,
2650
+ prepareStep: options.prepareStep,
2651
+ enableAnalytics: options.enableAnalytics,
2652
+ enableEvaluation: options.enableEvaluation,
2653
+ context: options.context,
2654
+ evaluationDomain: options.evaluationDomain,
2655
+ toolUsageContext: options.toolUsageContext,
2656
+ input: options.input,
2657
+ region: options.region,
2658
+ tts: options.tts,
2659
+ fileRegistry: this.fileRegistry,
2660
+ abortSignal: options.abortSignal,
2661
+ skipToolPromptInjection: options.skipToolPromptInjection,
2662
+ middleware: options.middleware,
2663
+ conversationMessages: options.conversationMessages,
2664
+ };
2665
+ const extraContext = options;
2666
+ if (extraContext.sessionId || extraContext.userId) {
2667
+ baseOptions.context = {
2668
+ ...baseOptions.context,
2669
+ ...(extraContext.sessionId && !baseOptions.context?.sessionId
2670
+ ? { sessionId: extraContext.sessionId }
2671
+ : {}),
2672
+ ...(extraContext.userId && !baseOptions.context?.userId
2673
+ ? { userId: extraContext.userId }
2674
+ : {}),
2675
+ };
2676
+ }
2677
+ const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
2678
+ if (this.conversationMemory) {
2679
+ textOptions.conversationMemoryConfig = this.conversationMemory.config;
2680
+ textOptions.originalPrompt = originalPrompt;
2681
+ }
2682
+ const { toolResults, enhancedPrompt } = await this.detectAndExecuteTools(textOptions.prompt || options.input.text, factoryResult.domainType);
2683
+ if (enhancedPrompt !== textOptions.prompt) {
2684
+ textOptions.prompt = enhancedPrompt;
2685
+ logger.debug("Enhanced prompt with tool results", {
2686
+ originalLength: options.input.text.length,
2687
+ enhancedLength: enhancedPrompt.length,
2688
+ toolResults: toolResults.length,
2689
+ });
2690
+ }
2691
+ return textOptions;
2692
+ }
2693
+ finalizeGenerateRequestResult(params) {
2694
+ const { generateSpan, options, textOptions, textResult, factoryResult, originalPrompt, startTime, } = params;
2695
+ this.emitter.emit("generation:end", {
2696
+ provider: textResult.provider,
2697
+ responseTime: Date.now() - startTime,
2698
+ toolsUsed: textResult.toolsUsed,
2699
+ timestamp: Date.now(),
2700
+ result: textResult,
2701
+ prompt: options.input?.text || options.prompt,
2702
+ temperature: textOptions.temperature,
2703
+ maxTokens: textOptions.maxTokens,
2704
+ });
2705
+ this.emitter.emit("response:end", textResult.content || "");
2706
+ this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
2707
+ const generateResult = {
2708
+ content: textResult.content,
2709
+ finishReason: textResult.finishReason,
2710
+ provider: textResult.provider,
2711
+ model: textResult.model,
2712
+ usage: textResult.usage
2713
+ ? {
2714
+ input: textResult.usage.input || 0,
2715
+ output: textResult.usage.output || 0,
2716
+ total: textResult.usage.total || 0,
2667
2717
  }
2668
- finally {
2669
- this._disableToolCacheForCurrentRequest = false;
2670
- generateSpan.end();
2718
+ : undefined,
2719
+ responseTime: textResult.responseTime,
2720
+ toolsUsed: textResult.toolsUsed,
2721
+ toolExecutions: transformToolExecutions(textResult.toolExecutions),
2722
+ enhancedWithTools: textResult.enhancedWithTools,
2723
+ availableTools: transformAvailableTools(textResult.availableTools),
2724
+ analytics: textResult.analytics,
2725
+ imageOutput: textResult.imageOutput,
2726
+ evaluation: textResult.evaluation
2727
+ ? {
2728
+ ...textResult.evaluation,
2729
+ isOffTopic: textResult.evaluation.isOffTopic ?? false,
2730
+ alertSeverity: textResult.evaluation.alertSeverity ?? "none",
2731
+ reasoning: textResult.evaluation.reasoning ?? "No evaluation provided",
2732
+ evaluationModel: textResult.evaluation.evaluationModel ?? "unknown",
2733
+ evaluationTime: textResult.evaluation.evaluationTime ?? Date.now(),
2734
+ evaluationDomain: textResult.evaluation.evaluationDomain ??
2735
+ textOptions.evaluationDomain ??
2736
+ factoryResult.domainType,
2671
2737
  }
2672
- }); // end metricsTraceContextStorage.run
2673
- });
2738
+ : undefined,
2739
+ audio: textResult.audio,
2740
+ video: textResult.video,
2741
+ ppt: textResult.ppt,
2742
+ ...(textResult.retries && { retries: textResult.retries }),
2743
+ };
2744
+ if (generateResult.analytics?.cost && generateResult.analytics.cost > 0) {
2745
+ this._sessionCostUsd += generateResult.analytics.cost;
2746
+ }
2747
+ this.scheduleGenerateMemoryStorage(options, originalPrompt, generateResult);
2748
+ generateSpan.setAttribute("neurolink.output_length", generateResult.content?.length || 0);
2749
+ generateSpan.setAttribute("neurolink.tokens.input", generateResult.usage?.input || 0);
2750
+ generateSpan.setAttribute("neurolink.tokens.output", generateResult.usage?.output || 0);
2751
+ generateSpan.setAttribute("neurolink.finish_reason", generateResult.finishReason || "unknown");
2752
+ generateSpan.setAttribute("neurolink.result_provider", generateResult.provider || "unknown");
2753
+ generateSpan.setAttribute("neurolink.result_model", generateResult.model || "unknown");
2754
+ generateSpan.setAttribute("generate.retry_count", generateResult.retries?.count || 0);
2755
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
2756
+ return generateResult;
2757
+ }
2758
+ emitGenerateErrorEvent(optionsOrPrompt, error) {
2759
+ const errProvider = typeof optionsOrPrompt === "object"
2760
+ ? optionsOrPrompt.provider || "unknown"
2761
+ : "unknown";
2762
+ const errModel = typeof optionsOrPrompt === "object"
2763
+ ? optionsOrPrompt.model || "unknown"
2764
+ : "unknown";
2765
+ try {
2766
+ this.emitter.emit("generation:end", {
2767
+ provider: errProvider,
2768
+ model: errModel,
2769
+ responseTime: 0,
2770
+ error: error instanceof Error ? error.message : String(error),
2771
+ success: false,
2772
+ });
2773
+ }
2774
+ catch (emitError) {
2775
+ void emitError;
2776
+ }
2674
2777
  }
2675
2778
  /**
2676
2779
  * Schedule non-blocking memory storage after generate completes.
@@ -2751,8 +2854,11 @@ Current user's request: ${currentInput}`;
2751
2854
  ?.filter((m) => m.role === "user" || m.role === "assistant")
2752
2855
  .map((m) => ({
2753
2856
  role: m.role,
2754
- content: typeof m.content === "string" ? m.content : JSON.stringify(m.content),
2755
- })) ?? options.conversationHistory,
2857
+ content: typeof m.content === "string"
2858
+ ? m.content
2859
+ : JSON.stringify(m.content),
2860
+ })) ??
2861
+ options.conversationHistory,
2756
2862
  timeout: options.timeout,
2757
2863
  verbose: false,
2758
2864
  metadata: options.context,
@@ -2762,8 +2868,10 @@ Current user's request: ${currentInput}`;
2762
2868
  // Primary output (backward compatible) - use the original best response
2763
2869
  content: workflowResult.content,
2764
2870
  // Provider info from selected response
2765
- provider: workflowResult.selectedResponse?.provider || workflowConfig.models[0]?.provider,
2766
- model: workflowResult.selectedResponse?.model || workflowConfig.models[0]?.model,
2871
+ provider: workflowResult.selectedResponse?.provider ||
2872
+ workflowConfig.models[0]?.provider,
2873
+ model: workflowResult.selectedResponse?.model ||
2874
+ workflowConfig.models[0]?.model,
2767
2875
  // Basic usage info
2768
2876
  usage: workflowResult.usage
2769
2877
  ? {
@@ -2845,8 +2953,11 @@ Current user's request: ${currentInput}`;
2845
2953
  ?.filter((m) => m.role === "user" || m.role === "assistant")
2846
2954
  .map((m) => ({
2847
2955
  role: m.role,
2848
- content: typeof m.content === "string" ? m.content : JSON.stringify(m.content),
2849
- })) ?? options.conversationHistory,
2956
+ content: typeof m.content === "string"
2957
+ ? m.content
2958
+ : JSON.stringify(m.content),
2959
+ })) ??
2960
+ options.conversationHistory,
2850
2961
  timeout: options.timeout,
2851
2962
  verbose: false,
2852
2963
  metadata: options.context,
@@ -2970,7 +3081,9 @@ Current user's request: ${currentInput}`;
2970
3081
  */
2971
3082
  async generateText(options) {
2972
3083
  // Validate required parameters for backward compatibility
2973
- if (!options.prompt || typeof options.prompt !== "string" || options.prompt.trim() === "") {
3084
+ if (!options.prompt ||
3085
+ typeof options.prompt !== "string" ||
3086
+ options.prompt.trim() === "") {
2974
3087
  throw new Error("GenerateText options must include prompt as a non-empty string");
2975
3088
  }
2976
3089
  // NL-004: Resolve model aliases/deprecations before processing
@@ -2989,239 +3102,247 @@ Current user's request: ${currentInput}`;
2989
3102
  * 5. Store conversation turn for future context
2990
3103
  */
2991
3104
  async generateTextInternal(options) {
2992
- return tracers.sdk.startActiveSpan("neurolink.generateTextInternal", { kind: SpanKind.INTERNAL }, async (internalSpan) => {
2993
- try {
2994
- const generateInternalId = `generate-internal-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
2995
- const existingRequestId = options.context?.requestId;
2996
- const requestId = typeof existingRequestId === "string" && existingRequestId
2997
- ? existingRequestId
2998
- : `req-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
2999
- options.context = { ...options.context, requestId };
3000
- const generateInternalStartTime = Date.now();
3001
- const generateInternalHrTimeStart = process.hrtime.bigint();
3002
- const functionTag = "NeuroLink.generateTextInternal";
3003
- // Set span attributes for internal generation
3004
- internalSpan.setAttribute("neurolink.request_id", requestId);
3005
- internalSpan.setAttribute("neurolink.has_conversation_memory", !!this.conversationMemory);
3006
- internalSpan.setAttribute("neurolink.provider", options.provider || "auto");
3007
- internalSpan.setAttribute("neurolink.model", options.model || "default");
3008
- this.logGenerateTextInternalStart(generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, options, functionTag);
3009
- this.emitGenerationStartEvents(options);
3010
- try {
3011
- await this.initializeConversationMemoryForGeneration(generateInternalId, generateInternalStartTime, generateInternalHrTimeStart);
3012
- const mcpResult = await this.attemptMCPGeneration(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag);
3013
- if (mcpResult) {
3014
- logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS (MCP path)`, {
3015
- provider: mcpResult.provider,
3016
- model: mcpResult.model,
3017
- responseTimeMs: Date.now() - generateInternalStartTime,
3018
- tokensUsed: mcpResult.usage?.total || 0,
3019
- toolsUsed: mcpResult.toolsUsed?.length || 0,
3020
- ...(mcpResult.usage?.cacheCreationTokens !== undefined && {
3021
- cacheCreationTokens: mcpResult.usage.cacheCreationTokens,
3022
- }),
3023
- ...(mcpResult.usage?.cacheReadTokens !== undefined && {
3024
- cacheReadTokens: mcpResult.usage.cacheReadTokens,
3025
- }),
3026
- ...(mcpResult.usage?.cacheSavingsPercent !== undefined && {
3027
- cacheSavingsPercent: mcpResult.usage.cacheSavingsPercent,
3028
- }),
3029
- });
3030
- {
3031
- const memStoreStart = Date.now();
3032
- try {
3033
- await storeConversationTurn(this.conversationMemory, options, mcpResult, new Date(generateInternalStartTime), requestId);
3034
- this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "mcp" }, Date.now() - memStoreStart, SpanStatus.OK);
3035
- }
3036
- catch (memErr) {
3037
- this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "mcp" }, Date.now() - memStoreStart, SpanStatus.ERROR, memErr instanceof Error ? memErr.message : String(memErr));
3038
- }
3039
- }
3040
- this.emitter.emit("response:end", mcpResult.content || "");
3041
- internalSpan.setAttribute("neurolink.path", "mcp");
3042
- internalSpan.setAttribute("neurolink.tokens.input", mcpResult.usage?.input || 0);
3043
- internalSpan.setAttribute("neurolink.tokens.output", mcpResult.usage?.output || 0);
3044
- internalSpan.setAttribute("neurolink.result_provider", mcpResult.provider || "unknown");
3045
- internalSpan.setStatus({ code: SpanStatusCode.OK });
3046
- return mcpResult;
3047
- }
3048
- if (options.abortSignal?.aborted) {
3049
- throw new DOMException("The operation was aborted", "AbortError");
3050
- }
3051
- // Save original messages for smart overflow recovery (Solution 6)
3052
- // directProviderGeneration may compact messages; if provider still rejects,
3053
- // the catch block needs the originals for a more effective retry
3054
- if (this.conversationMemory) {
3055
- const originalMessages = await getConversationMessages(this.conversationMemory, options);
3056
- options._originalConversationMessages = originalMessages ? [...originalMessages] : undefined;
3057
- }
3058
- const directResult = await this.directProviderGeneration(options);
3059
- logger.debug(`[${functionTag}] Direct generation successful`);
3060
- logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS`, {
3061
- provider: directResult.provider,
3062
- model: directResult.model,
3063
- responseTimeMs: Date.now() - generateInternalStartTime,
3064
- tokensUsed: directResult.usage?.total || 0,
3065
- toolsUsed: directResult.toolsUsed?.length || 0,
3066
- ...(directResult.usage?.cacheCreationTokens !== undefined && {
3067
- cacheCreationTokens: directResult.usage.cacheCreationTokens,
3068
- }),
3069
- ...(directResult.usage?.cacheReadTokens !== undefined && {
3070
- cacheReadTokens: directResult.usage.cacheReadTokens,
3071
- }),
3072
- ...(directResult.usage?.cacheSavingsPercent !== undefined && {
3073
- cacheSavingsPercent: directResult.usage.cacheSavingsPercent,
3074
- }),
3075
- });
3076
- {
3077
- const memStoreStart = Date.now();
3078
- try {
3079
- await storeConversationTurn(this.conversationMemory, options, directResult, new Date(generateInternalStartTime), requestId);
3080
- this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "direct" }, Date.now() - memStoreStart, SpanStatus.OK);
3081
- }
3082
- catch (memErr) {
3083
- this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "direct" }, Date.now() - memStoreStart, SpanStatus.ERROR, memErr instanceof Error ? memErr.message : String(memErr));
3084
- }
3085
- }
3086
- this.emitter.emit("response:end", directResult.content || "");
3087
- this.emitter.emit("message", `Text generation completed successfully`);
3088
- internalSpan.setAttribute("neurolink.path", "direct");
3089
- internalSpan.setAttribute("neurolink.tokens.input", directResult.usage?.input || 0);
3090
- internalSpan.setAttribute("neurolink.tokens.output", directResult.usage?.output || 0);
3091
- internalSpan.setAttribute("neurolink.result_provider", directResult.provider || "unknown");
3092
- internalSpan.setStatus({ code: SpanStatusCode.OK });
3093
- return directResult;
3094
- }
3095
- catch (error) {
3096
- // Check if this is a context overflow error - attempt recovery
3097
- if (isContextOverflowError(error) && this.conversationMemory) {
3098
- logger.warn(`[${functionTag}] Context overflow detected by provider, attempting smart recovery`, {
3099
- error: error instanceof Error ? error.message : String(error),
3100
- overflowProvider: getContextOverflowProvider(error),
3101
- });
3102
- try {
3103
- // IMPROVEMENT 1: Extract actual token count from provider error if available
3104
- const actualOverflow = parseProviderOverflowDetails(error);
3105
- // IMPROVEMENT 2: Use ORIGINAL messages (not already-compacted ones)
3106
- const originalMessages = options._originalConversationMessages ?? (await getConversationMessages(this.conversationMemory, options));
3107
- // IMPROVEMENT 3: Calculate precise reduction target
3108
- const recoveryBudget = checkContextBudget({
3109
- provider: options.provider || "openai",
3110
- model: options.model,
3111
- maxTokens: options.maxTokens,
3112
- currentPrompt: options.prompt,
3113
- systemPrompt: options.systemPrompt,
3114
- });
3115
- // Use provider's reported token count if available (more accurate than our estimate)
3116
- const actualTokens = actualOverflow?.actualTokens ?? recoveryBudget.estimatedInputTokens;
3117
- const budgetTokens = actualOverflow?.budgetTokens ?? recoveryBudget.availableInputTokens;
3118
- // Target = 70% of budget (aggressive safety margin for recovery)
3119
- const compactionTarget = Math.floor(budgetTokens * 0.7);
3120
- // IMPROVEMENT 4: Calculate adaptive truncation fraction from actual numbers
3121
- const requiredReduction = actualTokens > 0 ? (actualTokens - compactionTarget) / actualTokens : 0.5;
3122
- const compactor = new ContextCompactor({
3123
- enableSummarize: false, // Skip LLM call for recovery (speed)
3124
- enablePrune: true,
3125
- enableDeduplicate: true,
3126
- enableTruncate: true,
3127
- truncationFraction: Math.min(0.9, requiredReduction + 0.15),
3128
- });
3129
- const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
3130
- if (compactionResult.compacted) {
3131
- const repairedResult = repairToolPairs(compactionResult.messages);
3132
- // IMPROVEMENT 5: Verify BEFORE retrying
3133
- const verifyBudget = checkContextBudget({
3134
- provider: options.provider || "openai",
3135
- model: options.model,
3136
- maxTokens: options.maxTokens,
3137
- systemPrompt: options.systemPrompt,
3138
- currentPrompt: options.prompt,
3139
- conversationMessages: repairedResult.messages,
3140
- });
3141
- if (!verifyBudget.withinBudget) {
3142
- logger.error(`[${functionTag}] Recovery compaction insufficient, aborting retry`, {
3143
- estimatedTokens: verifyBudget.estimatedInputTokens,
3144
- availableTokens: verifyBudget.availableInputTokens,
3145
- });
3146
- throw new ContextBudgetExceededError(`Context overflow recovery failed. Provider rejected at ~${actualTokens} tokens, ` +
3147
- `recovery compaction achieved ${compactionResult.tokensAfter} tokens ` +
3148
- `but budget is ${budgetTokens} tokens.`, {
3149
- estimatedTokens: compactionResult.tokensAfter,
3150
- availableTokens: budgetTokens,
3151
- stagesUsed: compactionResult.stagesUsed,
3152
- breakdown: verifyBudget.breakdown,
3153
- });
3154
- }
3155
- logger.info(`[${functionTag}] Smart recovery verified, retrying generation`, {
3156
- tokensSaved: compactionResult.tokensSaved,
3157
- compactionTarget,
3158
- verifiedTokens: verifyBudget.estimatedInputTokens,
3159
- verifiedBudget: verifyBudget.availableInputTokens,
3160
- });
3161
- // Single verified retry
3162
- return await this.directProviderGeneration({
3163
- ...options,
3164
- conversationMessages: repairedResult.messages,
3165
- });
3166
- }
3167
- }
3168
- catch (retryError) {
3169
- // If the retry error is our own ContextBudgetExceededError, re-throw it
3170
- if (retryError instanceof ContextBudgetExceededError) {
3171
- throw retryError;
3172
- }
3173
- logger.error(`[${functionTag}] Recovery attempt failed`, {
3174
- error: retryError instanceof Error ? retryError.message : String(retryError),
3175
- });
3176
- }
3177
- }
3178
- // If the generation was aborted (e.g., coding task short-circuit via AbortController),
3179
- // still store the conversation turn so that:
3180
- // 1. The Redis conversation entry is created (if first turn)
3181
- // 2. setImmediate triggers generateConversationTitle() for the session
3182
- // 3. The caller's syncTitleFromRedis() can find the SDK-generated title
3183
- if (isAbortError(error)) {
3184
- logger.info(`[${functionTag}] Generation aborted — storing conversation turn for title generation`, {
3185
- hasMemory: !!this.conversationMemory,
3186
- memoryType: this.conversationMemory?.constructor?.name || "NONE",
3187
- sessionId: options.context?.sessionId || "unknown",
3188
- });
3189
- try {
3190
- const abortedResult = {
3191
- content: "[generation was interrupted]",
3192
- provider: options.provider || "unknown",
3193
- model: options.model || "unknown",
3194
- responseTime: Date.now() - generateInternalStartTime,
3195
- };
3196
- await withTimeout(storeConversationTurn(this.conversationMemory, options, abortedResult, new Date(generateInternalStartTime), requestId), 5000);
3197
- }
3198
- catch (storeError) {
3199
- logger.warn(`[${functionTag}] Failed to store conversation turn after abort`, {
3200
- error: storeError instanceof Error ? storeError.message : String(storeError),
3201
- });
3202
- }
3203
- }
3204
- else {
3205
- logger.error(`[${functionTag}] All generation methods failed`, {
3206
- error: error instanceof Error ? error.message : String(error),
3207
- });
3208
- }
3209
- this.emitter.emit("response:end", "");
3210
- this.emitter.emit("error", error instanceof Error ? error : new Error(String(error)));
3211
- throw error;
3212
- }
3213
- }
3214
- catch (spanError) {
3215
- internalSpan.setStatus({
3216
- code: SpanStatusCode.ERROR,
3217
- message: spanError instanceof Error ? spanError.message : String(spanError),
3105
+ return tracers.sdk.startActiveSpan("neurolink.generateTextInternal", { kind: SpanKind.INTERNAL }, (internalSpan) => this.executeGenerateTextInternalWithSpan(options, internalSpan));
3106
+ }
3107
+ async executeGenerateTextInternalWithSpan(options, internalSpan) {
3108
+ try {
3109
+ const context = this.initializeGenerateTextInternalContext(options);
3110
+ internalSpan.setAttribute("neurolink.request_id", context.requestId);
3111
+ internalSpan.setAttribute("neurolink.has_conversation_memory", !!this.conversationMemory);
3112
+ internalSpan.setAttribute("neurolink.provider", options.provider || "auto");
3113
+ internalSpan.setAttribute("neurolink.model", options.model || "default");
3114
+ this.logGenerateTextInternalStart(context.generateInternalId, context.generateInternalStartTime, context.generateInternalHrTimeStart, options, context.functionTag);
3115
+ this.emitGenerationStartEvents(options);
3116
+ return await this.runGenerateTextInternalFlow(options, internalSpan, context);
3117
+ }
3118
+ catch (error) {
3119
+ internalSpan.setStatus({
3120
+ code: SpanStatusCode.ERROR,
3121
+ message: error instanceof Error ? error.message : String(error),
3122
+ });
3123
+ throw error;
3124
+ }
3125
+ finally {
3126
+ internalSpan.end();
3127
+ }
3128
+ }
3129
+ initializeGenerateTextInternalContext(options) {
3130
+ const generateInternalId = `generate-internal-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3131
+ const existingRequestId = options.context?.requestId;
3132
+ const requestId = typeof existingRequestId === "string" && existingRequestId
3133
+ ? existingRequestId
3134
+ : `req-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3135
+ options.context = { ...options.context, requestId };
3136
+ return {
3137
+ generateInternalId,
3138
+ generateInternalStartTime: Date.now(),
3139
+ generateInternalHrTimeStart: process.hrtime.bigint(),
3140
+ functionTag: "NeuroLink.generateTextInternal",
3141
+ requestId,
3142
+ };
3143
+ }
3144
+ async runGenerateTextInternalFlow(options, internalSpan, context) {
3145
+ try {
3146
+ await this.initializeConversationMemoryForGeneration(context.generateInternalId, context.generateInternalStartTime, context.generateInternalHrTimeStart);
3147
+ const mcpResult = await this.attemptMCPGeneration(options, context.generateInternalId, context.generateInternalStartTime, context.generateInternalHrTimeStart, context.functionTag);
3148
+ if (mcpResult) {
3149
+ return this.finalizeGenerateTextInternalResult({
3150
+ path: "mcp",
3151
+ result: mcpResult,
3152
+ options,
3153
+ internalSpan,
3154
+ requestId: context.requestId,
3155
+ startTime: context.generateInternalStartTime,
3218
3156
  });
3219
- throw spanError;
3220
3157
  }
3221
- finally {
3222
- internalSpan.end();
3158
+ if (options.abortSignal?.aborted) {
3159
+ throw new DOMException("The operation was aborted", "AbortError");
3160
+ }
3161
+ await this.captureOriginalConversationMessagesForRecovery(options);
3162
+ const directResult = await this.directProviderGeneration(options);
3163
+ logger.debug(`[${context.functionTag}] Direct generation successful`);
3164
+ return this.finalizeGenerateTextInternalResult({
3165
+ path: "direct",
3166
+ result: directResult,
3167
+ options,
3168
+ internalSpan,
3169
+ requestId: context.requestId,
3170
+ startTime: context.generateInternalStartTime,
3171
+ });
3172
+ }
3173
+ catch (error) {
3174
+ const recoveredResult = await this.handleGenerateTextInternalFailure(options, context, error);
3175
+ if (recoveredResult) {
3176
+ return recoveredResult;
3223
3177
  }
3178
+ throw error;
3179
+ }
3180
+ }
3181
+ async captureOriginalConversationMessagesForRecovery(options) {
3182
+ if (!this.conversationMemory) {
3183
+ return;
3184
+ }
3185
+ const originalMessages = await getConversationMessages(this.conversationMemory, options);
3186
+ options._originalConversationMessages = originalMessages
3187
+ ? [...originalMessages]
3188
+ : undefined;
3189
+ }
3190
+ async finalizeGenerateTextInternalResult(params) {
3191
+ const { path, result, options, internalSpan, requestId, startTime } = params;
3192
+ logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS${path === "mcp" ? " (MCP path)" : ""}`, {
3193
+ provider: result.provider,
3194
+ model: result.model,
3195
+ responseTimeMs: Date.now() - startTime,
3196
+ tokensUsed: result.usage?.total || 0,
3197
+ toolsUsed: result.toolsUsed?.length || 0,
3198
+ ...(result.usage?.cacheCreationTokens !== undefined && {
3199
+ cacheCreationTokens: result.usage.cacheCreationTokens,
3200
+ }),
3201
+ ...(result.usage?.cacheReadTokens !== undefined && {
3202
+ cacheReadTokens: result.usage.cacheReadTokens,
3203
+ }),
3204
+ ...(result.usage?.cacheSavingsPercent !== undefined && {
3205
+ cacheSavingsPercent: result.usage.cacheSavingsPercent,
3206
+ }),
3224
3207
  });
3208
+ const memStoreStart = Date.now();
3209
+ try {
3210
+ await storeConversationTurn(this.conversationMemory, options, result, new Date(startTime), requestId);
3211
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": path }, Date.now() - memStoreStart, SpanStatus.OK);
3212
+ }
3213
+ catch (memoryError) {
3214
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": path }, Date.now() - memStoreStart, SpanStatus.ERROR, memoryError instanceof Error
3215
+ ? memoryError.message
3216
+ : String(memoryError));
3217
+ }
3218
+ this.emitter.emit("response:end", result.content || "");
3219
+ if (path === "direct") {
3220
+ this.emitter.emit("message", "Text generation completed successfully");
3221
+ }
3222
+ internalSpan.setAttribute("neurolink.path", path);
3223
+ internalSpan.setAttribute("neurolink.tokens.input", result.usage?.input || 0);
3224
+ internalSpan.setAttribute("neurolink.tokens.output", result.usage?.output || 0);
3225
+ internalSpan.setAttribute("neurolink.result_provider", result.provider || "unknown");
3226
+ internalSpan.setStatus({ code: SpanStatusCode.OK });
3227
+ return result;
3228
+ }
3229
+ async handleGenerateTextInternalFailure(options, context, error) {
3230
+ const recoveredResult = await this.tryRecoverGenerateTextOverflow(options, context.functionTag, error);
3231
+ if (recoveredResult) {
3232
+ return recoveredResult;
3233
+ }
3234
+ if (isAbortError(error)) {
3235
+ logger.info(`[${context.functionTag}] Generation aborted — storing conversation turn for title generation`, {
3236
+ hasMemory: !!this.conversationMemory,
3237
+ memoryType: this.conversationMemory?.constructor?.name || "NONE",
3238
+ sessionId: options.context?.sessionId ||
3239
+ "unknown",
3240
+ });
3241
+ try {
3242
+ const abortedResult = {
3243
+ content: "[generation was interrupted]",
3244
+ provider: options.provider || "unknown",
3245
+ model: options.model || "unknown",
3246
+ responseTime: Date.now() - context.generateInternalStartTime,
3247
+ };
3248
+ await withTimeout(storeConversationTurn(this.conversationMemory, options, abortedResult, new Date(context.generateInternalStartTime), context.requestId), 5000);
3249
+ }
3250
+ catch (storeError) {
3251
+ logger.warn(`[${context.functionTag}] Failed to store conversation turn after abort`, {
3252
+ error: storeError instanceof Error
3253
+ ? storeError.message
3254
+ : String(storeError),
3255
+ });
3256
+ }
3257
+ }
3258
+ else {
3259
+ logger.error(`[${context.functionTag}] All generation methods failed`, {
3260
+ error: error instanceof Error ? error.message : String(error),
3261
+ });
3262
+ }
3263
+ this.emitter.emit("response:end", "");
3264
+ this.emitter.emit("error", error instanceof Error ? error : new Error(String(error)));
3265
+ return null;
3266
+ }
3267
+ async tryRecoverGenerateTextOverflow(options, functionTag, error) {
3268
+ if (!isContextOverflowError(error) || !this.conversationMemory) {
3269
+ return null;
3270
+ }
3271
+ logger.warn(`[${functionTag}] Context overflow detected by provider, attempting smart recovery`, {
3272
+ error: error instanceof Error ? error.message : String(error),
3273
+ overflowProvider: getContextOverflowProvider(error),
3274
+ });
3275
+ try {
3276
+ const actualOverflow = parseProviderOverflowDetails(error);
3277
+ const originalMessages = options._originalConversationMessages ??
3278
+ (await getConversationMessages(this.conversationMemory, options));
3279
+ const recoveryBudget = checkContextBudget({
3280
+ provider: options.provider || "openai",
3281
+ model: options.model,
3282
+ maxTokens: options.maxTokens,
3283
+ currentPrompt: options.prompt,
3284
+ systemPrompt: options.systemPrompt,
3285
+ });
3286
+ const actualTokens = actualOverflow?.actualTokens ?? recoveryBudget.estimatedInputTokens;
3287
+ const budgetTokens = actualOverflow?.budgetTokens ?? recoveryBudget.availableInputTokens;
3288
+ const compactionTarget = Math.floor(budgetTokens * 0.7);
3289
+ const requiredReduction = actualTokens > 0
3290
+ ? (actualTokens - compactionTarget) / actualTokens
3291
+ : 0.5;
3292
+ const compactor = new ContextCompactor({
3293
+ enableSummarize: false,
3294
+ enablePrune: true,
3295
+ enableDeduplicate: true,
3296
+ enableTruncate: true,
3297
+ truncationFraction: Math.min(0.9, requiredReduction + 0.15),
3298
+ });
3299
+ const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
3300
+ if (!compactionResult.compacted) {
3301
+ return null;
3302
+ }
3303
+ const repairedResult = repairToolPairs(compactionResult.messages);
3304
+ const verifyBudget = checkContextBudget({
3305
+ provider: options.provider || "openai",
3306
+ model: options.model,
3307
+ maxTokens: options.maxTokens,
3308
+ systemPrompt: options.systemPrompt,
3309
+ currentPrompt: options.prompt,
3310
+ conversationMessages: repairedResult.messages,
3311
+ });
3312
+ if (!verifyBudget.withinBudget) {
3313
+ logger.error(`[${functionTag}] Recovery compaction insufficient, aborting retry`, {
3314
+ estimatedTokens: verifyBudget.estimatedInputTokens,
3315
+ availableTokens: verifyBudget.availableInputTokens,
3316
+ });
3317
+ throw new ContextBudgetExceededError(`Context overflow recovery failed. Provider rejected at ~${actualTokens} tokens, ` +
3318
+ `recovery compaction achieved ${compactionResult.tokensAfter} tokens ` +
3319
+ `but budget is ${budgetTokens} tokens.`, {
3320
+ estimatedTokens: compactionResult.tokensAfter,
3321
+ availableTokens: budgetTokens,
3322
+ stagesUsed: compactionResult.stagesUsed,
3323
+ breakdown: verifyBudget.breakdown,
3324
+ });
3325
+ }
3326
+ logger.info(`[${functionTag}] Smart recovery verified, retrying generation`, {
3327
+ tokensSaved: compactionResult.tokensSaved,
3328
+ compactionTarget,
3329
+ verifiedTokens: verifyBudget.estimatedInputTokens,
3330
+ verifiedBudget: verifyBudget.availableInputTokens,
3331
+ });
3332
+ return this.directProviderGeneration({
3333
+ ...options,
3334
+ conversationMessages: repairedResult.messages,
3335
+ });
3336
+ }
3337
+ catch (retryError) {
3338
+ if (retryError instanceof ContextBudgetExceededError) {
3339
+ throw retryError;
3340
+ }
3341
+ logger.error(`[${functionTag}] Recovery attempt failed`, {
3342
+ error: retryError instanceof Error ? retryError.message : String(retryError),
3343
+ });
3344
+ return null;
3345
+ }
3225
3346
  }
3226
3347
  /**
3227
3348
  * Log generateTextInternal start with comprehensive analysis
@@ -3294,7 +3415,8 @@ Current user's request: ${currentInput}`;
3294
3415
  * Attempt MCP generation with retry logic
3295
3416
  */
3296
3417
  async attemptMCPGeneration(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag) {
3297
- if (!options.disableTools && !(options.tts?.enabled && !options.tts?.useAiResponse)) {
3418
+ if (!options.disableTools &&
3419
+ !(options.tts?.enabled && !options.tts?.useAiResponse)) {
3298
3420
  return await this.performMCPGenerationRetries(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag);
3299
3421
  }
3300
3422
  return null;
@@ -3316,7 +3438,9 @@ Current user's request: ${currentInput}`;
3316
3438
  try {
3317
3439
  logger.debug(`[${functionTag}] Attempting MCP generation (attempt ${attempt}/${maxAttempts})...`);
3318
3440
  const mcpResult = await this.tryMCPGeneration(options);
3319
- if (mcpResult && (mcpResult.content || (mcpResult.toolExecutions && mcpResult.toolExecutions.length > 0))) {
3441
+ if (mcpResult &&
3442
+ (mcpResult.content ||
3443
+ (mcpResult.toolExecutions && mcpResult.toolExecutions.length > 0))) {
3320
3444
  logger.debug(`[${functionTag}] MCP generation successful on attempt ${attempt}`, {
3321
3445
  contentLength: mcpResult.content?.length || 0,
3322
3446
  toolsUsed: mcpResult.toolsUsed?.length || 0,
@@ -3347,7 +3471,11 @@ Current user's request: ${currentInput}`;
3347
3471
  // NL-007: Record retry error for observability
3348
3472
  retryCount++;
3349
3473
  const errMsg = error instanceof Error ? error.message : String(error);
3350
- const errCode = error instanceof NeuroLinkError ? error.code : error instanceof Error ? error.name : "UNKNOWN";
3474
+ const errCode = error instanceof NeuroLinkError
3475
+ ? error.code
3476
+ : error instanceof Error
3477
+ ? error.name
3478
+ : "UNKNOWN";
3351
3479
  retryErrors.push({ code: errCode, message: errMsg.substring(0, 500) });
3352
3480
  logger.debug(`[${functionTag}] MCP generation failed on attempt ${attempt}/${maxAttempts}`, {
3353
3481
  error: errMsg,
@@ -3366,8 +3494,11 @@ Current user's request: ${currentInput}`;
3366
3494
  const isNonRetryable = isContextOverflowError(error) ||
3367
3495
  isToolError ||
3368
3496
  isNonRetryableProviderError(error) ||
3369
- (error instanceof Error && error.isRetryable === false) ||
3370
- (error instanceof Error && error.statusCode === 400);
3497
+ (error instanceof Error &&
3498
+ error.isRetryable ===
3499
+ false) ||
3500
+ (error instanceof Error &&
3501
+ error.statusCode === 400);
3371
3502
  if (isNonRetryable) {
3372
3503
  logger.debug(`[${functionTag}] Non-retryable error detected, skipping remaining retries`);
3373
3504
  break;
@@ -3403,288 +3534,28 @@ Current user's request: ${currentInput}`;
3403
3534
  throw new DOMException("The operation was aborted", "AbortError");
3404
3535
  }
3405
3536
  // 🚀 EXHAUSTIVE LOGGING POINT T001: TRY MCP GENERATION ENTRY
3406
- const requestId = options.context?.requestId || "unknown";
3537
+ const requestId = options.context?.requestId ||
3538
+ "unknown";
3407
3539
  const tryMCPId = `try-mcp-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3408
3540
  const tryMCPStartTime = Date.now();
3409
3541
  const tryMCPHrTimeStart = process.hrtime.bigint();
3410
3542
  const functionTag = "NeuroLink.tryMCPGeneration";
3411
3543
  try {
3412
- // Initialize MCP if needed
3413
- await this.initializeMCP();
3414
- if (!this.mcpInitialized) {
3415
- logger.warn(`[NeuroLink] ⚠️ LOG_POINT_T004_MCP_NOT_AVAILABLE`, {
3416
- logPoint: "T004_MCP_NOT_AVAILABLE",
3417
- tryMCPId,
3418
- timestamp: new Date().toISOString(),
3419
- elapsedMs: Date.now() - tryMCPStartTime,
3420
- elapsedNs: (process.hrtime.bigint() - tryMCPHrTimeStart).toString(),
3421
- mcpInitialized: this.mcpInitialized,
3422
- mcpComponents: {
3423
- hasExternalServerManager: !!this.externalServerManager,
3424
- hasToolRegistry: !!this.toolRegistry,
3425
- hasProviderRegistry: !!AIProviderFactory,
3426
- },
3427
- fallbackReason: "MCP_NOT_INITIALIZED",
3428
- message: "MCP not available - returning null for fallback to direct generation",
3429
- });
3430
- return null; // Skip MCP if not available
3431
- }
3432
- // Context creation removed - was never used
3433
- // Determine provider
3434
- const providerName = options.provider === "auto" || !options.provider ? await getBestProvider() : options.provider;
3435
- // Get available tools
3436
- let availableTools = await this.getAllAvailableTools();
3437
- // NL-001: Filter out tools with OPEN circuit breakers
3438
- const { tools: circuitBreakerFilteredTools, unavailableTools } = this.toolRegistry.getAvailableTools(this.toolCircuitBreakers);
3439
- // Intersect: keep only tools that pass both getAllAvailableTools and circuit breaker filtering
3440
- const cbFilteredNames = new Set(circuitBreakerFilteredTools.map((t) => t.name));
3441
- availableTools = availableTools.filter((t) => cbFilteredNames.has(t.name));
3442
- // Apply per-call tool filtering for system prompt tool descriptions
3443
- availableTools = this.applyToolInfoFiltering(availableTools, options);
3444
- const targetTool = availableTools.find((t) => t.name.includes("SuccessRateSRByTime") || t.name.includes("juspay-analytics"));
3445
- logger.debug("Available tools for AI prompt generation", {
3446
- toolsCount: availableTools.length,
3447
- toolNames: availableTools.map((t) => t.name),
3448
- unavailableToolsCount: unavailableTools.length,
3449
- unavailableTools: unavailableTools,
3450
- hasTargetTool: !!targetTool,
3451
- targetToolDetails: targetTool
3452
- ? {
3453
- name: targetTool.name,
3454
- description: targetTool.description,
3455
- server: targetTool.server,
3456
- }
3457
- : null,
3458
- });
3459
- // NL-001: Inject system note about unavailable tools
3460
- let circuitBreakerNote = "";
3461
- if (unavailableTools.length > 0) {
3462
- circuitBreakerNote = `\n\nNOTE: The following tools are temporarily unavailable due to repeated failures: ${unavailableTools.join(", ")}. Do not attempt to call these tools.`;
3463
- }
3464
- // Create tool-aware system prompt (skip if skipToolPromptInjection is true)
3465
- const enhancedSystemPrompt = options.skipToolPromptInjection
3466
- ? (options.systemPrompt || "") + circuitBreakerNote
3467
- : this.createToolAwareSystemPrompt(options.systemPrompt, availableTools) + circuitBreakerNote;
3468
- logger.debug("Tool-aware system prompt created", {
3469
- requestId,
3470
- originalPromptLength: options.systemPrompt?.length || 0,
3471
- enhancedPromptLength: enhancedSystemPrompt.length,
3472
- skippedToolInjection: !!options.skipToolPromptInjection,
3473
- enhancedPromptPreview: enhancedSystemPrompt.substring(0, 80) + "...",
3474
- });
3475
- logger.debug("[Observability] System prompt metadata", {
3476
- requestId,
3477
- systemPromptLength: enhancedSystemPrompt.length,
3478
- systemPromptHash: enhancedSystemPrompt.length > 0 ? `sha256:${enhancedSystemPrompt.slice(0, 8)}...` : "empty",
3479
- hasCustomSystemPrompt: !!options.systemPrompt,
3480
- });
3481
- // Get conversation messages for context
3482
- let conversationMessages = await getConversationMessages(this.conversationMemory, options);
3483
- if (logger.shouldLog("debug")) {
3484
- try {
3485
- logger.debug("[Observability] Conversation history summary", {
3486
- requestId,
3487
- messageCount: conversationMessages?.length || 0,
3488
- messages: conversationMessages?.map((msg, i) => {
3489
- let contentLength;
3490
- if (typeof msg.content === "string") {
3491
- contentLength = msg.content.length;
3492
- }
3493
- else {
3494
- try {
3495
- contentLength = JSON.stringify(msg.content).length;
3496
- }
3497
- catch {
3498
- contentLength = 0;
3499
- }
3500
- }
3501
- return {
3502
- index: i,
3503
- role: msg.role,
3504
- contentLength,
3505
- contentPreview: typeof msg.content === "string" ? msg.content.substring(0, 200) : "[multimodal]",
3506
- };
3507
- }),
3508
- });
3509
- }
3510
- catch {
3511
- // Ignore serialization errors in debug logging
3512
- }
3513
- }
3514
- logger.debug("[Observability] Available tools for LLM", {
3515
- requestId,
3516
- toolCount: availableTools?.length || 0,
3517
- toolNames: availableTools?.map((t) => t.name) || [],
3518
- });
3519
- // Pre-generation budget check
3520
- const budgetResult = checkContextBudget({
3521
- provider: providerName,
3522
- model: options.model,
3523
- maxTokens: options.maxTokens,
3524
- systemPrompt: enhancedSystemPrompt,
3525
- conversationMessages: conversationMessages,
3526
- currentPrompt: options.prompt,
3527
- toolDefinitions: availableTools,
3528
- });
3529
- logger.info("[TokenBudget] Token breakdown", {
3530
- requestId,
3531
- system: budgetResult.breakdown?.systemPrompt || 0,
3532
- history: budgetResult.breakdown?.conversationHistory || 0,
3533
- tools: budgetResult.breakdown?.toolDefinitions || 0,
3534
- currentPrompt: budgetResult.breakdown?.currentPrompt || 0,
3535
- files: budgetResult.breakdown?.fileAttachments || 0,
3536
- total: budgetResult.estimatedInputTokens,
3537
- budget: budgetResult.availableInputTokens,
3538
- usagePercent: Math.round(budgetResult.usageRatio * 1000) / 10,
3539
- conversationMessageCount: conversationMessages?.length || 0,
3540
- shouldCompact: budgetResult.shouldCompact,
3541
- });
3542
- const messageCount = conversationMessages?.length || 0;
3543
- const compactionSessionId = this.getCompactionSessionId(options);
3544
- if (budgetResult.shouldCompact &&
3545
- this.conversationMemory &&
3546
- messageCount > (this.lastCompactionMessageCount.get(compactionSessionId) ?? 0)) {
3547
- logger.info("[NeuroLink] Context budget exceeded, triggering auto-compaction", {
3548
- usageRatio: budgetResult.usageRatio,
3549
- estimatedTokens: budgetResult.estimatedInputTokens,
3550
- availableTokens: budgetResult.availableInputTokens,
3551
- });
3552
- const compactor = new ContextCompactor({
3553
- provider: providerName,
3554
- summarizationProvider: this.conversationMemoryConfig?.conversationMemory?.summarizationProvider,
3555
- summarizationModel: this.conversationMemoryConfig?.conversationMemory?.summarizationModel,
3556
- });
3557
- const compactionResult = await compactor.compact(conversationMessages, budgetResult.availableInputTokens, this.conversationMemoryConfig?.conversationMemory, requestId);
3558
- if (compactionResult.compacted) {
3559
- const repairedResult = repairToolPairs(compactionResult.messages);
3560
- conversationMessages = repairedResult.messages;
3561
- this.lastCompactionMessageCount.set(compactionSessionId, conversationMessages.length);
3562
- logger.info("[NeuroLink] Context compacted successfully", {
3563
- stagesUsed: compactionResult.stagesUsed,
3564
- tokensSaved: compactionResult.tokensSaved,
3565
- });
3566
- }
3567
- // POST-COMPACTION BUDGET RE-CHECK (BUG-003 fix)
3568
- const postCompactBudget = checkContextBudget({
3569
- provider: providerName,
3570
- model: options.model,
3571
- maxTokens: options.maxTokens,
3572
- systemPrompt: enhancedSystemPrompt,
3573
- conversationMessages: conversationMessages,
3574
- currentPrompt: options.prompt,
3575
- toolDefinitions: availableTools,
3576
- });
3577
- if (!postCompactBudget.withinBudget) {
3578
- const overageRatio = postCompactBudget.usageRatio - 1.0;
3579
- logger.warn("[NeuroLink] Post-compaction still over budget, attempting emergency content truncation", {
3580
- requestId,
3581
- estimatedTokens: postCompactBudget.estimatedInputTokens,
3582
- availableTokens: postCompactBudget.availableInputTokens,
3583
- overagePercent: Math.round(overageRatio * 100),
3584
- stagesUsedInCompaction: compactionResult.stagesUsed,
3585
- });
3586
- // Emergency: truncate the content of the longest messages
3587
- conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
3588
- // Final check after emergency truncation
3589
- const finalBudget = checkContextBudget({
3590
- provider: providerName,
3591
- model: options.model,
3592
- maxTokens: options.maxTokens,
3593
- systemPrompt: enhancedSystemPrompt,
3594
- conversationMessages: conversationMessages,
3595
- currentPrompt: options.prompt,
3596
- toolDefinitions: availableTools,
3597
- });
3598
- if (!finalBudget.withinBudget) {
3599
- throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
3600
- `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
3601
- `Budget: ${finalBudget.availableInputTokens} tokens. ` +
3602
- `Conversation is too large to fit in the model's context window.`, {
3603
- estimatedTokens: finalBudget.estimatedInputTokens,
3604
- availableTokens: finalBudget.availableInputTokens,
3605
- stagesUsed: compactionResult.stagesUsed,
3606
- breakdown: finalBudget.breakdown,
3607
- });
3608
- }
3609
- }
3544
+ const generationContext = await this.prepareMCPGenerationContext(options, requestId, tryMCPId, tryMCPStartTime, tryMCPHrTimeStart);
3545
+ if (!generationContext) {
3546
+ return null;
3610
3547
  }
3611
- // Create provider and generate (with confidence that context fits)
3612
- const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
3613
- this, // Pass SDK instance
3614
- options.region);
3615
- // Propagate trace context for parent-child span hierarchy
3616
- provider.setTraceContext(this._metricsTraceContext);
3617
- // ADD: Emit connection events for all providers (Bedrock-compatible)
3618
- this.emitter.emit("connected");
3619
- this.emitter.emit("message", `${providerName} provider initialized successfully`);
3620
- // Enable tool execution for the provider using BaseProvider method
3621
- provider.setupToolExecutor({
3622
- customTools: this.getCustomTools(),
3623
- executeTool: (toolName, params) => this.executeTool(toolName, params, {
3624
- disableToolCache: options.disableToolCache,
3625
- }),
3626
- }, functionTag);
3627
- logger.debug("[Observability] User input to LLM", {
3548
+ const conversationMessages = await this.ensureMCPGenerationBudget(options, requestId, generationContext.providerName, generationContext.enhancedSystemPrompt, generationContext.availableTools, generationContext.conversationMessages);
3549
+ return this.generateWithMCPProvider({
3550
+ options,
3628
3551
  requestId,
3629
- promptPreview: options.prompt?.substring(0, 200),
3630
- promptLength: options.prompt?.length || 0,
3631
- model: options.model,
3632
- maxTokens: options.maxTokens,
3633
- temperature: options.temperature,
3634
- maxSteps: options.maxSteps,
3635
- skipToolPromptInjection: options.skipToolPromptInjection,
3636
- });
3637
- const result = await provider.generate({
3638
- ...options,
3639
- systemPrompt: enhancedSystemPrompt,
3640
- conversationMessages, // Inject conversation history
3641
- });
3642
- const responseTime = Date.now() - tryMCPStartTime;
3643
- // Enhanced result validation - consider tool executions as valid results
3644
- const hasContent = result && result.content && result.content.trim().length > 0;
3645
- const hasToolExecutions = result && result.toolExecutions && result.toolExecutions.length > 0;
3646
- // Log detailed result analysis for debugging
3647
- mcpLogger.debug(`[${functionTag}] Result validation:`, {
3648
- hasResult: !!result,
3649
- hasContent,
3650
- hasToolExecutions,
3651
- contentLength: result?.content?.length || 0,
3652
- toolExecutionsCount: result?.toolExecutions?.length || 0,
3653
- toolsUsedCount: result?.toolsUsed?.length || 0,
3654
- });
3655
- // Accept result if it has content OR successful tool executions
3656
- if (!hasContent && !hasToolExecutions) {
3657
- mcpLogger.debug(`[${functionTag}] Result rejected: no content and no tool executions`);
3658
- return null; // Let caller fall back to direct generation
3659
- }
3660
- // Transform tool executions with enhanced preservation
3661
- const transformedToolExecutions = transformToolExecutionsForMCP(result.toolExecutions);
3662
- // Log transformation results
3663
- mcpLogger.debug(`[${functionTag}] Tool execution transformation:`, {
3664
- originalCount: result?.toolExecutions?.length || 0,
3665
- transformedCount: transformedToolExecutions.length,
3666
- transformedTools: transformedToolExecutions.map((te) => te.toolName),
3552
+ functionTag,
3553
+ tryMCPStartTime,
3554
+ providerName: generationContext.providerName,
3555
+ availableTools: generationContext.availableTools,
3556
+ enhancedSystemPrompt: generationContext.enhancedSystemPrompt,
3557
+ conversationMessages,
3667
3558
  });
3668
- // Return enhanced result with preserved tool information
3669
- return {
3670
- content: result.content || "", // Ensure content is never undefined
3671
- provider: providerName,
3672
- model: result.model,
3673
- usage: result.usage,
3674
- responseTime,
3675
- finishReason: result.finishReason,
3676
- toolsUsed: result.toolsUsed || [],
3677
- toolExecutions: transformedToolExecutions,
3678
- enhancedWithTools: Boolean(hasToolExecutions), // Mark as enhanced if tools were actually used
3679
- availableTools: transformToolsForMCP(transformToolsToExpectedFormat(availableTools)),
3680
- audio: result.audio,
3681
- video: result.video,
3682
- ppt: result.ppt,
3683
- imageOutput: result.imageOutput,
3684
- // Include analytics and evaluation from BaseProvider
3685
- analytics: result.analytics,
3686
- evaluation: result.evaluation,
3687
- };
3688
3559
  }
3689
3560
  catch (error) {
3690
3561
  // Immediately propagate AbortError — never swallow aborted requests
@@ -3713,6 +3584,299 @@ Current user's request: ${currentInput}`;
3713
3584
  return null; // Let caller fall back
3714
3585
  }
3715
3586
  }
3587
+ async prepareMCPGenerationContext(options, requestId, tryMCPId, tryMCPStartTime, tryMCPHrTimeStart) {
3588
+ await this.initializeMCP();
3589
+ if (!this.mcpInitialized) {
3590
+ logger.warn(`[NeuroLink] ⚠️ LOG_POINT_T004_MCP_NOT_AVAILABLE`, {
3591
+ logPoint: "T004_MCP_NOT_AVAILABLE",
3592
+ tryMCPId,
3593
+ timestamp: new Date().toISOString(),
3594
+ elapsedMs: Date.now() - tryMCPStartTime,
3595
+ elapsedNs: (process.hrtime.bigint() - tryMCPHrTimeStart).toString(),
3596
+ mcpInitialized: this.mcpInitialized,
3597
+ mcpComponents: {
3598
+ hasExternalServerManager: !!this.externalServerManager,
3599
+ hasToolRegistry: !!this.toolRegistry,
3600
+ hasProviderRegistry: !!AIProviderFactory,
3601
+ },
3602
+ fallbackReason: "MCP_NOT_INITIALIZED",
3603
+ message: "MCP not available - returning null for fallback to direct generation",
3604
+ });
3605
+ return null;
3606
+ }
3607
+ const providerName = options.provider === "auto" || !options.provider
3608
+ ? await getBestProvider()
3609
+ : options.provider;
3610
+ let availableTools = await this.getAllAvailableTools();
3611
+ const { tools: circuitBreakerFilteredTools, unavailableTools } = this.toolRegistry.getAvailableTools(this.toolCircuitBreakers);
3612
+ const cbFilteredNames = new Set(circuitBreakerFilteredTools.map((tool) => tool.name));
3613
+ availableTools = availableTools.filter((tool) => cbFilteredNames.has(tool.name));
3614
+ availableTools = this.applyToolInfoFiltering(availableTools, options);
3615
+ const targetTool = availableTools.find((tool) => tool.name.includes("SuccessRateSRByTime") ||
3616
+ tool.name.includes("juspay-analytics"));
3617
+ logger.debug("Available tools for AI prompt generation", {
3618
+ toolsCount: availableTools.length,
3619
+ toolNames: availableTools.map((tool) => tool.name),
3620
+ unavailableToolsCount: unavailableTools.length,
3621
+ unavailableTools,
3622
+ hasTargetTool: !!targetTool,
3623
+ targetToolDetails: targetTool
3624
+ ? {
3625
+ name: targetTool.name,
3626
+ description: targetTool.description,
3627
+ server: targetTool.server,
3628
+ }
3629
+ : null,
3630
+ });
3631
+ const circuitBreakerNote = unavailableTools.length > 0
3632
+ ? `\n\nNOTE: The following tools are temporarily unavailable due to repeated failures: ${unavailableTools.join(", ")}. Do not attempt to call these tools.`
3633
+ : "";
3634
+ const enhancedSystemPrompt = options.skipToolPromptInjection
3635
+ ? (options.systemPrompt || "") + circuitBreakerNote
3636
+ : this.createToolAwareSystemPrompt(options.systemPrompt, availableTools) +
3637
+ circuitBreakerNote;
3638
+ logger.debug("Tool-aware system prompt created", {
3639
+ requestId,
3640
+ originalPromptLength: options.systemPrompt?.length || 0,
3641
+ enhancedPromptLength: enhancedSystemPrompt.length,
3642
+ skippedToolInjection: !!options.skipToolPromptInjection,
3643
+ enhancedPromptPreview: enhancedSystemPrompt.substring(0, 80) + "...",
3644
+ });
3645
+ logger.debug("[Observability] System prompt metadata", {
3646
+ requestId,
3647
+ systemPromptLength: enhancedSystemPrompt.length,
3648
+ systemPromptHash: enhancedSystemPrompt.length > 0
3649
+ ? `sha256:${enhancedSystemPrompt.slice(0, 8)}...`
3650
+ : "empty",
3651
+ hasCustomSystemPrompt: !!options.systemPrompt,
3652
+ });
3653
+ const conversationMessages = (await getConversationMessages(this.conversationMemory, options));
3654
+ this.logMCPConversationSummary(requestId, conversationMessages);
3655
+ logger.debug("[Observability] Available tools for LLM", {
3656
+ requestId,
3657
+ toolCount: availableTools.length,
3658
+ toolNames: availableTools.map((tool) => tool.name),
3659
+ });
3660
+ return {
3661
+ providerName,
3662
+ availableTools,
3663
+ enhancedSystemPrompt,
3664
+ conversationMessages,
3665
+ };
3666
+ }
3667
+ logMCPConversationSummary(requestId, conversationMessages) {
3668
+ if (!logger.shouldLog("debug")) {
3669
+ return;
3670
+ }
3671
+ try {
3672
+ logger.debug("[Observability] Conversation history summary", {
3673
+ requestId,
3674
+ messageCount: conversationMessages.length,
3675
+ messages: conversationMessages.map((message, index) => {
3676
+ let contentLength;
3677
+ if (typeof message.content === "string") {
3678
+ contentLength = message.content.length;
3679
+ }
3680
+ else {
3681
+ try {
3682
+ contentLength = JSON.stringify(message.content).length;
3683
+ }
3684
+ catch {
3685
+ contentLength = 0;
3686
+ }
3687
+ }
3688
+ return {
3689
+ index,
3690
+ role: message.role,
3691
+ contentLength,
3692
+ contentPreview: typeof message.content === "string"
3693
+ ? message.content.substring(0, 200)
3694
+ : "[multimodal]",
3695
+ };
3696
+ }),
3697
+ });
3698
+ }
3699
+ catch {
3700
+ // Ignore serialization errors in debug logging
3701
+ }
3702
+ }
3703
+ async ensureMCPGenerationBudget(options, requestId, providerName, enhancedSystemPrompt, availableTools, conversationMessages) {
3704
+ const budgetResult = checkContextBudget({
3705
+ provider: providerName,
3706
+ model: options.model,
3707
+ maxTokens: options.maxTokens,
3708
+ systemPrompt: enhancedSystemPrompt,
3709
+ conversationMessages: conversationMessages,
3710
+ currentPrompt: options.prompt,
3711
+ toolDefinitions: availableTools,
3712
+ });
3713
+ logger.info("[TokenBudget] Token breakdown", {
3714
+ requestId,
3715
+ system: budgetResult.breakdown?.systemPrompt || 0,
3716
+ history: budgetResult.breakdown?.conversationHistory || 0,
3717
+ tools: budgetResult.breakdown?.toolDefinitions || 0,
3718
+ currentPrompt: budgetResult.breakdown?.currentPrompt || 0,
3719
+ files: budgetResult.breakdown?.fileAttachments || 0,
3720
+ total: budgetResult.estimatedInputTokens,
3721
+ budget: budgetResult.availableInputTokens,
3722
+ usagePercent: Math.round(budgetResult.usageRatio * 1000) / 10,
3723
+ conversationMessageCount: conversationMessages.length,
3724
+ shouldCompact: budgetResult.shouldCompact,
3725
+ });
3726
+ const compactionSessionId = this.getCompactionSessionId(options);
3727
+ const lastCompactionCount = this.lastCompactionMessageCount.get(compactionSessionId) ?? 0;
3728
+ if (!budgetResult.shouldCompact ||
3729
+ !this.conversationMemory ||
3730
+ conversationMessages.length <= lastCompactionCount) {
3731
+ return conversationMessages;
3732
+ }
3733
+ return this.compactMCPConversationForBudget({
3734
+ options,
3735
+ requestId,
3736
+ providerName,
3737
+ enhancedSystemPrompt,
3738
+ availableTools,
3739
+ conversationMessages,
3740
+ availableInputTokens: budgetResult.availableInputTokens,
3741
+ usageRatio: budgetResult.usageRatio,
3742
+ estimatedInputTokens: budgetResult.estimatedInputTokens,
3743
+ compactionSessionId,
3744
+ });
3745
+ }
3746
+ async compactMCPConversationForBudget(context) {
3747
+ const { options, requestId, providerName, enhancedSystemPrompt, availableTools, conversationMessages, availableInputTokens, usageRatio, estimatedInputTokens, compactionSessionId, } = context;
3748
+ logger.info("[NeuroLink] Context budget exceeded, triggering auto-compaction", {
3749
+ usageRatio,
3750
+ estimatedTokens: estimatedInputTokens,
3751
+ availableTokens: availableInputTokens,
3752
+ });
3753
+ const compactor = new ContextCompactor({
3754
+ provider: providerName,
3755
+ summarizationProvider: this.conversationMemoryConfig?.conversationMemory
3756
+ ?.summarizationProvider,
3757
+ summarizationModel: this.conversationMemoryConfig?.conversationMemory?.summarizationModel,
3758
+ });
3759
+ const compactionResult = await compactor.compact(conversationMessages, availableInputTokens, this.conversationMemoryConfig?.conversationMemory, requestId);
3760
+ let compactedMessages = conversationMessages;
3761
+ if (compactionResult.compacted) {
3762
+ const repairedResult = repairToolPairs(compactionResult.messages);
3763
+ compactedMessages = repairedResult.messages;
3764
+ this.lastCompactionMessageCount.set(compactionSessionId, compactedMessages.length);
3765
+ logger.info("[NeuroLink] Context compacted successfully", {
3766
+ stagesUsed: compactionResult.stagesUsed,
3767
+ tokensSaved: compactionResult.tokensSaved,
3768
+ });
3769
+ }
3770
+ const postCompactBudget = checkContextBudget({
3771
+ provider: providerName,
3772
+ model: options.model,
3773
+ maxTokens: options.maxTokens,
3774
+ systemPrompt: enhancedSystemPrompt,
3775
+ conversationMessages: compactedMessages,
3776
+ currentPrompt: options.prompt,
3777
+ toolDefinitions: availableTools,
3778
+ });
3779
+ if (postCompactBudget.withinBudget) {
3780
+ return compactedMessages;
3781
+ }
3782
+ const overageRatio = postCompactBudget.usageRatio - 1.0;
3783
+ logger.warn("[NeuroLink] Post-compaction still over budget, attempting emergency content truncation", {
3784
+ requestId,
3785
+ estimatedTokens: postCompactBudget.estimatedInputTokens,
3786
+ availableTokens: postCompactBudget.availableInputTokens,
3787
+ overagePercent: Math.round(overageRatio * 100),
3788
+ stagesUsedInCompaction: compactionResult.stagesUsed,
3789
+ });
3790
+ compactedMessages = emergencyContentTruncation(compactedMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
3791
+ const finalBudget = checkContextBudget({
3792
+ provider: providerName,
3793
+ model: options.model,
3794
+ maxTokens: options.maxTokens,
3795
+ systemPrompt: enhancedSystemPrompt,
3796
+ conversationMessages: compactedMessages,
3797
+ currentPrompt: options.prompt,
3798
+ toolDefinitions: availableTools,
3799
+ });
3800
+ if (!finalBudget.withinBudget) {
3801
+ throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
3802
+ `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
3803
+ `Budget: ${finalBudget.availableInputTokens} tokens. ` +
3804
+ `Conversation is too large to fit in the model's context window.`, {
3805
+ estimatedTokens: finalBudget.estimatedInputTokens,
3806
+ availableTokens: finalBudget.availableInputTokens,
3807
+ stagesUsed: compactionResult.stagesUsed,
3808
+ breakdown: finalBudget.breakdown,
3809
+ });
3810
+ }
3811
+ return compactedMessages;
3812
+ }
3813
+ async generateWithMCPProvider(context) {
3814
+ const { options, requestId, functionTag, tryMCPStartTime, providerName, availableTools, enhancedSystemPrompt, conversationMessages, } = context;
3815
+ const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, this, options.region);
3816
+ provider.setTraceContext(this._metricsTraceContext);
3817
+ this.emitter.emit("connected");
3818
+ this.emitter.emit("message", `${providerName} provider initialized successfully`);
3819
+ provider.setupToolExecutor({
3820
+ customTools: this.getCustomTools(),
3821
+ executeTool: (toolName, params) => this.executeTool(toolName, params, {
3822
+ disableToolCache: options.disableToolCache,
3823
+ }),
3824
+ }, functionTag);
3825
+ logger.debug("[Observability] User input to LLM", {
3826
+ requestId,
3827
+ promptPreview: options.prompt?.substring(0, 200),
3828
+ promptLength: options.prompt?.length || 0,
3829
+ model: options.model,
3830
+ maxTokens: options.maxTokens,
3831
+ temperature: options.temperature,
3832
+ maxSteps: options.maxSteps,
3833
+ skipToolPromptInjection: options.skipToolPromptInjection,
3834
+ });
3835
+ const result = await provider.generate({
3836
+ ...options,
3837
+ systemPrompt: enhancedSystemPrompt,
3838
+ conversationMessages,
3839
+ });
3840
+ const responseTime = Date.now() - tryMCPStartTime;
3841
+ const hasContent = !!(result?.content && result.content.trim().length > 0);
3842
+ const hasToolExecutions = !!(result?.toolExecutions && result.toolExecutions.length > 0);
3843
+ mcpLogger.debug(`[${functionTag}] Result validation:`, {
3844
+ hasResult: !!result,
3845
+ hasContent,
3846
+ hasToolExecutions,
3847
+ contentLength: result?.content?.length || 0,
3848
+ toolExecutionsCount: result?.toolExecutions?.length || 0,
3849
+ toolsUsedCount: result?.toolsUsed?.length || 0,
3850
+ });
3851
+ if (!hasContent && !hasToolExecutions) {
3852
+ mcpLogger.debug(`[${functionTag}] Result rejected: no content and no tool executions`);
3853
+ return null;
3854
+ }
3855
+ const transformedToolExecutions = transformToolExecutionsForMCP(result.toolExecutions);
3856
+ mcpLogger.debug(`[${functionTag}] Tool execution transformation:`, {
3857
+ originalCount: result?.toolExecutions?.length || 0,
3858
+ transformedCount: transformedToolExecutions.length,
3859
+ transformedTools: transformedToolExecutions.map((te) => te.toolName),
3860
+ });
3861
+ return {
3862
+ content: result.content || "",
3863
+ provider: providerName,
3864
+ model: result.model,
3865
+ usage: result.usage,
3866
+ responseTime,
3867
+ finishReason: result.finishReason,
3868
+ toolsUsed: result.toolsUsed || [],
3869
+ toolExecutions: transformedToolExecutions,
3870
+ enhancedWithTools: Boolean(hasToolExecutions),
3871
+ availableTools: transformToolsForMCP(transformToolsToExpectedFormat(availableTools)),
3872
+ audio: result.audio,
3873
+ video: result.video,
3874
+ ppt: result.ppt,
3875
+ imageOutput: result.imageOutput,
3876
+ analytics: result.analytics,
3877
+ evaluation: result.evaluation,
3878
+ };
3879
+ }
3716
3880
  /**
3717
3881
  * Direct provider generation (no MCP, no recursion)
3718
3882
  */
@@ -3732,12 +3896,18 @@ Current user's request: ${currentInput}`;
3732
3896
  ];
3733
3897
  const requestedProvider = options.provider === "auto" ? undefined : options.provider;
3734
3898
  // Check for orchestrated preferred provider in context
3735
- const preferredOrchestrated = options.context && typeof options.context === "object" && "__orchestratedPreferredProvider" in options.context
3736
- ? options.context.__orchestratedPreferredProvider
3899
+ const preferredOrchestrated = options.context &&
3900
+ typeof options.context === "object" &&
3901
+ "__orchestratedPreferredProvider" in options.context
3902
+ ? options.context
3903
+ .__orchestratedPreferredProvider
3737
3904
  : undefined;
3738
3905
  // Build provider list with orchestrated preference first, then fallback to full list
3739
3906
  const tryProviders = preferredOrchestrated
3740
- ? [preferredOrchestrated, ...providerPriority.filter((p) => p !== preferredOrchestrated)]
3907
+ ? [
3908
+ preferredOrchestrated,
3909
+ ...providerPriority.filter((p) => p !== preferredOrchestrated),
3910
+ ]
3741
3911
  : requestedProvider
3742
3912
  ? [requestedProvider]
3743
3913
  : providerPriority;
@@ -3757,7 +3927,8 @@ Current user's request: ${currentInput}`;
3757
3927
  logger.debug(`[${functionTag}] Attempting provider: ${providerName}`);
3758
3928
  // Get conversation messages for context (use pre-compacted if provided)
3759
3929
  const optionsWithMessages = options;
3760
- let conversationMessages = optionsWithMessages.conversationMessages?.length
3930
+ let conversationMessages = optionsWithMessages.conversationMessages
3931
+ ?.length
3761
3932
  ? optionsWithMessages.conversationMessages
3762
3933
  : await getConversationMessages(this.conversationMemory, options);
3763
3934
  // Pre-generation budget check
@@ -3768,17 +3939,22 @@ Current user's request: ${currentInput}`;
3768
3939
  systemPrompt: options.systemPrompt,
3769
3940
  conversationMessages: conversationMessages,
3770
3941
  currentPrompt: options.prompt,
3771
- toolDefinitions: options.tools ? Object.values(options.tools) : undefined,
3942
+ toolDefinitions: options.tools
3943
+ ? Object.values(options.tools)
3944
+ : undefined,
3772
3945
  });
3773
3946
  const dpgMessageCount = conversationMessages?.length || 0;
3774
3947
  const dpgCompactionSessionId = this.getCompactionSessionId(options);
3775
3948
  if (budgetCheck.shouldCompact &&
3776
3949
  this.conversationMemory &&
3777
- dpgMessageCount > (this.lastCompactionMessageCount.get(dpgCompactionSessionId) ?? 0)) {
3950
+ dpgMessageCount >
3951
+ (this.lastCompactionMessageCount.get(dpgCompactionSessionId) ?? 0)) {
3778
3952
  const compactor = new ContextCompactor({
3779
3953
  provider: providerName,
3780
- summarizationProvider: this.conversationMemoryConfig?.conversationMemory?.summarizationProvider,
3781
- summarizationModel: this.conversationMemoryConfig?.conversationMemory?.summarizationModel,
3954
+ summarizationProvider: this.conversationMemoryConfig?.conversationMemory
3955
+ ?.summarizationProvider,
3956
+ summarizationModel: this.conversationMemoryConfig?.conversationMemory
3957
+ ?.summarizationModel,
3782
3958
  });
3783
3959
  const compactionResult = await compactor.compact(conversationMessages, budgetCheck.availableInputTokens, this.conversationMemoryConfig?.conversationMemory, options.context?.requestId);
3784
3960
  if (compactionResult.compacted) {
@@ -3794,7 +3970,9 @@ Current user's request: ${currentInput}`;
3794
3970
  systemPrompt: options.systemPrompt,
3795
3971
  conversationMessages: conversationMessages,
3796
3972
  currentPrompt: options.prompt,
3797
- toolDefinitions: options.tools ? Object.values(options.tools) : undefined,
3973
+ toolDefinitions: options.tools
3974
+ ? Object.values(options.tools)
3975
+ : undefined,
3798
3976
  });
3799
3977
  if (!postCompactBudget.withinBudget) {
3800
3978
  logger.warn("[NeuroLink] directProviderGeneration: post-compaction still over budget, emergency truncation", {
@@ -3810,7 +3988,9 @@ Current user's request: ${currentInput}`;
3810
3988
  systemPrompt: options.systemPrompt,
3811
3989
  conversationMessages: conversationMessages,
3812
3990
  currentPrompt: options.prompt,
3813
- toolDefinitions: options.tools ? Object.values(options.tools) : undefined,
3991
+ toolDefinitions: options.tools
3992
+ ? Object.values(options.tools)
3993
+ : undefined,
3814
3994
  });
3815
3995
  if (!finalBudget.withinBudget) {
3816
3996
  throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
@@ -4064,367 +4244,285 @@ Current user's request: ${currentInput}`;
4064
4244
  * @throws {Error} When conversation memory operations fail (if enabled)
4065
4245
  */
4066
4246
  async stream(options) {
4067
- // Shallow-copy caller's object to avoid mutating their original reference
4068
- options = { ...options };
4069
- // Set metrics trace context for parent-child span linking
4070
- const metricsTraceId = crypto.randomUUID().replace(/-/g, "");
4071
- const metricsParentSpanId = crypto.randomUUID().replace(/-/g, "").substring(0, 16);
4072
- // Scope trace context to this request via AsyncLocalStorage
4073
- // so concurrent generate/stream calls don't race.
4074
- return metricsTraceContextStorage.run({ traceId: metricsTraceId, parentSpanId: metricsParentSpanId }, async () => {
4075
- // Manual span lifecycle: the span must stay open until the stream is fully consumed,
4076
- // NOT when the StreamResult object is returned. withSpan would end the span too early
4077
- // because streaming results resolve lazily via the async generator.
4078
- const streamSpan = tracers.sdk.startSpan("neurolink.stream", {
4079
- kind: SpanKind.INTERNAL,
4080
- attributes: {
4081
- [ATTR.NL_PROVIDER]: options.provider || "default",
4082
- [ATTR.GEN_AI_MODEL]: options.model || "default",
4083
- [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
4084
- [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
4085
- [ATTR.NL_STREAM_MODE]: true,
4086
- },
4247
+ return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeStreamRequest({ ...options }));
4248
+ }
4249
+ async executeStreamRequest(options) {
4250
+ const streamSpan = tracers.sdk.startSpan("neurolink.stream", {
4251
+ kind: SpanKind.INTERNAL,
4252
+ attributes: {
4253
+ [ATTR.NL_PROVIDER]: options.provider || "default",
4254
+ [ATTR.GEN_AI_MODEL]: options.model || "default",
4255
+ [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
4256
+ [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
4257
+ [ATTR.NL_STREAM_MODE]: true,
4258
+ },
4259
+ });
4260
+ const spanStartTime = Date.now();
4261
+ this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
4262
+ try {
4263
+ options.model = resolveModel(options.model, this.modelAliasConfig);
4264
+ const startTime = Date.now();
4265
+ const hrTimeStart = process.hrtime.bigint();
4266
+ const streamId = `neurolink-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
4267
+ const originalPrompt = options.input.text;
4268
+ options.fileRegistry = this.fileRegistry;
4269
+ await this.validateStreamRequestOptions(options, startTime);
4270
+ const workflowResult = await this.maybeHandleWorkflowStreamRequest({
4271
+ options,
4272
+ startTime,
4273
+ streamSpan,
4274
+ spanStartTime,
4087
4275
  });
4088
- const spanStartTime = Date.now();
4089
- // MCP Enhancement: propagate disableToolCache to tool execution
4090
- this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
4091
- try {
4092
- // NL-004: Resolve model aliases/deprecations before processing
4093
- options.model = resolveModel(options.model, this.modelAliasConfig);
4094
- const startTime = Date.now();
4095
- const hrTimeStart = process.hrtime.bigint();
4096
- const streamId = `neurolink-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
4097
- const originalPrompt = options.input.text; // Store the original prompt for memory storage
4098
- // Inject file registry for lazy on-demand file processing
4099
- options.fileRegistry = this.fileRegistry;
4100
- await this.validateStreamInput(options);
4101
- // Check budget limit before making API call
4102
- if (options.maxBudgetUsd !== undefined &&
4103
- options.maxBudgetUsd > 0 &&
4104
- this._sessionCostUsd >= options.maxBudgetUsd) {
4105
- throw new NeuroLinkError({
4106
- code: "SESSION_BUDGET_EXCEEDED",
4107
- message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
4108
- category: ErrorCategory.VALIDATION,
4109
- severity: ErrorSeverity.HIGH,
4110
- retriable: false,
4111
- context: {
4112
- spent: this._sessionCostUsd,
4113
- limit: options.maxBudgetUsd,
4114
- },
4115
- });
4116
- }
4117
- // Handle per-call auth token validation
4118
- if (options.auth?.token) {
4119
- const { AuthError } = await import("./auth/errors.js");
4120
- await this.ensureAuthProvider();
4121
- if (!this.authProvider) {
4122
- throw AuthError.create("PROVIDER_ERROR", "No auth provider configured. Set auth in constructor or via setAuthProvider() before using auth: { token }.");
4123
- }
4124
- let authResult;
4125
- try {
4126
- authResult = await withTimeout(this.authProvider.authenticateToken(options.auth.token), 5000, AuthError.create("PROVIDER_ERROR", "Auth token validation timed out after 5000ms"));
4127
- }
4128
- catch (err) {
4129
- // Rethrow auth errors as-is; wrap anything else
4130
- if (err instanceof Error && "feature" in err && err.feature === "Auth") {
4131
- throw err;
4276
+ if (workflowResult) {
4277
+ return workflowResult;
4278
+ }
4279
+ return this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
4280
+ options,
4281
+ streamSpan,
4282
+ spanStartTime,
4283
+ startTime,
4284
+ hrTimeStart,
4285
+ streamId,
4286
+ originalPrompt,
4287
+ }));
4288
+ }
4289
+ catch (error) {
4290
+ streamSpan.setStatus({
4291
+ code: SpanStatusCode.ERROR,
4292
+ message: error instanceof Error ? error.message : String(error),
4293
+ });
4294
+ if (error instanceof Error) {
4295
+ streamSpan.recordException(error);
4296
+ }
4297
+ streamSpan.end();
4298
+ throw error;
4299
+ }
4300
+ }
4301
+ async validateStreamRequestOptions(options, startTime) {
4302
+ await this.validateStreamInput(options);
4303
+ this.enforceSessionBudget(options.maxBudgetUsd);
4304
+ await this.applyAuthenticatedRequestContext(options);
4305
+ this.emitStreamStartEvents(options, startTime);
4306
+ this.applyStreamLifecycleMiddleware(options);
4307
+ }
4308
+ async maybeHandleWorkflowStreamRequest(params) {
4309
+ if (!params.options.workflow && !params.options.workflowConfig) {
4310
+ return null;
4311
+ }
4312
+ const result = await this.streamWithWorkflow(params.options, params.startTime);
4313
+ const originalWorkflowStream = result.stream;
4314
+ const self = this;
4315
+ result.stream = (async function* () {
4316
+ try {
4317
+ for await (const chunk of originalWorkflowStream) {
4318
+ yield chunk;
4319
+ }
4320
+ params.streamSpan.setStatus({ code: SpanStatusCode.OK });
4321
+ }
4322
+ catch (error) {
4323
+ params.streamSpan.setStatus({
4324
+ code: SpanStatusCode.ERROR,
4325
+ message: error instanceof Error ? error.message : String(error),
4326
+ });
4327
+ throw error;
4328
+ }
4329
+ finally {
4330
+ self._disableToolCacheForCurrentRequest = false;
4331
+ params.streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - params.spanStartTime);
4332
+ params.streamSpan.end();
4333
+ }
4334
+ })();
4335
+ return result;
4336
+ }
4337
+ async runStandardStreamRequest(params) {
4338
+ const { options, streamSpan, spanStartTime, startTime, hrTimeStart, streamId, originalPrompt, } = params;
4339
+ try {
4340
+ const { enhancedOptions, factoryResult } = await this.prepareStreamOptions(options, streamId, startTime, hrTimeStart);
4341
+ const { stream: mcpStream, provider: providerName, usage: streamUsage, model: streamModel, finishReason: streamFinishReason, toolCalls: streamToolCalls, toolResults: streamToolResults, analytics: streamAnalytics, } = await this.createMCPStream(enhancedOptions);
4342
+ const streamState = {
4343
+ finishReason: streamFinishReason ?? "stop",
4344
+ toolCalls: streamToolCalls,
4345
+ toolResults: streamToolResults,
4346
+ };
4347
+ streamSpan.setAttribute(ATTR.NL_PROVIDER, providerName || "unknown");
4348
+ let accumulatedContent = "";
4349
+ let chunkCount = 0;
4350
+ const { eventSequence, cleanup: cleanupListeners } = this.setupStreamEventListeners();
4351
+ const metadata = {
4352
+ fallbackAttempted: false,
4353
+ guardrailsBlocked: false,
4354
+ error: undefined,
4355
+ fallbackProvider: undefined,
4356
+ fallbackModel: undefined,
4357
+ };
4358
+ const self = this;
4359
+ const streamStartTime = Date.now();
4360
+ const sessionId = enhancedOptions.context
4361
+ ?.sessionId;
4362
+ const processedStream = (async function* () {
4363
+ let streamError;
4364
+ try {
4365
+ for await (const chunk of mcpStream) {
4366
+ chunkCount++;
4367
+ if (chunk &&
4368
+ "content" in chunk &&
4369
+ typeof chunk.content === "string") {
4370
+ accumulatedContent += chunk.content;
4371
+ self.emitter.emit("response:chunk", chunk.content);
4372
+ self.emitter.emit("stream:chunk", {
4373
+ type: "stream:chunk",
4374
+ content: chunk.content,
4375
+ metadata: {
4376
+ chunkIndex: chunkCount,
4377
+ totalLength: accumulatedContent.length,
4378
+ },
4379
+ timestamp: Date.now(),
4380
+ });
4132
4381
  }
4133
- throw AuthError.create("PROVIDER_ERROR", `Auth token validation failed: ${err instanceof Error ? err.message : String(err)}`);
4382
+ yield chunk;
4134
4383
  }
4135
- if (!authResult.valid) {
4136
- throw AuthError.create("INVALID_TOKEN", authResult.error || "Token validation failed");
4137
- }
4138
- // Fail closed: token valid but no user identity is a provider bug
4139
- if (!authResult.user) {
4140
- throw AuthError.create("INVALID_TOKEN", "Token validated but no user identity returned");
4141
- }
4142
- if (!authResult.user.id) {
4143
- throw AuthError.create("INVALID_TOKEN", "Token validated but user identity missing required 'id' field");
4384
+ if (chunkCount === 0 &&
4385
+ !metadata.fallbackAttempted &&
4386
+ !enhancedOptions.disableInternalFallback &&
4387
+ streamState.toolCalls.length === 0 &&
4388
+ streamState.toolResults.length === 0) {
4389
+ yield* self.handleStreamFallback(metadata, streamState, originalPrompt, enhancedOptions, providerName, (content) => {
4390
+ accumulatedContent += content;
4391
+ });
4144
4392
  }
4145
- // Merge validated user into context
4146
- options.context = {
4147
- ...(options.context || {}),
4148
- userId: authResult.user.id,
4149
- userEmail: authResult.user.email,
4150
- userRoles: authResult.user.roles,
4151
- };
4152
- }
4153
- // Handle pre-validated requestContext
4154
- if (options.requestContext) {
4155
- // When auth token was validated, token-derived identity fields
4156
- // MUST take precedence over requestContext to prevent privilege escalation.
4157
- const tokenDerivedFields = options.auth?.token && this.authProvider
4158
- ? {
4159
- userId: options.context?.userId,
4160
- userEmail: options.context?.userEmail,
4161
- userRoles: options.context?.userRoles,
4162
- }
4163
- : {};
4164
- options.context = {
4165
- ...(options.context || {}),
4166
- ...options.requestContext,
4167
- ...tokenDerivedFields,
4168
- };
4169
- }
4170
- this.emitStreamStartEvents(options, startTime);
4171
- // Auto-inject lifecycle middleware when callbacks are provided
4172
- // (must happen before workflow early return so that path gets middleware too)
4173
- if (options.onFinish || options.onError || options.onChunk) {
4174
- options.middleware = {
4175
- ...options.middleware,
4176
- middlewareConfig: {
4177
- ...options.middleware?.middlewareConfig,
4178
- lifecycle: {
4179
- ...options.middleware?.middlewareConfig?.lifecycle,
4180
- enabled: true,
4181
- config: {
4182
- ...options.middleware?.middlewareConfig?.lifecycle?.config,
4183
- ...(options.onFinish !== undefined ? { onFinish: options.onFinish } : {}),
4184
- ...(options.onError !== undefined ? { onError: options.onError } : {}),
4185
- ...(options.onChunk !== undefined ? { onChunk: options.onChunk } : {}),
4186
- },
4187
- },
4188
- },
4189
- };
4190
- }
4191
- // Check if workflow is requested
4192
- if (options.workflow || options.workflowConfig) {
4193
- const result = await this.streamWithWorkflow(options, startTime);
4194
- // Wrap the workflow stream so the span stays open until fully consumed
4195
- const originalWorkflowStream = result.stream;
4196
- const selfWorkflow = this;
4197
- result.stream = (async function* () {
4393
+ let resolvedUsage = streamUsage;
4394
+ if (!resolvedUsage && streamAnalytics) {
4198
4395
  try {
4199
- for await (const chunk of originalWorkflowStream) {
4200
- yield chunk;
4396
+ const resolved = await Promise.resolve(streamAnalytics);
4397
+ if (resolved?.tokenUsage) {
4398
+ resolvedUsage = resolved.tokenUsage;
4201
4399
  }
4202
- streamSpan.setStatus({ code: SpanStatusCode.OK });
4203
- }
4204
- catch (error) {
4205
- streamSpan.setStatus({
4206
- code: SpanStatusCode.ERROR,
4207
- message: error instanceof Error ? error.message : String(error),
4208
- });
4209
- throw error;
4210
4400
  }
4211
- finally {
4212
- selfWorkflow._disableToolCacheForCurrentRequest = false;
4213
- streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
4214
- streamSpan.end();
4401
+ catch {
4402
+ // non-blocking
4215
4403
  }
4216
- })();
4217
- return result;
4404
+ }
4405
+ self.emitter.emit("stream:complete", {
4406
+ type: "stream:complete",
4407
+ content: accumulatedContent,
4408
+ provider: metadata.fallbackProvider ?? providerName,
4409
+ model: metadata.fallbackModel ?? streamModel ?? enhancedOptions.model,
4410
+ prompt: enhancedOptions.input?.text ||
4411
+ enhancedOptions.prompt,
4412
+ metadata: {
4413
+ chunkCount,
4414
+ totalLength: accumulatedContent.length,
4415
+ durationMs: Date.now() - streamStartTime,
4416
+ sessionId,
4417
+ usage: resolvedUsage,
4418
+ ...(metadata.fallbackAttempted && {
4419
+ primaryProvider: providerName,
4420
+ primaryModel: enhancedOptions.model,
4421
+ fallback: true,
4422
+ }),
4423
+ },
4424
+ timestamp: Date.now(),
4425
+ });
4218
4426
  }
4219
- // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
4220
- return await this.setLangfuseContextFromOptions(options, async () => {
4221
- try {
4222
- // Prepare options: init memory, MCP, orchestration, Ollama auto-disable, tool detection
4223
- const { enhancedOptions, factoryResult } = await this.prepareStreamOptions(options, streamId, startTime, hrTimeStart);
4224
- const { stream: mcpStream, provider: providerName, usage: streamUsage, model: streamModel, finishReason: streamFinishReason, toolCalls: streamToolCalls, toolResults: streamToolResults, analytics: streamAnalytics, } = await this.createMCPStream(enhancedOptions);
4225
- const streamState = {
4226
- finishReason: streamFinishReason ?? "stop",
4227
- toolCalls: streamToolCalls,
4228
- toolResults: streamToolResults,
4229
- };
4230
- // Update span with resolved provider name
4231
- streamSpan.setAttribute(ATTR.NL_PROVIDER, providerName || "unknown");
4232
- let accumulatedContent = "";
4233
- let chunkCount = 0;
4234
- // Set up event capture listeners
4235
- const { eventSequence, cleanup: cleanupListeners } = this.setupStreamEventListeners();
4236
- const metadata = {
4237
- fallbackAttempted: false,
4238
- guardrailsBlocked: false,
4239
- error: undefined,
4240
- fallbackProvider: undefined,
4241
- fallbackModel: undefined,
4242
- };
4243
- const self = this;
4244
- const streamStartTime = Date.now();
4245
- const sessionId = enhancedOptions.context?.sessionId;
4246
- const processedStream = (async function* () {
4247
- let streamError;
4248
- try {
4249
- for await (const chunk of mcpStream) {
4250
- chunkCount++;
4251
- if (chunk && "content" in chunk && typeof chunk.content === "string") {
4252
- accumulatedContent += chunk.content;
4253
- self.emitter.emit("response:chunk", chunk.content);
4254
- // Emit stream:chunk event (Observability Solution 8)
4255
- self.emitter.emit("stream:chunk", {
4256
- type: "stream:chunk",
4257
- content: chunk.content,
4258
- metadata: {
4259
- chunkIndex: chunkCount,
4260
- totalLength: accumulatedContent.length,
4261
- },
4262
- timestamp: Date.now(),
4263
- });
4264
- }
4265
- yield chunk;
4266
- }
4267
- if (chunkCount === 0 &&
4268
- !metadata.fallbackAttempted &&
4269
- !enhancedOptions.disableInternalFallback &&
4270
- streamState.toolCalls.length === 0 &&
4271
- streamState.toolResults.length === 0) {
4272
- yield* self.handleStreamFallback(metadata, streamState, originalPrompt, enhancedOptions, providerName, accumulatedContent, (content) => {
4273
- accumulatedContent += content;
4274
- });
4275
- }
4276
- // Emit stream:complete event (Observability Solution 8)
4277
- // When fallback took over, attribute the completion to the
4278
- // fallback provider so downstream telemetry reflects reality.
4279
- const effectiveProvider = metadata.fallbackProvider ?? providerName;
4280
- const effectiveModel = metadata.fallbackModel ?? streamModel ?? enhancedOptions.model;
4281
- // Resolve analytics promise to get final token usage
4282
- let resolvedUsage = streamUsage;
4283
- if (!resolvedUsage && streamAnalytics) {
4284
- try {
4285
- const resolved = await Promise.resolve(streamAnalytics);
4286
- if (resolved?.tokenUsage) {
4287
- resolvedUsage = resolved.tokenUsage;
4288
- }
4289
- }
4290
- catch {
4291
- /* non-blocking */
4292
- }
4293
- }
4294
- self.emitter.emit("stream:complete", {
4295
- type: "stream:complete",
4296
- content: accumulatedContent,
4297
- provider: effectiveProvider,
4298
- model: effectiveModel,
4299
- prompt: enhancedOptions.input?.text || enhancedOptions.prompt,
4300
- metadata: {
4301
- chunkCount,
4302
- totalLength: accumulatedContent.length,
4303
- durationMs: Date.now() - streamStartTime,
4304
- sessionId,
4305
- usage: resolvedUsage,
4306
- ...(metadata.fallbackAttempted && {
4307
- primaryProvider: providerName,
4308
- primaryModel: enhancedOptions.model,
4309
- fallback: true,
4310
- }),
4311
- },
4312
- timestamp: Date.now(),
4313
- });
4314
- }
4315
- catch (error) {
4316
- streamError = error;
4317
- // Emit stream:error event (Observability Solution 8)
4318
- self.emitter.emit("stream:error", {
4319
- type: "stream:error",
4320
- content: error instanceof Error ? error.message : String(error),
4321
- provider: providerName,
4322
- model: enhancedOptions.model,
4323
- metadata: {
4324
- chunkCount,
4325
- totalLength: accumulatedContent.length,
4326
- durationMs: Date.now() - streamStartTime,
4327
- errorName: error instanceof Error ? error.name : "UnknownError",
4328
- sessionId,
4329
- },
4330
- timestamp: Date.now(),
4331
- });
4332
- throw error;
4333
- }
4334
- finally {
4335
- self._disableToolCacheForCurrentRequest = false;
4336
- cleanupListeners();
4337
- // Finalize span now that the stream is fully consumed
4338
- streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
4339
- streamSpan.setAttribute(ATTR.NL_OUTPUT_LENGTH, accumulatedContent.length);
4340
- // When fallback took over, the primary provider's span must
4341
- // reflect that it failed — never mark it as successful.
4342
- const primaryFailed = !!(metadata.error || streamError);
4343
- streamSpan.setAttribute(ATTR.GEN_AI_FINISH_REASON, primaryFailed ? "error" : "stop");
4344
- if (metadata.fallbackAttempted) {
4345
- streamSpan.setAttribute("neurolink.fallback_triggered", true);
4346
- if (metadata.fallbackProvider) {
4347
- streamSpan.setAttribute("neurolink.fallback_provider", metadata.fallbackProvider);
4348
- }
4349
- }
4350
- if (primaryFailed) {
4351
- streamSpan.setStatus({
4352
- code: SpanStatusCode.ERROR,
4353
- message: metadata.error || (streamError instanceof Error ? streamError.message : String(streamError)),
4354
- });
4355
- }
4356
- else {
4357
- streamSpan.setStatus({ code: SpanStatusCode.OK });
4358
- }
4359
- streamSpan.end();
4360
- if (accumulatedContent.trim()) {
4361
- logger.info(`[NeuroLink.stream] stream() - COMPLETE SUCCESS`, {
4362
- provider: providerName,
4363
- model: enhancedOptions.model,
4364
- responseTimeMs: Date.now() - startTime,
4365
- contentLength: accumulatedContent.length,
4366
- fallback: metadata.fallbackAttempted,
4367
- });
4368
- }
4369
- await self.storeStreamConversationMemory({
4370
- enhancedOptions,
4371
- providerName,
4372
- originalPrompt,
4373
- accumulatedContent,
4374
- startTime,
4375
- eventSequence,
4376
- });
4377
- }
4378
- })();
4379
- const streamResult = await this.processStreamResult(processedStream, enhancedOptions, factoryResult);
4380
- streamResult.finishReason = streamState.finishReason || streamResult.finishReason;
4381
- streamResult.toolCalls = streamState.toolCalls;
4382
- streamResult.toolResults = streamState.toolResults;
4383
- if (!streamResult.usage) {
4384
- streamResult.usage = streamUsage;
4385
- }
4386
- if (!streamResult.analytics) {
4387
- streamResult.analytics = streamAnalytics instanceof Promise ? await streamAnalytics : streamAnalytics;
4388
- }
4389
- const responseTime = Date.now() - startTime;
4390
- // Accumulate session cost for budget tracking
4391
- if (streamResult.analytics?.cost && streamResult.analytics.cost > 0) {
4392
- this._sessionCostUsd += streamResult.analytics.cost;
4427
+ catch (error) {
4428
+ streamError = error;
4429
+ self.emitter.emit("stream:error", {
4430
+ type: "stream:error",
4431
+ content: error instanceof Error ? error.message : String(error),
4432
+ provider: providerName,
4433
+ model: enhancedOptions.model,
4434
+ metadata: {
4435
+ chunkCount,
4436
+ totalLength: accumulatedContent.length,
4437
+ durationMs: Date.now() - streamStartTime,
4438
+ errorName: error instanceof Error ? error.name : "UnknownError",
4439
+ sessionId,
4440
+ },
4441
+ timestamp: Date.now(),
4442
+ });
4443
+ throw error;
4444
+ }
4445
+ finally {
4446
+ self._disableToolCacheForCurrentRequest = false;
4447
+ cleanupListeners();
4448
+ streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
4449
+ streamSpan.setAttribute(ATTR.NL_OUTPUT_LENGTH, accumulatedContent.length);
4450
+ const primaryFailed = !!(metadata.error || streamError);
4451
+ streamSpan.setAttribute(ATTR.GEN_AI_FINISH_REASON, primaryFailed ? "error" : "stop");
4452
+ if (metadata.fallbackAttempted) {
4453
+ streamSpan.setAttribute("neurolink.fallback_triggered", true);
4454
+ if (metadata.fallbackProvider) {
4455
+ streamSpan.setAttribute("neurolink.fallback_provider", metadata.fallbackProvider);
4393
4456
  }
4394
- this.emitStreamEndEvents(streamResult);
4395
- return this.createStreamResponse(streamResult, processedStream, {
4396
- providerName,
4397
- options,
4398
- startTime,
4399
- responseTime,
4400
- streamId,
4401
- fallback: metadata.fallbackAttempted,
4402
- guardrailsBlocked: metadata.guardrailsBlocked,
4403
- error: metadata.error,
4404
- events: eventSequence,
4457
+ }
4458
+ if (primaryFailed) {
4459
+ streamSpan.setStatus({
4460
+ code: SpanStatusCode.ERROR,
4461
+ message: metadata.error ||
4462
+ (streamError instanceof Error
4463
+ ? streamError.message
4464
+ : String(streamError)),
4405
4465
  });
4406
4466
  }
4407
- catch (error) {
4408
- if (options.disableInternalFallback) {
4409
- throw error;
4410
- }
4411
- return this.handleStreamError(error, options, startTime, streamId, undefined, undefined);
4467
+ else {
4468
+ streamSpan.setStatus({ code: SpanStatusCode.OK });
4412
4469
  }
4413
- });
4414
- }
4415
- catch (error) {
4416
- // End span on error before re-throwing
4417
- streamSpan.setStatus({
4418
- code: SpanStatusCode.ERROR,
4419
- message: error instanceof Error ? error.message : String(error),
4420
- });
4421
- if (error instanceof Error) {
4422
- streamSpan.recordException(error);
4470
+ streamSpan.end();
4471
+ if (accumulatedContent.trim()) {
4472
+ logger.info(`[NeuroLink.stream] stream() - COMPLETE SUCCESS`, {
4473
+ provider: providerName,
4474
+ model: enhancedOptions.model,
4475
+ responseTimeMs: Date.now() - startTime,
4476
+ contentLength: accumulatedContent.length,
4477
+ fallback: metadata.fallbackAttempted,
4478
+ });
4479
+ }
4480
+ await self.storeStreamConversationMemory({
4481
+ enhancedOptions,
4482
+ providerName,
4483
+ originalPrompt,
4484
+ accumulatedContent,
4485
+ startTime,
4486
+ eventSequence,
4487
+ });
4423
4488
  }
4424
- streamSpan.end();
4489
+ })();
4490
+ const streamResult = await this.processStreamResult(processedStream, enhancedOptions, factoryResult);
4491
+ streamResult.finishReason =
4492
+ streamState.finishReason || streamResult.finishReason;
4493
+ streamResult.toolCalls = streamState.toolCalls;
4494
+ streamResult.toolResults = streamState.toolResults;
4495
+ if (!streamResult.usage) {
4496
+ streamResult.usage = streamUsage;
4497
+ }
4498
+ if (!streamResult.analytics) {
4499
+ streamResult.analytics =
4500
+ streamAnalytics instanceof Promise
4501
+ ? await streamAnalytics
4502
+ : streamAnalytics;
4503
+ }
4504
+ if (streamResult.analytics?.cost && streamResult.analytics.cost > 0) {
4505
+ this._sessionCostUsd += streamResult.analytics.cost;
4506
+ }
4507
+ this.emitStreamEndEvents(streamResult);
4508
+ return this.createStreamResponse(streamResult, processedStream, {
4509
+ providerName,
4510
+ options,
4511
+ startTime,
4512
+ responseTime: Date.now() - startTime,
4513
+ streamId,
4514
+ fallback: metadata.fallbackAttempted,
4515
+ guardrailsBlocked: metadata.guardrailsBlocked,
4516
+ error: metadata.error,
4517
+ events: eventSequence,
4518
+ });
4519
+ }
4520
+ catch (error) {
4521
+ if (options.disableInternalFallback) {
4425
4522
  throw error;
4426
4523
  }
4427
- }); // end metricsTraceContextStorage.run
4524
+ return this.handleStreamError(error, options, startTime, streamId, undefined, undefined);
4525
+ }
4428
4526
  }
4429
4527
  /**
4430
4528
  * Prepare stream options: initialize memory, MCP, retrieval, orchestration,
@@ -4436,7 +4534,8 @@ Current user's request: ${currentInput}`;
4436
4534
  // Initialize MCP
4437
4535
  await this.initializeMCP();
4438
4536
  // Memory retrieval
4439
- if (this.shouldReadMemory(options.memory, options.context?.userId) && options.context?.userId) {
4537
+ if (this.shouldReadMemory(options.memory, options.context?.userId) &&
4538
+ options.context?.userId) {
4440
4539
  try {
4441
4540
  options.input.text = await this.retrieveMemory(options.input.text, options.context.userId, options.memory?.additionalUsers);
4442
4541
  logger.debug("Memory retrieval successful");
@@ -4481,7 +4580,8 @@ Current user's request: ${currentInput}`;
4481
4580
  if (!options.tools) {
4482
4581
  options.tools = {};
4483
4582
  }
4484
- options.tools[ragResult.toolName] = ragResult.tool;
4583
+ options.tools[ragResult.toolName] =
4584
+ ragResult.tool;
4485
4585
  // Inject RAG-aware system prompt so the AI uses the RAG tool first
4486
4586
  const ragSystemInstruction = [
4487
4587
  `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
@@ -4490,7 +4590,8 @@ Current user's request: ${currentInput}`;
4490
4590
  `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
4491
4591
  `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
4492
4592
  ].join(" ");
4493
- options.systemPrompt = (options.systemPrompt || "") + ragSystemInstruction;
4593
+ options.systemPrompt =
4594
+ (options.systemPrompt || "") + ragSystemInstruction;
4494
4595
  logger.info("[RAG] Tool injected into stream()", {
4495
4596
  toolName: ragResult.toolName,
4496
4597
  filesLoaded: ragResult.filesLoaded,
@@ -4518,7 +4619,8 @@ Current user's request: ${currentInput}`;
4518
4619
  * Prevents overwhelming smaller models with massive tool descriptions in the system message.
4519
4620
  */
4520
4621
  async autoDisableOllamaStreamTools(options) {
4521
- if ((options.provider === "ollama" || options.provider?.toLowerCase().includes("ollama")) &&
4622
+ if ((options.provider === "ollama" ||
4623
+ options.provider?.toLowerCase().includes("ollama")) &&
4522
4624
  !options.disableTools) {
4523
4625
  const { ModelConfigurationManager } = await import("./core/modelConfiguration.js");
4524
4626
  const modelConfig = ModelConfigurationManager.getInstance();
@@ -4560,16 +4662,31 @@ Current user's request: ${currentInput}`;
4560
4662
  };
4561
4663
  const onToolStart = (...args) => {
4562
4664
  const data = args[0];
4563
- captureEvent("tool:start", data);
4665
+ captureEvent("tool:start", {
4666
+ ...data,
4667
+ toolName: data.toolName ?? data.tool,
4668
+ });
4564
4669
  };
4565
4670
  const onToolEnd = (...args) => {
4566
4671
  const data = args[0];
4567
- captureEvent("tool:end", data);
4568
- if (data.result && data.result.uiComponent === true) {
4672
+ const toolName = data.toolName ?? data.tool;
4673
+ const responseTime = data.responseTime ?? data.duration;
4674
+ const success = data.success ?? (data.error !== undefined ? false : undefined);
4675
+ const augmented = {
4676
+ ...data,
4677
+ toolName,
4678
+ ...(responseTime !== undefined ? { responseTime } : {}),
4679
+ ...(success !== undefined ? { success } : {}),
4680
+ ...(data.error !== undefined ? { error: data.error } : {}),
4681
+ };
4682
+ captureEvent("tool:end", augmented);
4683
+ if (augmented.result && augmented.result.uiComponent === true) {
4569
4684
  captureEvent("ui-component", {
4570
- toolName: data.toolName,
4571
- componentData: data.result,
4685
+ toolName,
4686
+ componentData: augmented.result,
4572
4687
  timestamp: Date.now(),
4688
+ ...(success !== undefined ? { success } : {}),
4689
+ ...(responseTime !== undefined ? { responseTime } : {}),
4573
4690
  });
4574
4691
  }
4575
4692
  };
@@ -4602,7 +4719,7 @@ Current user's request: ${currentInput}`;
4602
4719
  * Handle fallback when the primary stream returns 0 chunks.
4603
4720
  * Yields chunks from a fallback provider and updates metadata accordingly.
4604
4721
  */
4605
- async *handleStreamFallback(metadata, streamState, originalPrompt, enhancedOptions, providerName, _accumulatedContent, appendContent) {
4722
+ async *handleStreamFallback(metadata, streamState, originalPrompt, enhancedOptions, providerName, appendContent) {
4606
4723
  metadata.fallbackAttempted = true;
4607
4724
  const errorMsg = "Stream completed with 0 chunks (possible guardrails block)";
4608
4725
  metadata.error = errorMsg;
@@ -4665,18 +4782,23 @@ Current user's request: ${currentInput}`;
4665
4782
  if (fallbackToolCalls.length > 0 || fallbackToolResults.length > 0) {
4666
4783
  streamState.toolCalls = fallbackToolCalls;
4667
4784
  streamState.toolResults = fallbackToolResults;
4668
- streamState.finishReason = fallbackResult.finishReason ?? streamState.finishReason;
4785
+ streamState.finishReason =
4786
+ fallbackResult.finishReason ?? streamState.finishReason;
4669
4787
  }
4670
4788
  let fallbackChunkCount = 0;
4671
4789
  for await (const fallbackChunk of fallbackResult.stream) {
4672
4790
  fallbackChunkCount++;
4673
- if (fallbackChunk && "content" in fallbackChunk && typeof fallbackChunk.content === "string") {
4791
+ if (fallbackChunk &&
4792
+ "content" in fallbackChunk &&
4793
+ typeof fallbackChunk.content === "string") {
4674
4794
  appendContent(fallbackChunk.content);
4675
4795
  this.emitter.emit("response:chunk", fallbackChunk.content);
4676
4796
  }
4677
4797
  yield fallbackChunk;
4678
4798
  }
4679
- if (fallbackChunkCount === 0 && fallbackToolCalls.length === 0 && fallbackToolResults.length === 0) {
4799
+ if (fallbackChunkCount === 0 &&
4800
+ fallbackToolCalls.length === 0 &&
4801
+ fallbackToolResults.length === 0) {
4680
4802
  throw new Error(`Fallback provider ${fallbackRoute.provider} also returned 0 chunks`);
4681
4803
  }
4682
4804
  // Fallback succeeded - likely guardrails blocked primary
@@ -4685,7 +4807,9 @@ Current user's request: ${currentInput}`;
4685
4807
  metadata.guardrailsBlocked = true;
4686
4808
  }
4687
4809
  catch (fallbackError) {
4688
- const fallbackErrorMsg = fallbackError instanceof Error ? fallbackError.message : String(fallbackError);
4810
+ const fallbackErrorMsg = fallbackError instanceof Error
4811
+ ? fallbackError.message
4812
+ : String(fallbackError);
4689
4813
  metadata.error = `${errorMsg}; Fallback failed: ${fallbackErrorMsg}`;
4690
4814
  logger.error("Fallback provider failed", {
4691
4815
  fallbackProvider: fallbackRoute.provider,
@@ -4699,19 +4823,22 @@ Current user's request: ${currentInput}`;
4699
4823
  * Handles conversation memory storage in the background.
4700
4824
  */
4701
4825
  async storeStreamConversationMemory(params) {
4702
- const { enhancedOptions, providerName, originalPrompt, accumulatedContent, startTime, eventSequence } = params;
4826
+ const { enhancedOptions, providerName, originalPrompt, accumulatedContent, startTime, eventSequence, } = params;
4703
4827
  // Guard: skip storing if no meaningful content was produced (no text AND no tool activity)
4704
4828
  const hasToolEvents = eventSequence.some((e) => e.type === "tool:start" || e.type === "tool:end");
4705
4829
  if (!accumulatedContent.trim() && !hasToolEvents) {
4706
4830
  logger.warn("[NeuroLink.stream] Skipping conversation turn storage — no text content or tool activity", {
4707
- sessionId: enhancedOptions.context?.sessionId,
4831
+ sessionId: enhancedOptions.context
4832
+ ?.sessionId,
4708
4833
  });
4709
4834
  return;
4710
4835
  }
4711
4836
  // Store memory after stream consumption is complete
4712
4837
  if (this.conversationMemory && enhancedOptions.context?.sessionId) {
4713
- const sessionId = enhancedOptions.context?.sessionId;
4714
- const userId = enhancedOptions.context?.userId;
4838
+ const sessionId = enhancedOptions.context
4839
+ ?.sessionId;
4840
+ const userId = enhancedOptions.context
4841
+ ?.userId;
4715
4842
  let providerDetails;
4716
4843
  if (enhancedOptions.model) {
4717
4844
  providerDetails = {
@@ -4730,7 +4857,8 @@ Current user's request: ${currentInput}`;
4730
4857
  providerDetails,
4731
4858
  enableSummarization: enhancedOptions.enableSummarization,
4732
4859
  events: eventSequence.length > 0 ? eventSequence : undefined,
4733
- requestId: enhancedOptions.context?.requestId,
4860
+ requestId: enhancedOptions.context
4861
+ ?.requestId,
4734
4862
  });
4735
4863
  this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "stream" }, Date.now() - memStoreStart, SpanStatus.OK);
4736
4864
  logger.debug("[NeuroLink.stream] Stored conversation turn with events", {
@@ -4760,7 +4888,8 @@ Current user's request: ${currentInput}`;
4760
4888
  validationStartTimeNs: validationStartTime.toString(),
4761
4889
  message: "Starting comprehensive input validation process",
4762
4890
  });
4763
- const hasText = typeof options?.input?.text === "string" && options.input.text.trim().length > 0;
4891
+ const hasText = typeof options?.input?.text === "string" &&
4892
+ options.input.text.trim().length > 0;
4764
4893
  // Accept audio when frames are present; sampleRateHz is optional (defaults applied later)
4765
4894
  const hasAudio = !!(options?.input?.audio &&
4766
4895
  options.input.audio.frames &&
@@ -4839,10 +4968,12 @@ Current user's request: ${currentInput}`;
4839
4968
  const streamCompactionSessionId = this.getCompactionSessionId(options);
4840
4969
  if (streamBudget.shouldCompact &&
4841
4970
  (hasCallerConversationHistory || this.conversationMemory) &&
4842
- streamMessageCount > (this.lastCompactionMessageCount.get(streamCompactionSessionId) ?? 0)) {
4971
+ streamMessageCount >
4972
+ (this.lastCompactionMessageCount.get(streamCompactionSessionId) ?? 0)) {
4843
4973
  const compactor = new ContextCompactor({
4844
4974
  provider: providerName,
4845
- summarizationProvider: this.conversationMemoryConfig?.conversationMemory?.summarizationProvider,
4975
+ summarizationProvider: this.conversationMemoryConfig?.conversationMemory
4976
+ ?.summarizationProvider,
4846
4977
  summarizationModel: this.conversationMemoryConfig?.conversationMemory?.summarizationModel,
4847
4978
  });
4848
4979
  const compactionResult = await compactor.compact(conversationMessages, streamBudget.availableInputTokens, this.conversationMemoryConfig?.conversationMemory, options.context?.requestId);
@@ -4987,7 +5118,8 @@ Current user's request: ${currentInput}`;
4987
5118
  parentSpanId: traceCtx?.parentSpanId,
4988
5119
  });
4989
5120
  failedSpan = SpanSerializer.endSpan(failedSpan, SpanStatus.ERROR);
4990
- failedSpan.statusMessage = error instanceof Error ? error.message : String(error);
5121
+ failedSpan.statusMessage =
5122
+ error instanceof Error ? error.message : String(error);
4991
5123
  failedSpan.durationMs = Date.now() - startTime;
4992
5124
  this.metricsAggregator.recordSpan(failedSpan);
4993
5125
  getMetricsAggregator().recordSpan(failedSpan);
@@ -5011,7 +5143,9 @@ Current user's request: ${currentInput}`;
5011
5143
  const fallbackProcessedStream = (async function* (self) {
5012
5144
  try {
5013
5145
  for await (const chunk of fallbackStreamResult.stream) {
5014
- if (chunk && "content" in chunk && typeof chunk.content === "string") {
5146
+ if (chunk &&
5147
+ "content" in chunk &&
5148
+ typeof chunk.content === "string") {
5015
5149
  fallbackAccumulatedContent += chunk.content;
5016
5150
  // Emit chunk event
5017
5151
  self.emitter.emit("response:chunk", chunk.content);
@@ -5030,9 +5164,12 @@ Current user's request: ${currentInput}`;
5030
5164
  }
5031
5165
  // Store memory after fallback stream consumption is complete
5032
5166
  // Guard: skip storing if fallback accumulated content is empty
5033
- if (self.conversationMemory && enhancedOptions?.context?.sessionId && fallbackAccumulatedContent.trim()) {
5167
+ if (self.conversationMemory &&
5168
+ enhancedOptions?.context?.sessionId &&
5169
+ fallbackAccumulatedContent.trim()) {
5034
5170
  const sessionId = enhancedOptions?.context?.sessionId;
5035
- const userId = enhancedOptions?.context?.userId;
5171
+ const userId = enhancedOptions?.context
5172
+ ?.userId;
5036
5173
  let providerDetails;
5037
5174
  if (options.model) {
5038
5175
  providerDetails = {
@@ -5051,7 +5188,8 @@ Current user's request: ${currentInput}`;
5051
5188
  providerDetails,
5052
5189
  enableSummarization: enhancedOptions?.enableSummarization,
5053
5190
  requestId: enhancedOptions?.context?.requestId ||
5054
- options.context?.requestId,
5191
+ options.context
5192
+ ?.requestId,
5055
5193
  });
5056
5194
  self.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "fallback-stream" }, Date.now() - memStoreStart, SpanStatus.OK);
5057
5195
  }
@@ -5287,12 +5425,11 @@ Current user's request: ${currentInput}`;
5287
5425
  this.activeToolExecutions.set(executionId, context);
5288
5426
  this.currentStreamToolExecutions.push(context);
5289
5427
  // Emit event (NeuroLinkEvents format for compatibility)
5290
- this.emitter.emit("tool:start", {
5291
- tool: toolName,
5428
+ this.emitter.emit("tool:start", createToolEventPayload(toolName, {
5292
5429
  input,
5293
5430
  timestamp: startTime,
5294
5431
  executionId,
5295
- });
5432
+ }));
5296
5433
  logger.debug(`tool:start emitted for ${toolName}`, {
5297
5434
  toolName,
5298
5435
  executionId,
@@ -5350,14 +5487,15 @@ Current user's request: ${currentInput}`;
5350
5487
  // Store in history
5351
5488
  this.toolExecutionHistory.push(summary);
5352
5489
  // Emit event (NeuroLinkEvents format for compatibility)
5353
- this.emitter.emit("tool:end", {
5354
- tool: toolName,
5490
+ this.emitter.emit("tool:end", createToolEventPayload(toolName, {
5355
5491
  result,
5356
5492
  error,
5493
+ success,
5494
+ responseTime: duration,
5357
5495
  timestamp: endTime,
5358
5496
  duration,
5359
5497
  executionId: finalExecutionId,
5360
- });
5498
+ }));
5361
5499
  logger.debug(`tool:end emitted for ${toolName}`, {
5362
5500
  toolName,
5363
5501
  executionId: finalExecutionId,
@@ -5460,7 +5598,9 @@ Current user's request: ${currentInput}`;
5460
5598
  // Compose with any parent abortSignal from ToolExecutionOptions
5461
5599
  const execOptions = args[1];
5462
5600
  const parentSignal = execOptions?.abortSignal;
5463
- const composedSignal = parentSignal ? AbortSignal.any([parentSignal, timeoutSignal]) : timeoutSignal;
5601
+ const composedSignal = parentSignal
5602
+ ? AbortSignal.any([parentSignal, timeoutSignal])
5603
+ : timeoutSignal;
5464
5604
  // Replace the abortSignal in execution options
5465
5605
  const augmentedContext = {
5466
5606
  ...execOptions,
@@ -5517,7 +5657,9 @@ Current user's request: ${currentInput}`;
5517
5657
  * @returns Current context or undefined if not set
5518
5658
  */
5519
5659
  getToolContext() {
5520
- return this.toolExecutionContext ? { ...this.toolExecutionContext } : undefined;
5660
+ return this.toolExecutionContext
5661
+ ? { ...this.toolExecutionContext }
5662
+ : undefined;
5521
5663
  }
5522
5664
  /**
5523
5665
  * Clear the tool execution context
@@ -5621,7 +5763,8 @@ Current user's request: ${currentInput}`;
5621
5763
  typeof this.conversationMemory.updateAgenticLoopReport !== "function") {
5622
5764
  throw new ConversationMemoryError("updateAgenticLoopReport is only supported with Redis conversation memory.", "CONFIG_ERROR");
5623
5765
  }
5624
- await withTimeout(this.conversationMemory.updateAgenticLoopReport(sessionId, userId, report), 5000);
5766
+ await withTimeout(this
5767
+ .conversationMemory.updateAgenticLoopReport(sessionId, userId, report), 5000);
5625
5768
  }
5626
5769
  /**
5627
5770
  * Get all registered custom tools
@@ -5639,10 +5782,14 @@ Current user's request: ${currentInput}`;
5639
5782
  description: tool.description,
5640
5783
  hasParameters: !!tool.parameters,
5641
5784
  parametersType: typeof tool.parameters,
5642
- parametersKeys: tool.parameters && typeof tool.parameters === "object" ? Object.keys(tool.parameters) : "NOT_OBJECT",
5785
+ parametersKeys: tool.parameters && typeof tool.parameters === "object"
5786
+ ? Object.keys(tool.parameters)
5787
+ : "NOT_OBJECT",
5643
5788
  hasInputSchema: !!tool.inputSchema,
5644
5789
  inputSchemaType: typeof tool.inputSchema,
5645
- inputSchemaKeys: tool.inputSchema && typeof tool.inputSchema === "object" ? Object.keys(tool.inputSchema) : "NOT_OBJECT",
5790
+ inputSchemaKeys: tool.inputSchema && typeof tool.inputSchema === "object"
5791
+ ? Object.keys(tool.inputSchema)
5792
+ : "NOT_OBJECT",
5646
5793
  hasEffectiveSchema: !!effectiveSchema,
5647
5794
  effectiveSchemaType: typeof effectiveSchema,
5648
5795
  effectiveSchemaHasProperties: !!effectiveSchema?.properties,
@@ -5663,14 +5810,18 @@ Current user's request: ${currentInput}`;
5663
5810
  execute: async (params, context) => {
5664
5811
  // CONTEXT MERGING: Combine all available contexts for maximum information
5665
5812
  const storedContext = this.toolExecutionContext || {};
5666
- const runtimeContext = context && isNonNullObject(context) ? context : {};
5813
+ const runtimeContext = context && isNonNullObject(context)
5814
+ ? context
5815
+ : {};
5667
5816
  // Merge contexts with runtime context taking precedence
5668
5817
  // This ensures we have the richest possible context for tool execution
5669
5818
  const executionContext = {
5670
5819
  ...storedContext, // Base context from setToolContext (session, tokens, etc.)
5671
5820
  ...runtimeContext, // Runtime context from AI model (if any)
5672
5821
  // Ensure we always have at least a sessionId for tracing
5673
- sessionId: runtimeContext.sessionId || storedContext.sessionId || `fallback-${Date.now()}`,
5822
+ sessionId: runtimeContext.sessionId ||
5823
+ storedContext.sessionId ||
5824
+ `fallback-${Date.now()}`,
5674
5825
  };
5675
5826
  // Enhanced logging for context debugging
5676
5827
  logger.debug("Tool execution context merged", {
@@ -5678,7 +5829,8 @@ Current user's request: ${currentInput}`;
5678
5829
  storedContextKeys: Object.keys(storedContext),
5679
5830
  runtimeContextKeys: Object.keys(runtimeContext),
5680
5831
  finalContextKeys: Object.keys(executionContext),
5681
- hasJuspayToken: !!executionContext.juspayToken,
5832
+ hasJuspayToken: !!executionContext
5833
+ .juspayToken,
5682
5834
  hasShopId: !!executionContext.shopId,
5683
5835
  sessionId: executionContext.sessionId,
5684
5836
  });
@@ -5706,7 +5858,9 @@ Current user's request: ${currentInput}`;
5706
5858
  toolMap.set(toolName, {
5707
5859
  name: toolName,
5708
5860
  description: toolDef.description || `File tool: ${toolName}`,
5709
- inputSchema: typeof toolParams === "object" && toolParams !== null ? toolParams : { type: "object", properties: {} },
5861
+ inputSchema: typeof toolParams === "object" && toolParams !== null
5862
+ ? toolParams
5863
+ : { type: "object", properties: {} },
5710
5864
  execute: async (params) => {
5711
5865
  return await toolDef.execute(params, {
5712
5866
  toolCallId: `file-tool-${Date.now()}`,
@@ -5802,361 +5956,345 @@ Current user's request: ${currentInput}`;
5802
5956
  * @returns Tool execution result
5803
5957
  */
5804
5958
  async executeTool(toolName, params = {}, options) {
5805
- const functionTag = "NeuroLink.executeTool";
5806
- const executionStartTime = Date.now();
5807
- // === MCP ENHANCEMENT: RequestBatcher — batch programmatic tool calls ===
5808
- // LIMITATION: When the request batcher is enabled, per-tool timeout and retry
5809
- // settings (from registration options or call-site options) are NOT applied.
5810
- // The batcher uses its own hardcoded defaults for timeout and retry behavior.
5811
- // Use `bypassBatcher: true` to ensure per-tool timeout/retry is respected.
5812
- // Additionally, note that executeToolInternal's safe-tool retry logic may still
5813
- // trigger even when maxRetries is set to 0, since it operates independently.
5814
5959
  if (this.mcpToolBatcher && !options?.bypassBatcher) {
5815
5960
  return this.mcpToolBatcher.execute(toolName, params);
5816
5961
  }
5817
- // Determine tool type for span attributes
5818
- const externalTools = this.externalServerManager.getAllTools();
5819
- const externalTool = externalTools.find((tool) => tool.name === toolName);
5820
- const toolType = externalTool ? "mcp" : this.getCustomTools().has(toolName) ? "custom" : "external";
5821
- // Compute truncated input size for the span
5822
- const inputStr = typeof params === "string" ? params : params ? JSON.stringify(params) : "";
5823
- const inputSize = inputStr.length;
5824
- const truncatedInput = inputStr.length > 2048 ? inputStr.substring(0, 2048) : inputStr;
5962
+ const executionContext = this.createToolExecutionContext(toolName, params, options);
5825
5963
  return tracers.mcp.startActiveSpan("neurolink.tool.execute", {
5826
5964
  attributes: {
5827
5965
  "tool.name": toolName,
5828
- "tool.type": toolType,
5829
- "tool.input_size": inputSize,
5830
- "tool.input_preview": truncatedInput,
5966
+ "tool.type": executionContext.toolType,
5967
+ "tool.input_size": executionContext.inputSize,
5968
+ "tool.input_preview": executionContext.truncatedInput,
5831
5969
  },
5832
- }, async (toolSpan) => {
5833
- try {
5834
- // Debug: Log tool execution attempt
5835
- logger.debug(`[${functionTag}] Tool execution requested:`, {
5836
- toolName,
5837
- params: isNonNullObject(params) ? transformParamsForLogging(params) : params,
5838
- hasExternalManager: !!this.externalServerManager,
5839
- });
5840
- // 🔧 PARAMETER TRACE: Log tool execution details for debugging
5841
- logger.debug(`Tool execution detailed analysis`, {
5842
- toolName,
5843
- executionStartTime,
5844
- paramsAnalysis: {
5845
- type: typeof params,
5846
- isNull: params === null,
5847
- isUndefined: params === undefined,
5848
- isEmpty: params && typeof params === "object" && Object.keys(params).length === 0,
5849
- keys: params && typeof params === "object" ? Object.keys(params) : "NOT_OBJECT",
5850
- keysLength: params && typeof params === "object" ? Object.keys(params).length : 0,
5970
+ }, (toolSpan) => this.executeToolWithSpan(toolName, params, options, executionContext, toolSpan));
5971
+ }
5972
+ createToolExecutionContext(toolName, params, options) {
5973
+ const externalTool = this.externalServerManager
5974
+ .getAllTools()
5975
+ .find((tool) => tool.name === toolName);
5976
+ const toolType = externalTool
5977
+ ? "mcp"
5978
+ : this.getCustomTools().has(toolName)
5979
+ ? "custom"
5980
+ : "external";
5981
+ const inputStr = typeof params === "string"
5982
+ ? params
5983
+ : params
5984
+ ? JSON.stringify(params)
5985
+ : "";
5986
+ return {
5987
+ functionTag: "NeuroLink.executeTool",
5988
+ executionStartTime: Date.now(),
5989
+ externalTool,
5990
+ toolType,
5991
+ inputSize: inputStr.length,
5992
+ truncatedInput: inputStr.length > 2048 ? inputStr.substring(0, 2048) : inputStr,
5993
+ options,
5994
+ };
5995
+ }
5996
+ async executeToolWithSpan(toolName, params, options, executionContext, toolSpan) {
5997
+ try {
5998
+ const prepared = await this.prepareToolExecutionState(toolName, params, options, executionContext);
5999
+ return await this.runPreparedToolExecution(toolName, params, prepared, executionContext, toolSpan);
6000
+ }
6001
+ catch (outerError) {
6002
+ if (!(outerError instanceof NeuroLinkError)) {
6003
+ const errMsg = outerError instanceof Error ? outerError.message : String(outerError);
6004
+ toolSpan.recordException(outerError instanceof Error ? outerError : new Error(errMsg));
6005
+ toolSpan.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
6006
+ }
6007
+ throw outerError;
6008
+ }
6009
+ finally {
6010
+ toolSpan.end();
6011
+ }
6012
+ }
6013
+ async prepareToolExecutionState(toolName, params, options, executionContext) {
6014
+ logger.debug(`[${executionContext.functionTag}] Tool execution requested:`, {
6015
+ toolName,
6016
+ params: isNonNullObject(params)
6017
+ ? transformParamsForLogging(params)
6018
+ : params,
6019
+ hasExternalManager: !!this.externalServerManager,
6020
+ });
6021
+ logger.debug(`Tool execution detailed analysis`, {
6022
+ toolName,
6023
+ executionStartTime: executionContext.executionStartTime,
6024
+ paramsAnalysis: {
6025
+ type: typeof params,
6026
+ isNull: params === null,
6027
+ isUndefined: params === undefined,
6028
+ isEmpty: params &&
6029
+ typeof params === "object" &&
6030
+ Object.keys(params).length === 0,
6031
+ keys: params && typeof params === "object"
6032
+ ? Object.keys(params)
6033
+ : "NOT_OBJECT",
6034
+ keysLength: params && typeof params === "object"
6035
+ ? Object.keys(params).length
6036
+ : 0,
6037
+ },
6038
+ isTargetTool: toolName === "juspay-analytics_SuccessRateSRByTime",
6039
+ options,
6040
+ hasExternalManager: !!this.externalServerManager,
6041
+ });
6042
+ this.emitter.emit("tool:start", createToolEventPayload(toolName, {
6043
+ timestamp: executionContext.executionStartTime,
6044
+ input: params,
6045
+ }));
6046
+ const toolInfo = this.toolRegistry.getToolInfo(toolName);
6047
+ const finalOptions = {
6048
+ timeout: options?.timeout ??
6049
+ toolInfo?.tool?.timeoutMs ??
6050
+ TOOL_TIMEOUTS.EXECUTION_DEFAULT_MS,
6051
+ maxRetries: options?.maxRetries ??
6052
+ toolInfo?.tool?.maxRetries ??
6053
+ RETRY_ATTEMPTS.DEFAULT,
6054
+ retryDelayMs: options?.retryDelayMs || RETRY_DELAYS.BASE_MS,
6055
+ authContext: options?.authContext,
6056
+ disableToolCache: options?.disableToolCache,
6057
+ };
6058
+ const { MemoryManager } = await import("./utils/performance.js");
6059
+ const startMemory = MemoryManager.getMemoryUsageMB();
6060
+ const breakerServerId = executionContext.externalTool?.serverId ||
6061
+ toolInfo?.tool?.serverId ||
6062
+ "unknown";
6063
+ const breakerKey = `${breakerServerId}.${toolName}`;
6064
+ let circuitBreaker = this.toolCircuitBreakers.get(breakerKey);
6065
+ if (!circuitBreaker) {
6066
+ circuitBreaker = new CircuitBreaker(CIRCUIT_BREAKER.FAILURE_THRESHOLD, CIRCUIT_BREAKER_RESET_MS);
6067
+ this.toolCircuitBreakers.set(breakerKey, circuitBreaker);
6068
+ }
6069
+ let metrics = this.toolExecutionMetrics.get(toolName);
6070
+ if (!metrics) {
6071
+ metrics = {
6072
+ totalExecutions: 0,
6073
+ successfulExecutions: 0,
6074
+ failedExecutions: 0,
6075
+ averageExecutionTime: 0,
6076
+ lastExecutionTime: 0,
6077
+ errorCategories: {},
6078
+ };
6079
+ this.toolExecutionMetrics.set(toolName, metrics);
6080
+ }
6081
+ metrics.totalExecutions++;
6082
+ return {
6083
+ finalOptions,
6084
+ startMemory,
6085
+ circuitBreaker,
6086
+ breakerKey,
6087
+ metrics,
6088
+ };
6089
+ }
6090
+ async runPreparedToolExecution(toolName, params, prepared, executionContext, toolSpan) {
6091
+ try {
6092
+ mcpLogger.debug(`[${executionContext.functionTag}] Executing tool: ${toolName}`, {
6093
+ toolName,
6094
+ params,
6095
+ options: prepared.finalOptions,
6096
+ circuitBreakerState: prepared.circuitBreaker.getState(),
6097
+ });
6098
+ const result = await prepared.circuitBreaker.execute(async () => {
6099
+ return withRetry(async () => withTimeout(this.executeToolInternal(toolName, params, prepared.finalOptions), prepared.finalOptions.timeout, ErrorFactory.toolTimeout(toolName, prepared.finalOptions.timeout)), {
6100
+ maxAttempts: prepared.finalOptions.maxRetries + 1,
6101
+ delayMs: prepared.finalOptions.retryDelayMs,
6102
+ isRetriable: isRetriableError,
6103
+ onRetry: (attempt, error) => {
6104
+ mcpLogger.warn(`[${executionContext.functionTag}] Retrying tool execution (attempt ${attempt})`, {
6105
+ toolName,
6106
+ error: error.message,
6107
+ attempt,
6108
+ });
5851
6109
  },
5852
- isTargetTool: toolName === "juspay-analytics_SuccessRateSRByTime",
5853
- options,
5854
- hasExternalManager: !!this.externalServerManager,
5855
6110
  });
5856
- // Emit tool start event (NeuroLink format - keep existing)
5857
- this.emitter.emit("tool:start", {
5858
- toolName,
5859
- timestamp: executionStartTime,
5860
- input: params, // Enhanced: add input parameters
6111
+ });
6112
+ return await this.handleSuccessfulToolExecution(toolName, result, prepared, executionContext, toolSpan);
6113
+ }
6114
+ catch (error) {
6115
+ return this.handleFailedToolExecution(toolName, params, error, prepared, executionContext, toolSpan);
6116
+ }
6117
+ }
6118
+ async handleSuccessfulToolExecution(toolName, result, prepared, executionContext, toolSpan) {
6119
+ const executionTime = Date.now() - executionContext.executionStartTime;
6120
+ prepared.metrics.successfulExecutions++;
6121
+ prepared.metrics.lastExecutionTime = executionTime;
6122
+ prepared.metrics.averageExecutionTime =
6123
+ (prepared.metrics.averageExecutionTime *
6124
+ (prepared.metrics.successfulExecutions - 1) +
6125
+ executionTime) /
6126
+ prepared.metrics.successfulExecutions;
6127
+ const { MemoryManager } = await import("./utils/performance.js");
6128
+ const endMemory = MemoryManager.getMemoryUsageMB();
6129
+ const memoryDelta = endMemory.heapUsed - prepared.startMemory.heapUsed;
6130
+ if (memoryDelta > 20) {
6131
+ mcpLogger.warn(`Tool '${toolName}' used excessive memory: ${memoryDelta}MB`, {
6132
+ toolName,
6133
+ memoryDelta,
6134
+ executionTime,
6135
+ });
6136
+ }
6137
+ mcpLogger.debug(`[${executionContext.functionTag}] Tool executed successfully`, {
6138
+ toolName,
6139
+ executionTime,
6140
+ memoryDelta,
6141
+ circuitBreakerState: prepared.circuitBreaker.getState(),
6142
+ });
6143
+ const resultObj = result && typeof result === "object"
6144
+ ? result
6145
+ : undefined;
6146
+ const isToolError = (resultObj && "isError" in resultObj && resultObj.isError === true) ||
6147
+ (resultObj && "success" in resultObj && resultObj.success === false);
6148
+ if (isToolError) {
6149
+ try {
6150
+ await prepared.circuitBreaker.execute(async () => {
6151
+ throw new Error(`Tool ${toolName} returned isError:true`);
5861
6152
  });
5862
- // NL-004: Use composite key (serverId.toolName) to avoid cross-server collisions
5863
- // Fetch toolInfo early so per-tool timeout is available for finalOptions
5864
- const toolInfo = this.toolRegistry.getToolInfo(toolName);
5865
- // Set default options — per-tool values from registration take precedence over global defaults.
5866
- // When not explicitly set at registration, global defaults are preserved for backward compatibility.
5867
- const registeredTimeout = toolInfo?.tool?.timeoutMs;
5868
- const registeredMaxRetries = toolInfo?.tool?.maxRetries;
5869
- const finalOptions = {
5870
- timeout: options?.timeout ?? registeredTimeout ?? TOOL_TIMEOUTS.EXECUTION_DEFAULT_MS,
5871
- maxRetries: options?.maxRetries ?? registeredMaxRetries ?? RETRY_ATTEMPTS.DEFAULT,
5872
- retryDelayMs: options?.retryDelayMs || RETRY_DELAYS.BASE_MS,
5873
- authContext: options?.authContext,
5874
- disableToolCache: options?.disableToolCache,
5875
- };
5876
- // Track memory usage for tool execution
5877
- const { MemoryManager } = await import("./utils/performance.js");
5878
- const startMemory = MemoryManager.getMemoryUsageMB();
5879
- const breakerServerId = externalTool?.serverId || toolInfo?.tool?.serverId || "unknown";
5880
- const breakerKey = `${breakerServerId}.${toolName}`;
5881
- // Get or create circuit breaker for this tool
5882
- if (!this.toolCircuitBreakers.has(breakerKey)) {
5883
- this.toolCircuitBreakers.set(breakerKey, new CircuitBreaker(CIRCUIT_BREAKER.FAILURE_THRESHOLD, CIRCUIT_BREAKER_RESET_MS));
5884
- }
5885
- const circuitBreaker = this.toolCircuitBreakers.get(breakerKey);
5886
- // Initialize metrics for this tool if not exists
5887
- if (!this.toolExecutionMetrics.has(toolName)) {
5888
- this.toolExecutionMetrics.set(toolName, {
5889
- totalExecutions: 0,
5890
- successfulExecutions: 0,
5891
- failedExecutions: 0,
5892
- averageExecutionTime: 0,
5893
- lastExecutionTime: 0,
5894
- errorCategories: {},
5895
- });
5896
- }
5897
- const metrics = this.toolExecutionMetrics.get(toolName);
5898
- if (metrics) {
5899
- metrics.totalExecutions++;
5900
- }
5901
- try {
5902
- mcpLogger.debug(`[${functionTag}] Executing tool: ${toolName}`, {
5903
- toolName,
5904
- params,
5905
- options: finalOptions,
5906
- circuitBreakerState: circuitBreaker?.getState(),
5907
- });
5908
- // Execute with circuit breaker, timeout, and retry logic
5909
- if (!circuitBreaker) {
5910
- throw new Error(`Circuit breaker not initialized for tool: ${toolName}`);
5911
- }
5912
- const result = await circuitBreaker.execute(async () => {
5913
- return await withRetry(async () => {
5914
- return await withTimeout(this.executeToolInternal(toolName, params, finalOptions), finalOptions.timeout, ErrorFactory.toolTimeout(toolName, finalOptions.timeout));
5915
- }, {
5916
- maxAttempts: finalOptions.maxRetries + 1, // +1 for initial attempt
5917
- delayMs: finalOptions.retryDelayMs,
5918
- isRetriable: isRetriableError,
5919
- onRetry: (attempt, error) => {
5920
- mcpLogger.warn(`[${functionTag}] Retrying tool execution (attempt ${attempt})`, {
5921
- toolName,
5922
- error: error.message,
5923
- attempt,
5924
- });
5925
- },
5926
- });
5927
- });
5928
- // Update success metrics
5929
- const executionTime = Date.now() - executionStartTime;
5930
- if (metrics) {
5931
- metrics.successfulExecutions++;
5932
- metrics.lastExecutionTime = executionTime;
5933
- metrics.averageExecutionTime =
5934
- (metrics.averageExecutionTime * (metrics.successfulExecutions - 1) + executionTime) /
5935
- metrics.successfulExecutions;
5936
- }
5937
- // Track memory usage
5938
- const endMemory = MemoryManager.getMemoryUsageMB();
5939
- const memoryDelta = endMemory.heapUsed - startMemory.heapUsed;
5940
- if (memoryDelta > 20) {
5941
- mcpLogger.warn(`Tool '${toolName}' used excessive memory: ${memoryDelta}MB`, {
5942
- toolName,
5943
- memoryDelta,
5944
- executionTime,
5945
- });
5946
- }
5947
- mcpLogger.debug(`[${functionTag}] Tool executed successfully`, {
5948
- toolName,
5949
- executionTime,
5950
- memoryDelta,
5951
- circuitBreakerState: circuitBreaker?.getState(),
5952
- });
5953
- // Set span success attributes
5954
- // Check if result has isError flag (MCP tool error result)
5955
- // Also detect toolRegistry-wrapped errors that return { success: false }
5956
- const resultObj = result && typeof result === "object" ? result : undefined;
5957
- const isToolError = (resultObj && "isError" in resultObj && resultObj.isError === true) ||
5958
- (resultObj && "success" in resultObj && resultObj.success === false);
5959
- // NL-001: Count isError:true results as circuit breaker failures
5960
- // This ensures tools that return error results (not just thrown errors) are tracked
5961
- // TODO(NL-009): This records a failure AFTER the circuit breaker already recorded
5962
- // success inside `circuitBreaker.execute()`. The correct fix is to check `isToolError`
5963
- // inside the execute callback and throw before returning, so the breaker never sees
5964
- // success. Deferred because moving the check inside the callback requires restructuring
5965
- // the retry/timeout wrapper chain and is high-risk for a hot-path change.
5966
- if (isToolError && circuitBreaker) {
5967
- // Record a failure by executing a rejected promise through the breaker
5968
- try {
5969
- await circuitBreaker.execute(async () => {
5970
- throw new Error(`Tool ${toolName} returned isError:true`);
5971
- });
5972
- }
5973
- catch {
5974
- // Expected — we intentionally triggered the failure recording
5975
- }
5976
- mcpLogger.debug(`[${functionTag}] Circuit breaker failure recorded for isError result`, {
5977
- toolName,
5978
- circuitBreakerState: circuitBreaker.getState(),
5979
- circuitBreakerFailures: circuitBreaker.getFailureCount(),
5980
- });
5981
- }
5982
- // NL-002 + NL-003: Format and capture MCP error results
5983
- if (isToolError) {
5984
- const resultObj = result;
5985
- const contentArr = resultObj.content;
5986
- const errorText = contentArr
5987
- ?.filter((c) => c.type === "text" && c.text)
5988
- .map((c) => c.text)
5989
- .join(" ") || (typeof resultObj.error === "string" ? resultObj.error : "Unknown error");
5990
- const errorCategory = classifyMcpErrorMessage(errorText);
5991
- const prefix = `[TOOL_ERROR: ${toolName} failed (${errorCategory})] `;
5992
- // NL-002: Clone content array to avoid mutating shared objects, then prefix error
5993
- if (contentArr && Array.isArray(contentArr)) {
5994
- const clonedContent = contentArr.map((c) => ({ ...c }));
5995
- for (const content of clonedContent) {
5996
- if (content.type === "text" && content.text) {
5997
- content.text = prefix + content.text;
5998
- break; // Only prefix the first text content
5999
- }
6000
- }
6001
- resultObj.content = clonedContent;
6002
- }
6003
- // NL-003: Capture error details in span attributes for telemetry
6004
- toolSpan.setAttribute("tool.error.message", errorText.substring(0, 500));
6005
- toolSpan.setAttribute("tool.error.category", errorCategory);
6006
- toolSpan.setStatus({
6007
- code: SpanStatusCode.ERROR,
6008
- message: `MCP tool returned isError: ${errorText.substring(0, 200)}`,
6009
- });
6010
- if (metrics) {
6011
- metrics.failedExecutions++;
6012
- const prevSuccessful = metrics.successfulExecutions;
6013
- metrics.successfulExecutions = Math.max(0, metrics.successfulExecutions - 1);
6014
- // Recompute averageExecutionTime: back out this execution's duration
6015
- // which was incorrectly included as a success
6016
- if (prevSuccessful > 1) {
6017
- metrics.averageExecutionTime =
6018
- (metrics.averageExecutionTime * prevSuccessful - executionTime) / (prevSuccessful - 1);
6019
- }
6020
- else {
6021
- // No remaining successful executions, reset to 0
6022
- metrics.averageExecutionTime = 0;
6023
- }
6024
- const mappedCategory = mcpCategoryToErrorCategory(errorCategory);
6025
- metrics.errorCategories[mappedCategory] = (metrics.errorCategories[mappedCategory] || 0) + 1;
6026
- }
6027
- }
6028
- // Emit tool end event AFTER isError check so success flag is correct
6029
- this.emitToolEndEvent(toolName, executionStartTime, !isToolError, result);
6030
- toolSpan.setAttribute("tool.result.status", isToolError ? "error" : "success");
6031
- toolSpan.setAttribute("tool.duration_ms", executionTime);
6032
- return result;
6033
- }
6034
- catch (error) {
6035
- // Update failure metrics
6036
- if (metrics) {
6037
- metrics.failedExecutions++;
6038
- }
6039
- const executionTime = Date.now() - executionStartTime;
6040
- // Circuit breaker open: return a structured non-retryable isError result
6041
- // so the AI model understands the tool is temporarily unavailable.
6042
- // Log at warn (not error) since this is expected circuit breaker behavior.
6043
- if (error instanceof CircuitBreakerOpenError) {
6044
- mcpLogger.warn(`[${functionTag}] Tool blocked by circuit breaker: ${toolName}`, {
6045
- toolName,
6046
- breakerState: error.breakerState,
6047
- retryAfter: error.retryAfter,
6048
- retryAfterMs: error.retryAfterMs,
6049
- failureCount: error.failureCount,
6050
- executionTime,
6051
- });
6052
- if (metrics) {
6053
- const category = ErrorCategory.EXECUTION;
6054
- metrics.errorCategories[category] = (metrics.errorCategories[category] || 0) + 1;
6055
- }
6056
- // Emit tool end event for circuit breaker open
6057
- this.emitToolEndEvent(toolName, executionStartTime, false, undefined);
6058
- toolSpan.setAttribute("tool.result.status", "circuit_breaker_open");
6059
- toolSpan.setAttribute("tool.duration_ms", executionTime);
6060
- toolSpan.setAttribute("tool.circuit_breaker.state", error.breakerState);
6061
- toolSpan.setAttribute("tool.circuit_breaker.retry_after_ms", error.retryAfterMs);
6062
- toolSpan.setAttribute("tool.circuit_breaker.failure_count", error.failureCount);
6063
- toolSpan.setStatus({
6064
- code: SpanStatusCode.ERROR,
6065
- message: `Circuit breaker open for ${toolName}: ${error.message}`,
6066
- });
6067
- // Return an isError tool result so the AI can inform the user
6068
- // instead of throwing, which would cause a generic retry
6069
- return {
6070
- isError: true,
6071
- content: [
6072
- {
6073
- type: "text",
6074
- text: `TOOL TEMPORARILY UNAVAILABLE: "${toolName}" has been disabled after ` +
6075
- `${error.failureCount} failures. ` +
6076
- `This is a circuit breaker protection — do NOT retry this tool. ` +
6077
- `It will become available again after ${Math.ceil(error.retryAfterMs / 1000)} seconds ` +
6078
- `(at ${error.retryAfter}). ` +
6079
- `Instead, inform the user that the operation failed and suggest trying again later.`,
6080
- },
6081
- ],
6082
- };
6083
- }
6084
- // Create structured error
6085
- let structuredError;
6086
- if (error instanceof NeuroLinkError) {
6087
- structuredError = error;
6088
- }
6089
- else if (error instanceof Error) {
6090
- // Categorize the error based on the message
6091
- if (error.message.includes("timeout")) {
6092
- structuredError = ErrorFactory.toolTimeout(toolName, finalOptions.timeout);
6093
- }
6094
- else if (error.message.includes("not found")) {
6095
- const availableTools = await this.getAllAvailableTools();
6096
- structuredError = ErrorFactory.toolNotFound(toolName, extractToolNames(availableTools.map((t) => ({ name: t.name }))));
6097
- }
6098
- else if (error.message.includes("validation") || error.message.includes("parameter")) {
6099
- structuredError = ErrorFactory.invalidParameters(toolName, error, params);
6100
- }
6101
- else if (error.message.includes("network") || error.message.includes("connection")) {
6102
- structuredError = ErrorFactory.networkError(toolName, error);
6103
- }
6104
- else {
6105
- structuredError = ErrorFactory.toolExecutionFailed(toolName, error);
6106
- }
6107
- }
6108
- else {
6109
- structuredError = ErrorFactory.toolExecutionFailed(toolName, new Error(String(error)));
6110
- }
6111
- if (metrics) {
6112
- const category = structuredError.category || ErrorCategory.EXECUTION;
6113
- metrics.errorCategories[category] = (metrics.errorCategories[category] || 0) + 1;
6153
+ }
6154
+ catch {
6155
+ // Expected intentionally records the failure
6156
+ }
6157
+ mcpLogger.debug(`[${executionContext.functionTag}] Circuit breaker failure recorded for isError result`, {
6158
+ toolName,
6159
+ circuitBreakerState: prepared.circuitBreaker.getState(),
6160
+ circuitBreakerFailures: prepared.circuitBreaker.getFailureCount(),
6161
+ });
6162
+ const contentArr = resultObj?.content;
6163
+ const errorText = contentArr
6164
+ ?.filter((content) => content.type === "text" && content.text)
6165
+ .map((content) => content.text)
6166
+ .join(" ") ||
6167
+ (typeof resultObj?.error === "string"
6168
+ ? resultObj.error
6169
+ : "Unknown error");
6170
+ const errorCategory = classifyMcpErrorMessage(errorText);
6171
+ const prefix = `[TOOL_ERROR: ${toolName} failed (${errorCategory})] `;
6172
+ if (resultObj && Array.isArray(contentArr)) {
6173
+ const clonedContent = contentArr.map((content) => ({ ...content }));
6174
+ for (const content of clonedContent) {
6175
+ if (content.type === "text" && content.text) {
6176
+ content.text = prefix + content.text;
6177
+ break;
6114
6178
  }
6115
- // Emit tool end event BEFORE the error event.
6116
- // Node.js EventEmitter throws on unhandled 'error' events,
6117
- // which would prevent tool:end from being emitted.
6118
- this.emitToolEndEvent(toolName, executionStartTime, false, undefined, structuredError);
6119
- // Centralized error event emission
6120
- this.emitter.emit("error", structuredError);
6121
- // Add execution context to structured error
6122
- structuredError = new NeuroLinkError({
6123
- ...structuredError,
6124
- context: {
6125
- ...structuredError.context,
6126
- executionTime,
6127
- params,
6128
- options: finalOptions,
6129
- circuitBreakerState: circuitBreaker?.getState(),
6130
- circuitBreakerFailures: circuitBreaker?.getFailureCount(),
6131
- metrics: { ...metrics },
6132
- },
6133
- });
6134
- // Log structured error
6135
- logStructuredError(structuredError);
6136
- // Record error on span
6137
- toolSpan.setAttribute("tool.result.status", "error");
6138
- toolSpan.setAttribute("tool.duration_ms", executionTime);
6139
- toolSpan.recordException(structuredError);
6140
- toolSpan.setStatus({
6141
- code: SpanStatusCode.ERROR,
6142
- message: structuredError.message,
6143
- });
6144
- throw structuredError;
6145
6179
  }
6180
+ resultObj.content = clonedContent;
6146
6181
  }
6147
- catch (outerError) {
6148
- // If the error was not already recorded on the span (from inner catch), record it
6149
- if (!(outerError instanceof NeuroLinkError)) {
6150
- const errMsg = outerError instanceof Error ? outerError.message : String(outerError);
6151
- toolSpan.recordException(outerError instanceof Error ? outerError : new Error(errMsg));
6152
- toolSpan.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
6153
- }
6154
- throw outerError;
6182
+ toolSpan.setAttribute("tool.error.message", errorText.substring(0, 500));
6183
+ toolSpan.setAttribute("tool.error.category", errorCategory);
6184
+ toolSpan.setStatus({
6185
+ code: SpanStatusCode.ERROR,
6186
+ message: `MCP tool returned isError: ${errorText.substring(0, 200)}`,
6187
+ });
6188
+ prepared.metrics.failedExecutions++;
6189
+ const prevSuccessful = prepared.metrics.successfulExecutions;
6190
+ prepared.metrics.successfulExecutions = Math.max(0, prepared.metrics.successfulExecutions - 1);
6191
+ prepared.metrics.averageExecutionTime =
6192
+ prevSuccessful > 1
6193
+ ? (prepared.metrics.averageExecutionTime * prevSuccessful -
6194
+ executionTime) /
6195
+ (prevSuccessful - 1)
6196
+ : 0;
6197
+ const mappedCategory = mcpCategoryToErrorCategory(errorCategory);
6198
+ prepared.metrics.errorCategories[mappedCategory] =
6199
+ (prepared.metrics.errorCategories[mappedCategory] || 0) + 1;
6200
+ }
6201
+ this.emitToolEndEvent(toolName, executionContext.executionStartTime, !isToolError, result);
6202
+ toolSpan.setAttribute("tool.result.status", isToolError ? "error" : "success");
6203
+ toolSpan.setAttribute("tool.duration_ms", executionTime);
6204
+ return result;
6205
+ }
6206
+ async handleFailedToolExecution(toolName, params, error, prepared, executionContext, toolSpan) {
6207
+ prepared.metrics.failedExecutions++;
6208
+ const executionTime = Date.now() - executionContext.executionStartTime;
6209
+ if (error instanceof CircuitBreakerOpenError) {
6210
+ mcpLogger.warn(`[${executionContext.functionTag}] Tool blocked by circuit breaker: ${toolName}`, {
6211
+ toolName,
6212
+ breakerState: error.breakerState,
6213
+ retryAfter: error.retryAfter,
6214
+ retryAfterMs: error.retryAfterMs,
6215
+ failureCount: error.failureCount,
6216
+ executionTime,
6217
+ });
6218
+ prepared.metrics.errorCategories[ErrorCategory.EXECUTION] =
6219
+ (prepared.metrics.errorCategories[ErrorCategory.EXECUTION] || 0) + 1;
6220
+ this.emitToolEndEvent(toolName, executionContext.executionStartTime, false, undefined);
6221
+ toolSpan.setAttribute("tool.result.status", "circuit_breaker_open");
6222
+ toolSpan.setAttribute("tool.duration_ms", executionTime);
6223
+ toolSpan.setAttribute("tool.circuit_breaker.state", error.breakerState);
6224
+ toolSpan.setAttribute("tool.circuit_breaker.retry_after_ms", error.retryAfterMs);
6225
+ toolSpan.setAttribute("tool.circuit_breaker.failure_count", error.failureCount);
6226
+ toolSpan.setStatus({
6227
+ code: SpanStatusCode.ERROR,
6228
+ message: `Circuit breaker open for ${toolName}: ${error.message}`,
6229
+ });
6230
+ return {
6231
+ isError: true,
6232
+ content: [
6233
+ {
6234
+ type: "text",
6235
+ text: `TOOL TEMPORARILY UNAVAILABLE: "${toolName}" has been disabled after ` +
6236
+ `${error.failureCount} failures. ` +
6237
+ `This is a circuit breaker protection — do NOT retry this tool. ` +
6238
+ `It will become available again after ${Math.ceil(error.retryAfterMs / 1000)} seconds ` +
6239
+ `(at ${error.retryAfter}). ` +
6240
+ `Instead, inform the user that the operation failed and suggest trying again later.`,
6241
+ },
6242
+ ],
6243
+ };
6244
+ }
6245
+ let structuredError;
6246
+ if (error instanceof NeuroLinkError) {
6247
+ structuredError = error;
6248
+ }
6249
+ else if (error instanceof Error) {
6250
+ if (error.message.includes("timeout")) {
6251
+ structuredError = ErrorFactory.toolTimeout(toolName, prepared.finalOptions.timeout);
6155
6252
  }
6156
- finally {
6157
- toolSpan.end();
6253
+ else if (error.message.includes("not found")) {
6254
+ const availableTools = await this.getAllAvailableTools();
6255
+ structuredError = ErrorFactory.toolNotFound(toolName, extractToolNames(availableTools.map((tool) => ({ name: tool.name }))));
6256
+ }
6257
+ else if (error.message.includes("validation") ||
6258
+ error.message.includes("parameter")) {
6259
+ structuredError = ErrorFactory.invalidParameters(toolName, error, params);
6260
+ }
6261
+ else if (error.message.includes("network") ||
6262
+ error.message.includes("connection")) {
6263
+ structuredError = ErrorFactory.networkError(toolName, error);
6264
+ }
6265
+ else {
6266
+ structuredError = ErrorFactory.toolExecutionFailed(toolName, error);
6158
6267
  }
6268
+ }
6269
+ else {
6270
+ structuredError = ErrorFactory.toolExecutionFailed(toolName, new Error(String(error)));
6271
+ }
6272
+ const category = structuredError.category || ErrorCategory.EXECUTION;
6273
+ prepared.metrics.errorCategories[category] =
6274
+ (prepared.metrics.errorCategories[category] || 0) + 1;
6275
+ this.emitToolEndEvent(toolName, executionContext.executionStartTime, false, undefined, structuredError);
6276
+ this.emitter.emit("error", structuredError);
6277
+ structuredError = new NeuroLinkError({
6278
+ ...structuredError,
6279
+ context: {
6280
+ ...structuredError.context,
6281
+ executionTime,
6282
+ params,
6283
+ options: prepared.finalOptions,
6284
+ circuitBreakerState: prepared.circuitBreaker.getState(),
6285
+ circuitBreakerFailures: prepared.circuitBreaker.getFailureCount(),
6286
+ metrics: { ...prepared.metrics },
6287
+ },
6159
6288
  });
6289
+ logStructuredError(structuredError);
6290
+ toolSpan.setAttribute("tool.result.status", "error");
6291
+ toolSpan.setAttribute("tool.duration_ms", executionTime);
6292
+ toolSpan.recordException(structuredError);
6293
+ toolSpan.setStatus({
6294
+ code: SpanStatusCode.ERROR,
6295
+ message: structuredError.message,
6296
+ });
6297
+ throw structuredError;
6160
6298
  }
6161
6299
  /**
6162
6300
  * Internal tool execution method with MCP enhancements wired in:
@@ -6234,7 +6372,9 @@ Current user's request: ${currentInput}`;
6234
6372
  inputSchema: {},
6235
6373
  };
6236
6374
  const decision = this.mcpToolRouter.route(mcpTool);
6237
- externalTool = matchingTools.find((t) => t.serverId === decision.serverId) || matchingTools[0];
6375
+ externalTool =
6376
+ matchingTools.find((t) => t.serverId === decision.serverId) ||
6377
+ matchingTools[0];
6238
6378
  logger.debug(`[${functionTag}] Router selected server: ${decision.serverId}`, {
6239
6379
  strategy: decision.strategy,
6240
6380
  confidence: decision.confidence,
@@ -6290,7 +6430,10 @@ Current user's request: ${currentInput}`;
6290
6430
  });
6291
6431
  const result = (await this.toolRegistry.executeTool(toolName, params, context));
6292
6432
  // Check if result indicates a failure and emit error event
6293
- if (result && typeof result === "object" && "success" in result && result.success === false) {
6433
+ if (result &&
6434
+ typeof result === "object" &&
6435
+ "success" in result &&
6436
+ result.success === false) {
6294
6437
  const errorMessage = result.error || "Tool execution failed";
6295
6438
  const errorToEmit = new Error(errorMessage);
6296
6439
  this.emitter.emit("error", errorToEmit);
@@ -6328,7 +6471,10 @@ Current user's request: ${currentInput}`;
6328
6471
  execute: async () => ({}),
6329
6472
  }
6330
6473
  : undefined;
6331
- if (toolStubForRetry && isSafeToRetry(toolStubForRetry) && error instanceof Error && isRetriableError(error)) {
6474
+ if (toolStubForRetry &&
6475
+ isSafeToRetry(toolStubForRetry) &&
6476
+ error instanceof Error &&
6477
+ isRetriableError(error)) {
6332
6478
  logger.debug(`[${functionTag}] Tool ${toolName} is safe to retry, attempting once more`);
6333
6479
  try {
6334
6480
  const retryResult = await executeWithMiddleware(executeCore);
@@ -6373,7 +6519,8 @@ Current user's request: ${currentInput}`;
6373
6519
  }
6374
6520
  async getAllAvailableTools() {
6375
6521
  // Return from cache if available and not stale
6376
- if (this.toolCache && Date.now() - this.toolCache.timestamp < this.toolCacheDuration) {
6522
+ if (this.toolCache &&
6523
+ Date.now() - this.toolCache.timestamp < this.toolCacheDuration) {
6377
6524
  logger.debug("Returning available tools from cache");
6378
6525
  return this.toolCache.tools;
6379
6526
  }
@@ -6454,7 +6601,9 @@ Current user's request: ${currentInput}`;
6454
6601
  if (!allTools.has(tool.name)) {
6455
6602
  const optimizedTool = optimizeToolForCollection(tool, {
6456
6603
  category: detectCategory({
6457
- existingCategory: typeof tool.metadata?.category === "string" ? tool.metadata.category : undefined,
6604
+ existingCategory: typeof tool.metadata?.category === "string"
6605
+ ? tool.metadata.category
6606
+ : undefined,
6458
6607
  isExternal: true,
6459
6608
  serverId: tool.serverId,
6460
6609
  }),
@@ -6610,7 +6759,9 @@ Current user's request: ${currentInput}`;
6610
6759
  status: "failed",
6611
6760
  configured: false,
6612
6761
  authenticated: false,
6613
- error: error instanceof Error ? error.message : "Ollama service not running",
6762
+ error: error instanceof Error
6763
+ ? error.message
6764
+ : "Ollama service not running",
6614
6765
  responseTime: Date.now() - startTime,
6615
6766
  };
6616
6767
  }
@@ -6733,7 +6884,9 @@ Current user's request: ${currentInput}`;
6733
6884
  inMemoryServerInfos.length +
6734
6885
  builtInServerInfos.length +
6735
6886
  autoDiscoveredServerInfos.length;
6736
- const availableServers = externalStats.connectedServers + inMemoryServerInfos.length + builtInServerInfos.length; // in-memory and built-in always available
6887
+ const availableServers = externalStats.connectedServers +
6888
+ inMemoryServerInfos.length +
6889
+ builtInServerInfos.length; // in-memory and built-in always available
6737
6890
  const totalTools = allTools.length + externalStats.totalTools;
6738
6891
  return {
6739
6892
  mcpInitialized: this.mcpInitialized,
@@ -6802,7 +6955,8 @@ Current user's request: ${currentInput}`;
6802
6955
  // Test external MCP servers
6803
6956
  const externalServer = this.externalServerManager.getServer(serverId);
6804
6957
  if (externalServer) {
6805
- return externalServer.status === "connected" && externalServer.client !== null;
6958
+ return (externalServer.status === "connected" &&
6959
+ externalServer.client !== null);
6806
6960
  }
6807
6961
  return false;
6808
6962
  }
@@ -6922,7 +7076,9 @@ Current user's request: ${currentInput}`;
6922
7076
  metrics[toolName] = {
6923
7077
  ...toolMetrics,
6924
7078
  errorCategories: { ...toolMetrics.errorCategories },
6925
- successRate: toolMetrics.totalExecutions > 0 ? toolMetrics.successfulExecutions / toolMetrics.totalExecutions : 0,
7079
+ successRate: toolMetrics.totalExecutions > 0
7080
+ ? toolMetrics.successfulExecutions / toolMetrics.totalExecutions
7081
+ : 0,
6926
7082
  };
6927
7083
  }
6928
7084
  return metrics;
@@ -6942,7 +7098,7 @@ Current user's request: ${currentInput}`;
6942
7098
  */
6943
7099
  getToolCircuitBreakerStatus() {
6944
7100
  const status = {};
6945
- for (const [toolName, circuitBreaker] of this.toolCircuitBreakers.entries()) {
7101
+ for (const [toolName, circuitBreaker,] of this.toolCircuitBreakers.entries()) {
6946
7102
  status[toolName] = {
6947
7103
  state: circuitBreaker.getState(),
6948
7104
  failureCount: circuitBreaker.getFailureCount(),
@@ -6995,7 +7151,8 @@ Current user's request: ${currentInput}`;
6995
7151
  ? metrics.successfulExecutions / metrics.totalExecutions
6996
7152
  : 0
6997
7153
  : 0;
6998
- const isHealthy = (!circuitBreaker || circuitBreaker.getState() === "closed") && successRate >= 0.8;
7154
+ const isHealthy = (!circuitBreaker || circuitBreaker.getState() === "closed") &&
7155
+ successRate >= 0.8;
6999
7156
  if (isHealthy) {
7000
7157
  healthyCount++;
7001
7158
  }
@@ -7036,7 +7193,9 @@ Current user's request: ${currentInput}`;
7036
7193
  successRate,
7037
7194
  averageExecutionTime: metrics?.averageExecutionTime || 0,
7038
7195
  lastExecutionTime: metrics?.lastExecutionTime || 0,
7039
- errorCategories: metrics?.errorCategories ? { ...metrics.errorCategories } : {},
7196
+ errorCategories: metrics?.errorCategories
7197
+ ? { ...metrics.errorCategories }
7198
+ : {},
7040
7199
  },
7041
7200
  circuitBreaker: {
7042
7201
  state: circuitBreaker?.getState() || "closed",
@@ -7188,7 +7347,8 @@ Current user's request: ${currentInput}`;
7188
7347
  */
7189
7348
  async storeToolExecutions(sessionId, userId, toolCalls, toolResults, currentTime) {
7190
7349
  // Check if tools are not empty
7191
- const hasToolData = (toolCalls && toolCalls.length > 0) || (toolResults && toolResults.length > 0);
7350
+ const hasToolData = (toolCalls && toolCalls.length > 0) ||
7351
+ (toolResults && toolResults.length > 0);
7192
7352
  if (!hasToolData) {
7193
7353
  logger.debug("Tool execution storage skipped", {
7194
7354
  hasToolData,
@@ -7198,7 +7358,8 @@ Current user's request: ${currentInput}`;
7198
7358
  return;
7199
7359
  }
7200
7360
  // Type guard to ensure it's Redis conversation memory manager
7201
- const redisMemory = this.conversationMemory;
7361
+ const redisMemory = this
7362
+ .conversationMemory;
7202
7363
  try {
7203
7364
  await redisMemory.storeToolExecution(sessionId, userId, toolCalls, toolResults, currentTime);
7204
7365
  }
@@ -7217,7 +7378,9 @@ Current user's request: ${currentInput}`;
7217
7378
  */
7218
7379
  isToolExecutionStorageAvailable() {
7219
7380
  const isRedisStorage = process.env.STORAGE_TYPE === "redis";
7220
- const hasRedisConversationMemory = this.conversationMemory && this.conversationMemory.constructor.name === "RedisConversationMemoryManager";
7381
+ const hasRedisConversationMemory = this.conversationMemory &&
7382
+ this.conversationMemory.constructor.name ===
7383
+ "RedisConversationMemoryManager";
7221
7384
  return !!(isRedisStorage && hasRedisConversationMemory);
7222
7385
  }
7223
7386
  /**
@@ -7736,7 +7899,8 @@ Current user's request: ${currentInput}`;
7736
7899
  return null;
7737
7900
  }
7738
7901
  // Check for explicit annotations set on the tool first
7739
- const explicitAnnotations = toolInfo.tool.annotations;
7902
+ const explicitAnnotations = toolInfo.tool
7903
+ .annotations;
7740
7904
  // Infer annotations from the tool name/description as fallback
7741
7905
  const inferredAnnotations = inferAnnotations({
7742
7906
  name: toolInfo.tool.name,
@@ -7768,7 +7932,9 @@ Current user's request: ${currentInput}`;
7768
7932
  const result = await this.externalServerManager.executeTool(tool.serverId, tool.name, params, { timeout: 30000 });
7769
7933
  mcpLogger.debug(`[NeuroLink] External MCP tool execution result: ${tool.name}`, {
7770
7934
  success: !!result,
7771
- hasData: !!(result && typeof result === "object" && "content" in result),
7935
+ hasData: !!(result &&
7936
+ typeof result === "object" &&
7937
+ "content" in result),
7772
7938
  });
7773
7939
  return result;
7774
7940
  }
@@ -8184,7 +8350,9 @@ Current user's request: ${currentInput}`;
8184
8350
  logger.debug("[NeuroLink] OpenTelemetry shutdown successfully");
8185
8351
  }
8186
8352
  catch (error) {
8187
- const err = error instanceof Error ? error : new Error(`OpenTelemetry shutdown error: ${String(error)}`);
8353
+ const err = error instanceof Error
8354
+ ? error
8355
+ : new Error(`OpenTelemetry shutdown error: ${String(error)}`);
8188
8356
  cleanupErrors.push(err);
8189
8357
  logger.warn("[NeuroLink] Error shutting down OpenTelemetry:", error);
8190
8358
  }
@@ -8196,7 +8364,9 @@ Current user's request: ${currentInput}`;
8196
8364
  logger.debug("[NeuroLink] External MCP servers shutdown successfully");
8197
8365
  }
8198
8366
  catch (error) {
8199
- const err = error instanceof Error ? error : new Error(`External server shutdown error: ${String(error)}`);
8367
+ const err = error instanceof Error
8368
+ ? error
8369
+ : new Error(`External server shutdown error: ${String(error)}`);
8200
8370
  cleanupErrors.push(err);
8201
8371
  logger.warn("[NeuroLink] Error shutting down external MCP servers:", error);
8202
8372
  }
@@ -8210,7 +8380,9 @@ Current user's request: ${currentInput}`;
8210
8380
  logger.debug("[NeuroLink] Event listeners removed successfully");
8211
8381
  }
8212
8382
  catch (error) {
8213
- const err = error instanceof Error ? error : new Error(`Event emitter cleanup error: ${String(error)}`);
8383
+ const err = error instanceof Error
8384
+ ? error
8385
+ : new Error(`Event emitter cleanup error: ${String(error)}`);
8214
8386
  cleanupErrors.push(err);
8215
8387
  logger.warn("[NeuroLink] Error removing event listeners:", error);
8216
8388
  }
@@ -8223,7 +8395,9 @@ Current user's request: ${currentInput}`;
8223
8395
  logger.debug("[NeuroLink] Circuit breakers cleared successfully");
8224
8396
  }
8225
8397
  catch (error) {
8226
- const err = error instanceof Error ? error : new Error(`Circuit breaker cleanup error: ${String(error)}`);
8398
+ const err = error instanceof Error
8399
+ ? error
8400
+ : new Error(`Circuit breaker cleanup error: ${String(error)}`);
8227
8401
  cleanupErrors.push(err);
8228
8402
  logger.warn("[NeuroLink] Error clearing circuit breakers:", error);
8229
8403
  }
@@ -8260,7 +8434,9 @@ Current user's request: ${currentInput}`;
8260
8434
  logger.debug("[NeuroLink] Maps and caches cleared successfully");
8261
8435
  }
8262
8436
  catch (error) {
8263
- const err = error instanceof Error ? error : new Error(`Cache cleanup error: ${String(error)}`);
8437
+ const err = error instanceof Error
8438
+ ? error
8439
+ : new Error(`Cache cleanup error: ${String(error)}`);
8264
8440
  cleanupErrors.push(err);
8265
8441
  logger.warn("[NeuroLink] Error clearing caches:", error);
8266
8442
  }
@@ -8286,7 +8462,9 @@ Current user's request: ${currentInput}`;
8286
8462
  logger.debug("[NeuroLink] Initialization state reset successfully");
8287
8463
  }
8288
8464
  catch (error) {
8289
- const err = error instanceof Error ? error : new Error(`State reset error: ${String(error)}`);
8465
+ const err = error instanceof Error
8466
+ ? error
8467
+ : new Error(`State reset error: ${String(error)}`);
8290
8468
  cleanupErrors.push(err);
8291
8469
  logger.warn("[NeuroLink] Error resetting state:", error);
8292
8470
  }
@@ -8330,8 +8508,11 @@ Current user's request: ${currentInput}`;
8330
8508
  }
8331
8509
  const compactor = new ContextCompactor({
8332
8510
  ...config,
8333
- summarizationProvider: config?.summarizationProvider ?? this.conversationMemoryConfig?.conversationMemory?.summarizationProvider,
8334
- summarizationModel: config?.summarizationModel ?? this.conversationMemoryConfig?.conversationMemory?.summarizationModel,
8511
+ summarizationProvider: config?.summarizationProvider ??
8512
+ this.conversationMemoryConfig?.conversationMemory
8513
+ ?.summarizationProvider,
8514
+ summarizationModel: config?.summarizationModel ??
8515
+ this.conversationMemoryConfig?.conversationMemory?.summarizationModel,
8335
8516
  });
8336
8517
  // Use actual context window to determine target, not arbitrary heuristic
8337
8518
  const budgetInfo = checkContextBudget({
@@ -8406,7 +8587,8 @@ Current user's request: ${currentInput}`;
8406
8587
  let provider;
8407
8588
  let providerType;
8408
8589
  // Duck-type check: direct MastraAuthProvider instance
8409
- if ("authenticateToken" in config && typeof config.authenticateToken === "function") {
8590
+ if ("authenticateToken" in config &&
8591
+ typeof config.authenticateToken === "function") {
8410
8592
  provider = config;
8411
8593
  providerType = provider.type;
8412
8594
  }
@@ -8450,7 +8632,8 @@ Current user's request: ${currentInput}`;
8450
8632
  }
8451
8633
  finally {
8452
8634
  if (this.authInitPromise &&
8453
- (this.pendingAuthConfig === undefined || this.pendingAuthConfig === pendingAuthConfig)) {
8635
+ (this.pendingAuthConfig === undefined ||
8636
+ this.pendingAuthConfig === pendingAuthConfig)) {
8454
8637
  this.authInitPromise = undefined;
8455
8638
  }
8456
8639
  }