@juspay/neurolink 9.42.0 → 9.42.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/CHANGELOG.md +2 -0
  2. package/dist/auth/anthropicOAuth.js +12 -0
  3. package/dist/browser/neurolink.min.js +337 -336
  4. package/dist/cli/commands/mcp.d.ts +6 -0
  5. package/dist/cli/commands/mcp.js +188 -184
  6. package/dist/cli/commands/proxy.js +537 -518
  7. package/dist/core/baseProvider.d.ts +6 -1
  8. package/dist/core/baseProvider.js +208 -230
  9. package/dist/core/factory.d.ts +3 -0
  10. package/dist/core/factory.js +138 -188
  11. package/dist/evaluation/pipeline/evaluationPipeline.js +5 -2
  12. package/dist/evaluation/scorers/scorerRegistry.d.ts +3 -0
  13. package/dist/evaluation/scorers/scorerRegistry.js +353 -282
  14. package/dist/lib/auth/anthropicOAuth.js +12 -0
  15. package/dist/lib/core/baseProvider.d.ts +6 -1
  16. package/dist/lib/core/baseProvider.js +208 -230
  17. package/dist/lib/core/factory.d.ts +3 -0
  18. package/dist/lib/core/factory.js +138 -188
  19. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +5 -2
  20. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +3 -0
  21. package/dist/lib/evaluation/scorers/scorerRegistry.js +353 -282
  22. package/dist/lib/mcp/toolRegistry.d.ts +2 -0
  23. package/dist/lib/mcp/toolRegistry.js +32 -31
  24. package/dist/lib/neurolink.d.ts +38 -0
  25. package/dist/lib/neurolink.js +1858 -1689
  26. package/dist/lib/providers/googleAiStudio.js +0 -5
  27. package/dist/lib/providers/googleVertex.d.ts +10 -0
  28. package/dist/lib/providers/googleVertex.js +436 -444
  29. package/dist/lib/providers/litellm.d.ts +1 -0
  30. package/dist/lib/providers/litellm.js +73 -64
  31. package/dist/lib/providers/ollama.js +17 -4
  32. package/dist/lib/providers/openAI.d.ts +2 -0
  33. package/dist/lib/providers/openAI.js +139 -140
  34. package/dist/lib/proxy/claudeFormat.js +12 -4
  35. package/dist/lib/proxy/oauthFetch.js +298 -318
  36. package/dist/lib/proxy/proxyConfig.js +3 -1
  37. package/dist/lib/proxy/proxyFetch.js +250 -222
  38. package/dist/lib/proxy/requestLogger.js +132 -45
  39. package/dist/lib/proxy/sseInterceptor.js +36 -11
  40. package/dist/lib/server/routes/claudeProxyRoutes.d.ts +10 -1
  41. package/dist/lib/server/routes/claudeProxyRoutes.js +2726 -2272
  42. package/dist/lib/services/server/ai/observability/instrumentation.js +194 -218
  43. package/dist/lib/tasks/backends/bullmqBackend.js +24 -18
  44. package/dist/lib/tasks/store/redisTaskStore.js +23 -16
  45. package/dist/lib/tasks/taskManager.d.ts +2 -0
  46. package/dist/lib/tasks/taskManager.js +100 -5
  47. package/dist/lib/telemetry/telemetryService.js +9 -5
  48. package/dist/lib/types/proxyTypes.d.ts +124 -1
  49. package/dist/lib/utils/providerHealth.d.ts +1 -0
  50. package/dist/lib/utils/providerHealth.js +46 -31
  51. package/dist/lib/utils/providerUtils.js +11 -22
  52. package/dist/mcp/toolRegistry.d.ts +2 -0
  53. package/dist/mcp/toolRegistry.js +32 -31
  54. package/dist/neurolink.d.ts +38 -0
  55. package/dist/neurolink.js +1858 -1689
  56. package/dist/providers/googleAiStudio.js +0 -5
  57. package/dist/providers/googleVertex.d.ts +10 -0
  58. package/dist/providers/googleVertex.js +436 -444
  59. package/dist/providers/litellm.d.ts +1 -0
  60. package/dist/providers/litellm.js +73 -64
  61. package/dist/providers/ollama.js +17 -4
  62. package/dist/providers/openAI.d.ts +2 -0
  63. package/dist/providers/openAI.js +139 -140
  64. package/dist/proxy/claudeFormat.js +12 -4
  65. package/dist/proxy/oauthFetch.js +298 -318
  66. package/dist/proxy/proxyConfig.js +3 -1
  67. package/dist/proxy/proxyFetch.js +250 -222
  68. package/dist/proxy/requestLogger.js +132 -45
  69. package/dist/proxy/sseInterceptor.js +36 -11
  70. package/dist/server/routes/claudeProxyRoutes.d.ts +10 -1
  71. package/dist/server/routes/claudeProxyRoutes.js +2726 -2272
  72. package/dist/services/server/ai/observability/instrumentation.js +194 -218
  73. package/dist/tasks/backends/bullmqBackend.js +24 -18
  74. package/dist/tasks/store/redisTaskStore.js +23 -16
  75. package/dist/tasks/taskManager.d.ts +2 -0
  76. package/dist/tasks/taskManager.js +100 -5
  77. package/dist/telemetry/telemetryService.js +9 -5
  78. package/dist/types/proxyTypes.d.ts +124 -1
  79. package/dist/utils/providerHealth.d.ts +1 -0
  80. package/dist/utils/providerHealth.js +46 -31
  81. package/dist/utils/providerUtils.js +12 -22
  82. package/package.json +3 -2
  83. package/scripts/observability/check-proxy-telemetry.mjs +1 -1
  84. package/scripts/observability/manage-local-openobserve.sh +36 -5
@@ -22,7 +22,7 @@ import pLimit from "p-limit";
22
22
  import { ErrorCategory, ErrorSeverity } from "./constants/enums.js";
23
23
  import { CIRCUIT_BREAKER, CIRCUIT_BREAKER_RESET_MS, MEMORY_THRESHOLDS, NANOSECOND_TO_MS_DIVISOR, PERFORMANCE_THRESHOLDS, PROVIDER_TIMEOUTS, RETRY_ATTEMPTS, RETRY_DELAYS, TOOL_TIMEOUTS, } from "./constants/index.js";
24
24
  import { checkContextBudget } from "./context/budgetChecker.js";
25
- import { ContextCompactor } from "./context/contextCompactor.js";
25
+ import { ContextCompactor, } from "./context/contextCompactor.js";
26
26
  import { emergencyContentTruncation } from "./context/emergencyTruncation.js";
27
27
  import { getContextOverflowProvider, isContextOverflowError, parseProviderOverflowDetails, } from "./context/errorDetection.js";
28
28
  import { ContextBudgetExceededError } from "./context/errors.js";
@@ -44,9 +44,9 @@ import { ToolRouter } from "./mcp/routing/index.js";
44
44
  import { directToolsServer } from "./mcp/servers/agent/directToolsServer.js";
45
45
  import { inferAnnotations, isSafeToRetry } from "./mcp/toolAnnotations.js";
46
46
  import { MCPToolRegistry } from "./mcp/toolRegistry.js";
47
- import { initializeHippocampus } from "./memory/hippocampusInitializer.js";
47
+ import { initializeHippocampus, } from "./memory/hippocampusInitializer.js";
48
48
  import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
49
- import { getMetricsAggregator, MetricsAggregator } from "./observability/metricsAggregator.js";
49
+ import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
50
50
  import { SpanStatus, SpanType } from "./observability/types/spanTypes.js";
51
51
  import { SpanSerializer } from "./observability/utils/spanSerializer.js";
52
52
  import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
@@ -56,14 +56,14 @@ import { ATTR } from "./telemetry/attributes.js";
56
56
  import { tracers } from "./telemetry/tracers.js";
57
57
  import { CircuitBreakerOpenError } from "./types/circuitBreakerErrors.js";
58
58
  import { ConversationMemoryError } from "./types/conversation.js";
59
- import { AuthenticationError, AuthorizationError, InvalidModelError } from "./types/errors.js";
60
- import { getConversationMessages, storeConversationTurn } from "./utils/conversationMemory.js";
59
+ import { AuthenticationError, AuthorizationError, InvalidModelError, } from "./types/errors.js";
60
+ import { getConversationMessages, storeConversationTurn, } from "./utils/conversationMemory.js";
61
61
  // Enhanced error handling imports
62
62
  import { CircuitBreaker, ERROR_CODES, ErrorFactory, isAbortError, isRetriableError, logStructuredError, NeuroLinkError, withRetry, withTimeout, } from "./utils/errorHandling.js";
63
63
  // Factory processing imports
64
64
  import { createCleanStreamOptions, enhanceTextGenerationOptions, processFactoryOptions, processStreamingFactoryOptions, validateFactoryConfig, } from "./utils/factoryProcessing.js";
65
65
  import { logger, mcpLogger } from "./utils/logger.js";
66
- import { createCustomToolServerInfo, detectCategory } from "./utils/mcpDefaults.js";
66
+ import { createCustomToolServerInfo, detectCategory, } from "./utils/mcpDefaults.js";
67
67
  import { resolveModel } from "./utils/modelAliasResolver.js";
68
68
  // Import orchestration components
69
69
  import { ModelRouter } from "./utils/modelRouter.js";
@@ -97,7 +97,9 @@ function classifyMcpErrorMessage(text) {
97
97
  lower.includes("access denied")) {
98
98
  return "permission_denied";
99
99
  }
100
- if (lower.includes("timeout") || lower.includes("timed out") || lower.includes("deadline exceeded")) {
100
+ if (lower.includes("timeout") ||
101
+ lower.includes("timed out") ||
102
+ lower.includes("deadline exceeded")) {
101
103
  return "timeout";
102
104
  }
103
105
  if (lower.includes("rate limit") ||
@@ -154,7 +156,11 @@ function isNonRetryableProviderError(error) {
154
156
  // Check for HTTP status codes on error objects (e.g., from Vercel AI SDK)
155
157
  if (error && typeof error === "object") {
156
158
  const err = error;
157
- const status = typeof err.status === "number" ? err.status : typeof err.statusCode === "number" ? err.statusCode : undefined;
159
+ const status = typeof err.status === "number"
160
+ ? err.status
161
+ : typeof err.statusCode === "number"
162
+ ? err.statusCode
163
+ : undefined;
158
164
  if (status && NON_RETRYABLE_HTTP_STATUS_CODES.includes(status)) {
159
165
  return true;
160
166
  }
@@ -200,7 +206,8 @@ export class NeuroLink {
200
206
  lastCompactionMessageCount = new Map();
201
207
  /** Extract sessionId from options context for compaction watermark keying */
202
208
  getCompactionSessionId(options) {
203
- return options.context?.sessionId || "__default__";
209
+ return (options.context
210
+ ?.sessionId || "__default__");
204
211
  }
205
212
  // MCP Enhancement modules - wired into core execution path
206
213
  mcpToolResultCache;
@@ -263,19 +270,28 @@ export class NeuroLink {
263
270
  * Extract and set Langfuse context from options with proper async scoping
264
271
  */
265
272
  async setLangfuseContextFromOptions(options, callback) {
266
- if (options.context && typeof options.context === "object" && options.context !== null) {
273
+ if (options.context &&
274
+ typeof options.context === "object" &&
275
+ options.context !== null) {
267
276
  let callbackExecuted = false;
268
277
  try {
269
278
  const ctx = options.context;
270
279
  // Trigger context scoping if any meaningful Langfuse field is present
271
- if (ctx.userId || ctx.sessionId || ctx.conversationId || ctx.requestId || ctx.traceName || ctx.metadata) {
280
+ if (ctx.userId ||
281
+ ctx.sessionId ||
282
+ ctx.conversationId ||
283
+ ctx.requestId ||
284
+ ctx.traceName ||
285
+ ctx.metadata) {
272
286
  // Build customAttributes from top-level metadata string/number/boolean fields
273
287
  let customAttributes;
274
288
  if (ctx.metadata && typeof ctx.metadata === "object") {
275
289
  const metaObj = ctx.metadata;
276
290
  const attrs = {};
277
291
  for (const [k, v] of Object.entries(metaObj)) {
278
- if (typeof v === "string" || typeof v === "number" || typeof v === "boolean") {
292
+ if (typeof v === "string" ||
293
+ typeof v === "number" ||
294
+ typeof v === "boolean") {
279
295
  attrs[k] = v;
280
296
  }
281
297
  }
@@ -287,10 +303,14 @@ export class NeuroLink {
287
303
  setLangfuseContext({
288
304
  userId: typeof ctx.userId === "string" ? ctx.userId : null,
289
305
  sessionId: typeof ctx.sessionId === "string" ? ctx.sessionId : null,
290
- conversationId: typeof ctx.conversationId === "string" ? ctx.conversationId : null,
306
+ conversationId: typeof ctx.conversationId === "string"
307
+ ? ctx.conversationId
308
+ : null,
291
309
  requestId: typeof ctx.requestId === "string" ? ctx.requestId : null,
292
310
  traceName: typeof ctx.traceName === "string" ? ctx.traceName : null,
293
- metadata: ctx.metadata && typeof ctx.metadata === "object" ? ctx.metadata : null,
311
+ metadata: ctx.metadata && typeof ctx.metadata === "object"
312
+ ? ctx.metadata
313
+ : null,
294
314
  ...(customAttributes !== undefined && { customAttributes }),
295
315
  }, async () => {
296
316
  try {
@@ -319,6 +339,137 @@ export class NeuroLink {
319
339
  }
320
340
  return await callback();
321
341
  }
342
+ createMetricsTraceContext() {
343
+ return {
344
+ traceId: crypto.randomUUID().replace(/-/g, ""),
345
+ parentSpanId: crypto.randomUUID().replace(/-/g, "").substring(0, 16),
346
+ };
347
+ }
348
+ enforceSessionBudget(maxBudgetUsd) {
349
+ if (maxBudgetUsd === undefined ||
350
+ maxBudgetUsd <= 0 ||
351
+ this._sessionCostUsd < maxBudgetUsd) {
352
+ return;
353
+ }
354
+ throw new NeuroLinkError({
355
+ code: "SESSION_BUDGET_EXCEEDED",
356
+ message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${maxBudgetUsd.toFixed(4)} limit`,
357
+ category: ErrorCategory.VALIDATION,
358
+ severity: ErrorSeverity.HIGH,
359
+ retriable: false,
360
+ context: {
361
+ spent: this._sessionCostUsd,
362
+ limit: maxBudgetUsd,
363
+ },
364
+ });
365
+ }
366
+ assertInputText(text, message) {
367
+ if (!text || typeof text !== "string") {
368
+ throw new Error(message);
369
+ }
370
+ }
371
+ async applyAuthenticatedRequestContext(options) {
372
+ if (options.auth?.token) {
373
+ const { AuthError } = await import("./auth/errors.js");
374
+ await this.ensureAuthProvider();
375
+ if (!this.authProvider) {
376
+ throw AuthError.create("PROVIDER_ERROR", "No auth provider configured. Set auth in constructor or via setAuthProvider() before using auth: { token }.");
377
+ }
378
+ let authResult;
379
+ try {
380
+ authResult = await withTimeout(this.authProvider.authenticateToken(options.auth.token), 5000, AuthError.create("PROVIDER_ERROR", "Auth token validation timed out after 5000ms"));
381
+ }
382
+ catch (error) {
383
+ if (error instanceof Error &&
384
+ "feature" in error &&
385
+ error.feature === "Auth") {
386
+ throw error;
387
+ }
388
+ throw AuthError.create("PROVIDER_ERROR", `Auth token validation failed: ${error instanceof Error ? error.message : String(error)}`);
389
+ }
390
+ if (!authResult.valid) {
391
+ throw AuthError.create("INVALID_TOKEN", authResult.error || "Token validation failed");
392
+ }
393
+ if (!authResult.user) {
394
+ throw AuthError.create("INVALID_TOKEN", "Token validated but no user identity returned");
395
+ }
396
+ if (!authResult.user.id) {
397
+ throw AuthError.create("INVALID_TOKEN", "Token validated but user identity missing required 'id' field");
398
+ }
399
+ options.context = {
400
+ ...(options.context || {}),
401
+ userId: authResult.user.id,
402
+ userEmail: authResult.user.email,
403
+ userRoles: authResult.user.roles,
404
+ };
405
+ }
406
+ if (!options.requestContext) {
407
+ return;
408
+ }
409
+ const tokenDerivedFields = options.auth?.token && this.authProvider
410
+ ? {
411
+ userId: options.context?.userId,
412
+ userEmail: options.context?.userEmail,
413
+ userRoles: options.context?.userRoles,
414
+ }
415
+ : {};
416
+ options.context = {
417
+ ...(options.context || {}),
418
+ ...options.requestContext,
419
+ ...tokenDerivedFields,
420
+ };
421
+ }
422
+ applyGenerateLifecycleMiddleware(options) {
423
+ if (!options.onFinish && !options.onError) {
424
+ return;
425
+ }
426
+ options.middleware = {
427
+ ...options.middleware,
428
+ middlewareConfig: {
429
+ ...options.middleware?.middlewareConfig,
430
+ lifecycle: {
431
+ ...options.middleware?.middlewareConfig?.lifecycle,
432
+ enabled: true,
433
+ config: {
434
+ ...options.middleware?.middlewareConfig?.lifecycle?.config,
435
+ ...(options.onFinish !== undefined
436
+ ? { onFinish: options.onFinish }
437
+ : {}),
438
+ ...(options.onError !== undefined
439
+ ? { onError: options.onError }
440
+ : {}),
441
+ },
442
+ },
443
+ },
444
+ };
445
+ }
446
+ applyStreamLifecycleMiddleware(options) {
447
+ if (!options.onFinish && !options.onError && !options.onChunk) {
448
+ return;
449
+ }
450
+ options.middleware = {
451
+ ...options.middleware,
452
+ middlewareConfig: {
453
+ ...options.middleware?.middlewareConfig,
454
+ lifecycle: {
455
+ ...options.middleware?.middlewareConfig?.lifecycle,
456
+ enabled: true,
457
+ config: {
458
+ ...options.middleware?.middlewareConfig?.lifecycle?.config,
459
+ ...(options.onFinish !== undefined
460
+ ? { onFinish: options.onFinish }
461
+ : {}),
462
+ ...(options.onError !== undefined
463
+ ? { onError: options.onError }
464
+ : {}),
465
+ ...(options.onChunk !== undefined
466
+ ? { onChunk: options.onChunk }
467
+ : {}),
468
+ },
469
+ },
470
+ },
471
+ };
472
+ }
322
473
  initializeMemoryConfig() {
323
474
  const memory = this.conversationMemoryConfig?.conversationMemory?.memory;
324
475
  if (!memory?.enabled) {
@@ -424,7 +575,9 @@ export class NeuroLink {
424
575
  logger.setEventEmitter(this.emitter);
425
576
  // Read tool cache duration from environment variables, with a default
426
577
  const cacheDurationEnv = process.env.NEUROLINK_TOOL_CACHE_DURATION;
427
- this.toolCacheDuration = cacheDurationEnv ? parseInt(cacheDurationEnv, 10) : 20000;
578
+ this.toolCacheDuration = cacheDurationEnv
579
+ ? parseInt(cacheDurationEnv, 10)
580
+ : 20000;
428
581
  const constructorStartTime = Date.now();
429
582
  const constructorHrTimeStart = process.hrtime.bigint();
430
583
  const constructorId = `neurolink-constructor-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
@@ -779,7 +932,9 @@ export class NeuroLink {
779
932
  // memory manager supports getSessionRaw.
780
933
  const memConfig = this.conversationMemoryConfig?.conversationMemory;
781
934
  const hasRedisConfig = !!memConfig?.redisConfig ||
782
- (memConfig && "redis" in memConfig && !!memConfig.redis) ||
935
+ (memConfig &&
936
+ "redis" in memConfig &&
937
+ !!memConfig.redis) ||
783
938
  process.env.STORAGE_TYPE === "redis";
784
939
  if (!memConfig?.enabled || !hasRedisConfig) {
785
940
  logger.debug("[NeuroLink] Skipping memory retrieval tools — requires Redis conversation memory");
@@ -810,8 +965,13 @@ export class NeuroLink {
810
965
  messages: [],
811
966
  });
812
967
  // Check if the tool itself reported an error
813
- const hasError = result && typeof result === "object" && "error" in result && !("messages" in result);
814
- const errorMsg = hasError ? result.error : undefined;
968
+ const hasError = result &&
969
+ typeof result === "object" &&
970
+ "error" in result &&
971
+ !("messages" in result);
972
+ const errorMsg = hasError
973
+ ? result.error
974
+ : undefined;
815
975
  return {
816
976
  success: !hasError,
817
977
  data: result,
@@ -888,7 +1048,8 @@ Current user's request: ${currentInput}`;
888
1048
  * Respects both the global memory SDK config and per-call overrides.
889
1049
  */
890
1050
  shouldReadMemory(perCallMemory, userId) {
891
- if (!this.conversationMemoryConfig?.conversationMemory?.memory?.enabled || !userId) {
1051
+ if (!this.conversationMemoryConfig?.conversationMemory?.memory?.enabled ||
1052
+ !userId) {
892
1053
  return false;
893
1054
  }
894
1055
  if (perCallMemory?.enabled === false) {
@@ -904,7 +1065,8 @@ Current user's request: ${currentInput}`;
904
1065
  * Respects both the global memory SDK config and per-call overrides.
905
1066
  */
906
1067
  shouldWriteMemory(perCallMemory, userId, content) {
907
- if (!this.conversationMemoryConfig?.conversationMemory?.memory?.enabled || !userId) {
1068
+ if (!this.conversationMemoryConfig?.conversationMemory?.memory?.enabled ||
1069
+ !userId) {
908
1070
  return false;
909
1071
  }
910
1072
  if (!content?.trim()) {
@@ -978,7 +1140,9 @@ Current user's request: ${currentInput}`;
978
1140
  const writeOps = [client.add(userId, content)];
979
1141
  const writableAdditional = (additionalUsers || []).filter((u) => u.write !== false);
980
1142
  for (const user of writableAdditional) {
981
- const addOptions = user.prompt || user.maxWords ? { prompt: user.prompt, maxWords: user.maxWords } : undefined;
1143
+ const addOptions = user.prompt || user.maxWords
1144
+ ? { prompt: user.prompt, maxWords: user.maxWords }
1145
+ : undefined;
982
1146
  writeOps.push(client.add(user.userId, content, addOptions));
983
1147
  }
984
1148
  await Promise.all(writeOps);
@@ -1137,7 +1301,8 @@ Current user's request: ${currentInput}`;
1137
1301
  try {
1138
1302
  const langfuseConfig = this.observabilityConfig?.langfuse;
1139
1303
  // Check if we should use external provider mode - bypass enabled check
1140
- const useExternalProvider = langfuseConfig?.autoDetectExternalProvider === true || langfuseConfig?.useExternalTracerProvider === true;
1304
+ const useExternalProvider = langfuseConfig?.autoDetectExternalProvider === true ||
1305
+ langfuseConfig?.useExternalTracerProvider === true;
1141
1306
  if (langfuseConfig?.enabled || useExternalProvider) {
1142
1307
  logger.debug(`[NeuroLink] 📊 LOG_POINT_C019_LANGFUSE_INIT_START`, {
1143
1308
  logPoint: "C019_LANGFUSE_INIT_START",
@@ -1152,7 +1317,9 @@ Current user's request: ${currentInput}`;
1152
1317
  initializeOpenTelemetry(langfuseConfig);
1153
1318
  const healthStatus = getLangfuseHealthStatus();
1154
1319
  const langfuseInitDurationNs = process.hrtime.bigint() - langfuseInitStartTime;
1155
- if (healthStatus.initialized && healthStatus.hasProcessor && healthStatus.isHealthy) {
1320
+ if (healthStatus.initialized &&
1321
+ healthStatus.hasProcessor &&
1322
+ healthStatus.isHealthy) {
1156
1323
  logger.debug(`[NeuroLink] ✅ LOG_POINT_C020_LANGFUSE_INIT_SUCCESS`, {
1157
1324
  logPoint: "C020_LANGFUSE_INIT_SUCCESS",
1158
1325
  constructorId,
@@ -1428,7 +1595,9 @@ Current user's request: ${currentInput}`;
1428
1595
  }
1429
1596
  catch (configError) {
1430
1597
  mcpLogger.warn("[NeuroLink] MCP configuration loading failed", {
1431
- error: configError instanceof Error ? configError.message : String(configError),
1598
+ error: configError instanceof Error
1599
+ ? configError.message
1600
+ : String(configError),
1432
1601
  });
1433
1602
  }
1434
1603
  }
@@ -1553,7 +1722,9 @@ Current user's request: ${currentInput}`;
1553
1722
  taskType: classification.type,
1554
1723
  routedProvider: route.provider,
1555
1724
  routedModel: route.model,
1556
- reason: error instanceof Error ? error.message : "Ollama service check failed",
1725
+ reason: error instanceof Error
1726
+ ? error.message
1727
+ : "Ollama service check failed",
1557
1728
  orchestrationTime: `${Date.now() - startTime}ms`,
1558
1729
  });
1559
1730
  return {}; // Return empty object to preserve existing fallback behavior
@@ -1689,7 +1860,9 @@ Current user's request: ${currentInput}`;
1689
1860
  taskType: classification.type,
1690
1861
  routedProvider: route.provider,
1691
1862
  routedModel: route.model,
1692
- reason: error instanceof Error ? error.message : "Ollama service check failed",
1863
+ reason: error instanceof Error
1864
+ ? error.message
1865
+ : "Ollama service check failed",
1693
1866
  orchestrationTime: `${Date.now() - startTime}ms`,
1694
1867
  });
1695
1868
  return {}; // Return empty object to preserve existing fallback behavior
@@ -1740,7 +1913,9 @@ Current user's request: ${currentInput}`;
1740
1913
  const anyOptions = optionsOrPrompt;
1741
1914
  if (anyOptions.messages && anyOptions.messages.length > 0) {
1742
1915
  const lastMessage = anyOptions.messages[anyOptions.messages.length - 1];
1743
- return typeof lastMessage.content === "string" ? lastMessage.content : JSON.stringify(lastMessage.content);
1916
+ return typeof lastMessage.content === "string"
1917
+ ? lastMessage.content
1918
+ : JSON.stringify(lastMessage.content);
1744
1919
  }
1745
1920
  // Handle input.text format
1746
1921
  return optionsOrPrompt.input?.text || "";
@@ -1832,7 +2007,8 @@ Current user's request: ${currentInput}`;
1832
2007
  endpoint: otelConfig.endpoint,
1833
2008
  serviceName: otelConfig.serviceName,
1834
2009
  }
1835
- : isOpenTelemetryInitialized() || process.env.OTEL_EXPORTER_OTLP_ENDPOINT
2010
+ : isOpenTelemetryInitialized() ||
2011
+ process.env.OTEL_EXPORTER_OTLP_ENDPOINT
1836
2012
  ? {
1837
2013
  enabled: isOpenTelemetryInitialized(),
1838
2014
  endpoint: process.env.OTEL_EXPORTER_OTLP_ENDPOINT,
@@ -1974,7 +2150,9 @@ Current user's request: ${currentInput}`;
1974
2150
  const result = data.result;
1975
2151
  const usage = result?.usage;
1976
2152
  const analytics = result?.analytics;
1977
- const provider = data.provider || result?.provider || "unknown";
2153
+ const provider = data.provider ||
2154
+ result?.provider ||
2155
+ "unknown";
1978
2156
  const model = result?.model || "unknown";
1979
2157
  const responseTime = data.responseTime || 0;
1980
2158
  const traceCtx = this._metricsTraceContext;
@@ -1993,7 +2171,9 @@ Current user's request: ${currentInput}`;
1993
2171
  span.parentSpanId = undefined;
1994
2172
  }
1995
2173
  // Mark failed generations with ERROR status so metrics count them correctly
1996
- const spanStatus = data.success === false || data.error ? SpanStatus.ERROR : SpanStatus.OK;
2174
+ const spanStatus = data.success === false || data.error
2175
+ ? SpanStatus.ERROR
2176
+ : SpanStatus.OK;
1997
2177
  span = SpanSerializer.endSpan(span, spanStatus, data.error ? String(data.error) : undefined);
1998
2178
  span.durationMs = responseTime;
1999
2179
  if (usage) {
@@ -2029,7 +2209,9 @@ Current user's request: ${currentInput}`;
2029
2209
  const content = result?.content || result?.text;
2030
2210
  if (content) {
2031
2211
  span = SpanSerializer.updateAttributes(span, {
2032
- output: content.length > 5000 ? content.substring(0, 5000) + "...[truncated]" : content,
2212
+ output: content.length > 5000
2213
+ ? content.substring(0, 5000) + "...[truncated]"
2214
+ : content,
2033
2215
  });
2034
2216
  }
2035
2217
  this.metricsAggregator.recordSpan(span);
@@ -2068,14 +2250,18 @@ Current user's request: ${currentInput}`;
2068
2250
  if (data.prompt) {
2069
2251
  const promptStr = String(data.prompt);
2070
2252
  span = SpanSerializer.updateAttributes(span, {
2071
- input: promptStr.length > 5000 ? promptStr.substring(0, 5000) + "...[truncated]" : promptStr,
2253
+ input: promptStr.length > 5000
2254
+ ? promptStr.substring(0, 5000) + "...[truncated]"
2255
+ : promptStr,
2072
2256
  });
2073
2257
  }
2074
2258
  // Record streamed output (truncated for safety)
2075
2259
  const streamContent = data.content;
2076
2260
  if (streamContent) {
2077
2261
  span = SpanSerializer.updateAttributes(span, {
2078
- output: streamContent.length > 5000 ? streamContent.substring(0, 5000) + "...[truncated]" : streamContent,
2262
+ output: streamContent.length > 5000
2263
+ ? streamContent.substring(0, 5000) + "...[truncated]"
2264
+ : streamContent,
2079
2265
  });
2080
2266
  }
2081
2267
  // Enrich stream span with token usage if available
@@ -2092,7 +2278,8 @@ Current user's request: ${currentInput}`;
2092
2278
  const pricing = tokenTracker.getModelPricing(model);
2093
2279
  if (pricing) {
2094
2280
  const inputCost = ((usage.input || 0) / 1_000_000) * pricing.inputPricePerMillion;
2095
- const outputCost = ((usage.output || 0) / 1_000_000) * pricing.outputPricePerMillion;
2281
+ const outputCost = ((usage.output || 0) / 1_000_000) *
2282
+ pricing.outputPricePerMillion;
2096
2283
  const totalCost = inputCost + outputCost;
2097
2284
  if (totalCost > 0) {
2098
2285
  span = SpanSerializer.enrichWithCost(span, {
@@ -2127,7 +2314,8 @@ Current user's request: ${currentInput}`;
2127
2314
  span = SpanSerializer.endSpan(span, success ? SpanStatus.OK : SpanStatus.ERROR);
2128
2315
  span.durationMs = responseTime;
2129
2316
  if (!success && data.error) {
2130
- span.statusMessage = data.error.message || String(data.error);
2317
+ span.statusMessage =
2318
+ data.error.message || String(data.error);
2131
2319
  }
2132
2320
  if (data.result) {
2133
2321
  try {
@@ -2279,398 +2467,313 @@ Current user's request: ${currentInput}`;
2279
2467
  * @since 1.0.0
2280
2468
  */
2281
2469
  async generate(optionsOrPrompt) {
2282
- return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, async (generateSpan) => {
2283
- // Set metrics trace context for parent-child span linking.
2284
- // The generation span will be the root (no parentSpanId).
2285
- // Tool spans will be children of the root span via rootSpanId.
2286
- const metricsTraceId = crypto.randomUUID().replace(/-/g, "");
2287
- const metricsRootSpanId = crypto.randomUUID().replace(/-/g, "").substring(0, 16);
2288
- // Scope trace context to this request via AsyncLocalStorage
2289
- // so concurrent generate/stream calls don't race.
2290
- return metricsTraceContextStorage.run({ traceId: metricsTraceId, parentSpanId: metricsRootSpanId }, async () => {
2291
- try {
2292
- const originalPrompt = this._extractOriginalPrompt(optionsOrPrompt);
2293
- // Convert string prompt to full options
2294
- // Shallow-copy caller's object to avoid mutating their original reference
2295
- const options = typeof optionsOrPrompt === "string" ? { input: { text: optionsOrPrompt } } : { ...optionsOrPrompt };
2296
- // NL-004: Resolve model aliases/deprecations before processing
2297
- options.model = resolveModel(options.model, this.modelAliasConfig);
2298
- // MCP Enhancement: propagate disableToolCache to tool execution
2299
- this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
2300
- // Set span attributes for observability
2301
- generateSpan.setAttribute("neurolink.provider", options.provider || "default");
2302
- generateSpan.setAttribute("neurolink.model", options.model || "default");
2303
- generateSpan.setAttribute("neurolink.input_length", typeof optionsOrPrompt === "string" ? optionsOrPrompt.length : options.input?.text?.length || 0);
2304
- generateSpan.setAttribute("neurolink.has_tools", !!(options.tools && Object.keys(options.tools).length > 0));
2305
- // Validate prompt
2306
- if (!options.input?.text || typeof options.input.text !== "string") {
2307
- throw new Error("Input text is required and must be a non-empty string");
2308
- }
2309
- // Check budget limit before making API call
2310
- if (options.maxBudgetUsd !== undefined &&
2311
- options.maxBudgetUsd > 0 &&
2312
- this._sessionCostUsd >= options.maxBudgetUsd) {
2313
- throw new NeuroLinkError({
2314
- code: "SESSION_BUDGET_EXCEEDED",
2315
- message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
2316
- category: ErrorCategory.VALIDATION,
2317
- severity: ErrorSeverity.HIGH,
2318
- retriable: false,
2319
- context: {
2320
- spent: this._sessionCostUsd,
2321
- limit: options.maxBudgetUsd,
2322
- },
2323
- });
2324
- }
2325
- // Auto-inject lifecycle middleware when callbacks are provided
2326
- // (must happen before workflow/PPT early returns so those paths get middleware too)
2327
- if (options.onFinish || options.onError) {
2328
- options.middleware = {
2329
- ...options.middleware,
2330
- middlewareConfig: {
2331
- ...options.middleware?.middlewareConfig,
2332
- lifecycle: {
2333
- ...options.middleware?.middlewareConfig?.lifecycle,
2334
- enabled: true,
2335
- config: {
2336
- ...options.middleware?.middlewareConfig?.lifecycle?.config,
2337
- ...(options.onFinish !== undefined ? { onFinish: options.onFinish } : {}),
2338
- ...(options.onError !== undefined ? { onError: options.onError } : {}),
2339
- },
2340
- },
2341
- },
2342
- };
2343
- }
2344
- // Handle per-call auth token validation
2345
- if (options.auth?.token) {
2346
- const { AuthError } = await import("./auth/errors.js");
2347
- await this.ensureAuthProvider();
2348
- if (!this.authProvider) {
2349
- throw AuthError.create("PROVIDER_ERROR", "No auth provider configured. Set auth in constructor or via setAuthProvider() before using auth: { token }.");
2350
- }
2351
- let authResult;
2352
- try {
2353
- authResult = await withTimeout(this.authProvider.authenticateToken(options.auth.token), 5000, AuthError.create("PROVIDER_ERROR", "Auth token validation timed out after 5000ms"));
2354
- }
2355
- catch (err) {
2356
- // Rethrow auth errors as-is; wrap anything else
2357
- if (err instanceof Error && "feature" in err && err.feature === "Auth") {
2358
- throw err;
2359
- }
2360
- throw AuthError.create("PROVIDER_ERROR", `Auth token validation failed: ${err instanceof Error ? err.message : String(err)}`);
2361
- }
2362
- if (!authResult.valid) {
2363
- throw AuthError.create("INVALID_TOKEN", authResult.error || "Token validation failed");
2364
- }
2365
- // Fail closed: token valid but no user identity is a provider bug
2366
- if (!authResult.user) {
2367
- throw AuthError.create("INVALID_TOKEN", "Token validated but no user identity returned");
2368
- }
2369
- if (!authResult.user.id) {
2370
- throw AuthError.create("INVALID_TOKEN", "Token validated but user identity missing required 'id' field");
2371
- }
2372
- // Merge validated user into context
2373
- options.context = {
2374
- ...(options.context || {}),
2375
- userId: authResult.user.id,
2376
- userEmail: authResult.user.email,
2377
- userRoles: authResult.user.roles,
2378
- };
2379
- }
2380
- // Handle pre-validated requestContext
2381
- if (options.requestContext) {
2382
- // When auth token was validated, token-derived identity fields
2383
- // MUST take precedence over requestContext to prevent privilege escalation.
2384
- const tokenDerivedFields = options.auth?.token && this.authProvider
2385
- ? {
2386
- userId: options.context?.userId,
2387
- userEmail: options.context?.userEmail,
2388
- userRoles: options.context?.userRoles,
2389
- }
2390
- : {};
2391
- options.context = {
2392
- ...(options.context || {}),
2393
- ...options.requestContext,
2394
- ...tokenDerivedFields,
2395
- };
2396
- }
2397
- // Check if workflow is requested
2398
- if (options.workflow || options.workflowConfig) {
2399
- return await this.generateWithWorkflow(options);
2400
- }
2401
- // Check if PPT output mode is requested
2402
- if (options.output?.mode === "ppt") {
2403
- const pptResult = await this.generateWithPPT(options);
2404
- generateSpan.setAttribute("neurolink.output_length", pptResult.content?.length ?? 0);
2405
- if (pptResult.analytics) {
2406
- generateSpan.setAttribute("neurolink.tokens.input", pptResult.analytics.tokenUsage?.input ?? 0);
2407
- generateSpan.setAttribute("neurolink.tokens.output", pptResult.analytics.tokenUsage?.output ?? 0);
2408
- generateSpan.setAttribute("neurolink.cost", pptResult.analytics.cost ?? 0);
2409
- }
2410
- generateSpan.setStatus({ code: SpanStatusCode.OK });
2411
- return pptResult;
2412
- }
2413
- // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
2414
- return await this.setLangfuseContextFromOptions(options, async () => {
2415
- const startTime = Date.now();
2416
- // Apply orchestration if enabled and no specific provider/model requested
2417
- if (this.enableOrchestration && !options.provider && !options.model) {
2418
- try {
2419
- const orchestratedOptions = await this.applyOrchestration(options);
2420
- logger.debug("Orchestration applied", {
2421
- originalProvider: options.provider || "auto",
2422
- orchestratedProvider: orchestratedOptions.provider,
2423
- orchestratedModel: orchestratedOptions.model,
2424
- prompt: options.input.text.substring(0, 100),
2425
- });
2426
- // Use orchestrated options
2427
- Object.assign(options, orchestratedOptions);
2428
- // Re-resolve model alias in case orchestration returned an alias
2429
- if (orchestratedOptions.model) {
2430
- options.model = resolveModel(options.model, this.modelAliasConfig);
2431
- }
2432
- }
2433
- catch (error) {
2434
- logger.warn("Orchestration failed, continuing with original options", {
2435
- error: error instanceof Error ? error.message : String(error),
2436
- originalProvider: options.provider || "auto",
2437
- });
2438
- // Continue with original options if orchestration fails
2439
- }
2440
- }
2441
- // Emit generation start event (NeuroLink format - keep existing)
2442
- this.emitter.emit("generation:start", {
2443
- provider: options.provider || "auto",
2444
- timestamp: startTime,
2445
- });
2446
- // ADD: Bedrock-compatible response:start event
2447
- this.emitter.emit("response:start");
2448
- // ADD: Bedrock-compatible message event
2449
- this.emitter.emit("message", `Starting ${options.provider || "auto"} text generation...`);
2450
- // Process factory configuration
2451
- const factoryResult = processFactoryOptions(options);
2452
- // Validate factory configuration if present
2453
- if (factoryResult.hasFactoryConfig && options.factoryConfig) {
2454
- const validation = validateFactoryConfig(options.factoryConfig);
2455
- if (!validation.isValid) {
2456
- logger.warn("Invalid factory configuration detected", {
2457
- errors: validation.errors,
2458
- });
2459
- // Continue with warning rather than throwing - graceful degradation
2460
- }
2461
- }
2462
- // RAG Integration: If rag config is provided, prepare the RAG search tool
2463
- if (options.rag?.files?.length) {
2464
- try {
2465
- const { prepareRAGTool } = await import("./rag/ragIntegration.js");
2466
- const ragResult = await prepareRAGTool(options.rag, options.provider);
2467
- // Inject the RAG tool into the tools record
2468
- if (!options.tools) {
2469
- options.tools = {};
2470
- }
2471
- options.tools[ragResult.toolName] = ragResult.tool;
2472
- // Inject RAG-aware system prompt so the AI uses the RAG tool first
2473
- const ragSystemInstruction = [
2474
- `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
2475
- `${ragResult.filesLoaded} loaded document(s) containing ${ragResult.chunksIndexed} indexed chunks.`,
2476
- `ALWAYS use the "${ragResult.toolName}" tool FIRST to answer the user's question before using any other tools.`,
2477
- `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
2478
- `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
2479
- ].join(" ");
2480
- options.systemPrompt = (options.systemPrompt || "") + ragSystemInstruction;
2481
- logger.info("[RAG] Tool injected into generate()", {
2482
- toolName: ragResult.toolName,
2483
- filesLoaded: ragResult.filesLoaded,
2484
- chunksIndexed: ragResult.chunksIndexed,
2485
- });
2486
- }
2487
- catch (error) {
2488
- logger.warn("[RAG] Failed to prepare RAG tool, continuing without RAG", {
2489
- error: error instanceof Error ? error.message : String(error),
2490
- });
2491
- }
2492
- }
2493
- // Memory retrieval for generate path
2494
- if (this.shouldReadMemory(options.memory, options.context?.userId) && options.context?.userId) {
2495
- try {
2496
- options.input.text = await this.retrieveMemory(options.input.text, options.context.userId, options.memory?.additionalUsers);
2497
- logger.debug("Memory retrieval successful (generate)");
2498
- }
2499
- catch (error) {
2500
- logger.warn("Memory retrieval failed (generate):", error);
2501
- }
2502
- }
2503
- // 🔧 CRITICAL FIX: Convert to TextGenerationOptions while preserving the input object for multimodal support
2504
- const baseOptions = {
2505
- prompt: options.input.text,
2506
- provider: options.provider,
2507
- model: options.model,
2508
- temperature: options.temperature,
2509
- maxTokens: options.maxTokens,
2510
- systemPrompt: options.systemPrompt,
2511
- schema: options.schema,
2512
- output: options.output,
2513
- tools: options.tools, // Includes RAG tools if rag config was provided
2514
- disableTools: options.disableTools,
2515
- toolFilter: options.toolFilter,
2516
- excludeTools: options.excludeTools,
2517
- maxSteps: options.maxSteps,
2518
- toolChoice: options.toolChoice,
2519
- prepareStep: options.prepareStep,
2520
- enableAnalytics: options.enableAnalytics,
2521
- enableEvaluation: options.enableEvaluation,
2522
- context: options.context,
2523
- evaluationDomain: options.evaluationDomain,
2524
- toolUsageContext: options.toolUsageContext,
2525
- input: options.input, // This includes text, images, and content arrays
2526
- region: options.region,
2527
- tts: options.tts,
2528
- fileRegistry: this.fileRegistry,
2529
- abortSignal: options.abortSignal,
2530
- skipToolPromptInjection: options.skipToolPromptInjection,
2531
- middleware: options.middleware,
2532
- // Pass through conversation messages for task continuation and external callers
2533
- conversationMessages: options.conversationMessages,
2534
- };
2535
- // Auto-map top-level sessionId/userId to context for convenience
2536
- // Tests and users may pass sessionId/userId as top-level options
2537
- const extraContext = options;
2538
- if (extraContext.sessionId || extraContext.userId) {
2539
- baseOptions.context = {
2540
- ...baseOptions.context,
2541
- ...(extraContext.sessionId && !baseOptions.context?.sessionId
2542
- ? { sessionId: extraContext.sessionId }
2543
- : {}),
2544
- ...(extraContext.userId && !baseOptions.context?.userId
2545
- ? { userId: extraContext.userId }
2546
- : {}),
2547
- };
2548
- }
2549
- // Apply factory enhancement using centralized utilities
2550
- const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
2551
- // Pass conversation memory config if available
2552
- if (this.conversationMemory) {
2553
- textOptions.conversationMemoryConfig = this.conversationMemory.config;
2554
- // Include original prompt for context summarization
2555
- textOptions.originalPrompt = originalPrompt;
2556
- }
2557
- // Detect and execute domain-specific tools
2558
- const { toolResults, enhancedPrompt } = await this.detectAndExecuteTools(textOptions.prompt || options.input.text, factoryResult.domainType);
2559
- // Update prompt with tool results if available
2560
- if (enhancedPrompt !== textOptions.prompt) {
2561
- textOptions.prompt = enhancedPrompt;
2562
- logger.debug("Enhanced prompt with tool results", {
2563
- originalLength: options.input.text.length,
2564
- enhancedLength: enhancedPrompt.length,
2565
- toolResults: toolResults.length,
2566
- });
2567
- }
2568
- const textResult = await this.generateTextInternal(textOptions);
2569
- // Emit generation completion event (NeuroLink format - enhanced with content)
2570
- this.emitter.emit("generation:end", {
2571
- provider: textResult.provider,
2572
- responseTime: Date.now() - startTime,
2573
- toolsUsed: textResult.toolsUsed,
2574
- timestamp: Date.now(),
2575
- result: textResult, // Enhanced: include full result
2576
- prompt: options.input?.text || options.prompt,
2577
- temperature: textOptions.temperature,
2578
- maxTokens: textOptions.maxTokens,
2579
- });
2580
- // ADD: Bedrock-compatible response:end event with content
2581
- this.emitter.emit("response:end", textResult.content || "");
2582
- // ADD: Bedrock-compatible message event
2583
- this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
2584
- // Convert back to GenerateResult
2585
- const generateResult = {
2586
- content: textResult.content,
2587
- finishReason: textResult.finishReason,
2588
- provider: textResult.provider,
2589
- model: textResult.model,
2590
- usage: textResult.usage
2591
- ? {
2592
- input: textResult.usage.input || 0,
2593
- output: textResult.usage.output || 0,
2594
- total: textResult.usage.total || 0,
2595
- }
2596
- : undefined,
2597
- responseTime: textResult.responseTime,
2598
- toolsUsed: textResult.toolsUsed,
2599
- toolExecutions: transformToolExecutions(textResult.toolExecutions),
2600
- enhancedWithTools: textResult.enhancedWithTools,
2601
- availableTools: transformAvailableTools(textResult.availableTools),
2602
- analytics: textResult.analytics,
2603
- // CRITICAL FIX: Include imageOutput for image generation models
2604
- imageOutput: textResult.imageOutput,
2605
- evaluation: textResult.evaluation
2606
- ? {
2607
- ...textResult.evaluation,
2608
- isOffTopic: textResult.evaluation.isOffTopic ?? false,
2609
- alertSeverity: textResult.evaluation.alertSeverity ?? "none",
2610
- reasoning: textResult.evaluation.reasoning ?? "No evaluation provided",
2611
- evaluationModel: textResult.evaluation.evaluationModel ?? "unknown",
2612
- evaluationTime: textResult.evaluation.evaluationTime ?? Date.now(),
2613
- evaluationDomain: textResult.evaluation.evaluationDomain ??
2614
- textOptions.evaluationDomain ??
2615
- factoryResult.domainType,
2616
- }
2617
- : undefined,
2618
- audio: textResult.audio,
2619
- video: textResult.video,
2620
- ppt: textResult.ppt,
2621
- // NL-007: Copy retry metadata from MCP generation path
2622
- ...(textResult.retries && { retries: textResult.retries }),
2623
- };
2624
- // Accumulate session cost for budget tracking
2625
- if (generateResult.analytics?.cost && generateResult.analytics.cost > 0) {
2626
- this._sessionCostUsd += generateResult.analytics.cost;
2627
- }
2628
- this.scheduleGenerateMemoryStorage(options, originalPrompt, generateResult);
2629
- // Set completion span attributes
2630
- generateSpan.setAttribute("neurolink.output_length", generateResult.content?.length || 0);
2631
- generateSpan.setAttribute("neurolink.tokens.input", generateResult.usage?.input || 0);
2632
- generateSpan.setAttribute("neurolink.tokens.output", generateResult.usage?.output || 0);
2633
- generateSpan.setAttribute("neurolink.finish_reason", generateResult.finishReason || "unknown");
2634
- generateSpan.setAttribute("neurolink.result_provider", generateResult.provider || "unknown");
2635
- generateSpan.setAttribute("neurolink.result_model", generateResult.model || "unknown");
2636
- // NL-007: Expose retry count in OTel span
2637
- generateSpan.setAttribute("generate.retry_count", generateResult.retries?.count || 0);
2638
- generateSpan.setStatus({ code: SpanStatusCode.OK });
2639
- return generateResult;
2640
- });
2470
+ return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan));
2471
+ }
2472
+ async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan) {
2473
+ return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan));
2474
+ }
2475
+ async executeGenerateRequest(optionsOrPrompt, generateSpan) {
2476
+ try {
2477
+ const { options, originalPrompt } = await this.prepareGenerateRequest(optionsOrPrompt, generateSpan);
2478
+ const earlyResult = await this.maybeHandleEarlyGenerateResult(options, generateSpan);
2479
+ if (earlyResult) {
2480
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
2481
+ return earlyResult;
2482
+ }
2483
+ const result = await this.setLangfuseContextFromOptions(options, () => this.runStandardGenerateRequest(options, originalPrompt, generateSpan));
2484
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
2485
+ return result;
2486
+ }
2487
+ catch (error) {
2488
+ generateSpan.setStatus({
2489
+ code: SpanStatusCode.ERROR,
2490
+ message: error instanceof Error ? error.message : String(error),
2491
+ });
2492
+ this.emitGenerateErrorEvent(optionsOrPrompt, error);
2493
+ throw error;
2494
+ }
2495
+ finally {
2496
+ this._disableToolCacheForCurrentRequest = false;
2497
+ generateSpan.end();
2498
+ }
2499
+ }
2500
+ async prepareGenerateRequest(optionsOrPrompt, generateSpan) {
2501
+ const originalPrompt = this._extractOriginalPrompt(optionsOrPrompt);
2502
+ const options = typeof optionsOrPrompt === "string"
2503
+ ? { input: { text: optionsOrPrompt } }
2504
+ : { ...optionsOrPrompt };
2505
+ options.model = resolveModel(options.model, this.modelAliasConfig);
2506
+ this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
2507
+ generateSpan.setAttribute("neurolink.provider", options.provider || "default");
2508
+ generateSpan.setAttribute("neurolink.model", options.model || "default");
2509
+ generateSpan.setAttribute("neurolink.input_length", typeof optionsOrPrompt === "string"
2510
+ ? optionsOrPrompt.length
2511
+ : options.input?.text?.length || 0);
2512
+ generateSpan.setAttribute("neurolink.has_tools", !!(options.tools && Object.keys(options.tools).length > 0));
2513
+ this.assertInputText(options.input?.text, "Input text is required and must be a non-empty string");
2514
+ this.enforceSessionBudget(options.maxBudgetUsd);
2515
+ this.applyGenerateLifecycleMiddleware(options);
2516
+ await this.applyAuthenticatedRequestContext(options);
2517
+ return { options, originalPrompt };
2518
+ }
2519
+ async maybeHandleEarlyGenerateResult(options, generateSpan) {
2520
+ if (options.workflow || options.workflowConfig) {
2521
+ return this.generateWithWorkflow(options);
2522
+ }
2523
+ if (options.output?.mode !== "ppt") {
2524
+ return null;
2525
+ }
2526
+ const pptResult = await this.generateWithPPT(options);
2527
+ generateSpan.setAttribute("neurolink.output_length", pptResult.content?.length ?? 0);
2528
+ if (pptResult.analytics) {
2529
+ generateSpan.setAttribute("neurolink.tokens.input", pptResult.analytics.tokenUsage?.input ?? 0);
2530
+ generateSpan.setAttribute("neurolink.tokens.output", pptResult.analytics.tokenUsage?.output ?? 0);
2531
+ generateSpan.setAttribute("neurolink.cost", pptResult.analytics.cost ?? 0);
2532
+ }
2533
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
2534
+ return pptResult;
2535
+ }
2536
+ async runStandardGenerateRequest(options, originalPrompt, generateSpan) {
2537
+ const startTime = Date.now();
2538
+ await this.maybeApplyGenerateOrchestration(options);
2539
+ this.emitter.emit("generation:start", {
2540
+ provider: options.provider || "auto",
2541
+ timestamp: startTime,
2542
+ });
2543
+ this.emitter.emit("response:start");
2544
+ this.emitter.emit("message", `Starting ${options.provider || "auto"} text generation...`);
2545
+ const factoryResult = processFactoryOptions(options);
2546
+ if (factoryResult.hasFactoryConfig && options.factoryConfig) {
2547
+ const validation = validateFactoryConfig(options.factoryConfig);
2548
+ if (!validation.isValid) {
2549
+ logger.warn("Invalid factory configuration detected", {
2550
+ errors: validation.errors,
2551
+ });
2552
+ }
2553
+ }
2554
+ await this.prepareGenerateAugmentations(options);
2555
+ const textOptions = await this.buildGenerateTextOptions(options, originalPrompt, factoryResult);
2556
+ const textResult = await this.generateTextInternal(textOptions);
2557
+ return this.finalizeGenerateRequestResult({
2558
+ generateSpan,
2559
+ options,
2560
+ textOptions,
2561
+ textResult,
2562
+ factoryResult,
2563
+ originalPrompt,
2564
+ startTime,
2565
+ });
2566
+ }
2567
+ async maybeApplyGenerateOrchestration(options) {
2568
+ if (!this.enableOrchestration || options.provider || options.model) {
2569
+ return;
2570
+ }
2571
+ try {
2572
+ const orchestratedOptions = await this.applyOrchestration(options);
2573
+ logger.debug("Orchestration applied", {
2574
+ originalProvider: options.provider || "auto",
2575
+ orchestratedProvider: orchestratedOptions.provider,
2576
+ orchestratedModel: orchestratedOptions.model,
2577
+ prompt: options.input.text.substring(0, 100),
2578
+ });
2579
+ Object.assign(options, orchestratedOptions);
2580
+ if (orchestratedOptions.model) {
2581
+ options.model = resolveModel(options.model, this.modelAliasConfig);
2582
+ }
2583
+ }
2584
+ catch (error) {
2585
+ logger.warn("Orchestration failed, continuing with original options", {
2586
+ error: error instanceof Error ? error.message : String(error),
2587
+ originalProvider: options.provider || "auto",
2588
+ });
2589
+ }
2590
+ }
2591
+ async prepareGenerateAugmentations(options) {
2592
+ if (options.rag?.files?.length) {
2593
+ try {
2594
+ const { prepareRAGTool } = await import("./rag/ragIntegration.js");
2595
+ const ragResult = await prepareRAGTool(options.rag, options.provider);
2596
+ if (!options.tools) {
2597
+ options.tools = {};
2641
2598
  }
2642
- catch (error) {
2643
- generateSpan.setStatus({
2644
- code: SpanStatusCode.ERROR,
2645
- message: error instanceof Error ? error.message : String(error),
2646
- });
2647
- // Emit generation:end on error so metrics listeners still record the failure.
2648
- // Note: variables declared inside try blocks are not accessible in error
2649
- // handlers, so we extract what we can from the original input.
2650
- const errProvider = typeof optionsOrPrompt === "object"
2651
- ? optionsOrPrompt.provider || "unknown"
2652
- : "unknown";
2653
- const errModel = typeof optionsOrPrompt === "object" ? optionsOrPrompt.model || "unknown" : "unknown";
2654
- try {
2655
- this.emitter.emit("generation:end", {
2656
- provider: errProvider,
2657
- model: errModel,
2658
- responseTime: 0,
2659
- error: error instanceof Error ? error.message : String(error),
2660
- success: false,
2661
- });
2662
- }
2663
- catch (emitError) {
2664
- void emitError; // non-blocking — error event emission is best-effort
2665
- }
2666
- throw error;
2599
+ options.tools[ragResult.toolName] =
2600
+ ragResult.tool;
2601
+ options.systemPrompt =
2602
+ (options.systemPrompt || "") +
2603
+ [
2604
+ `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
2605
+ `${ragResult.filesLoaded} loaded document(s) containing ${ragResult.chunksIndexed} indexed chunks.`,
2606
+ `ALWAYS use the "${ragResult.toolName}" tool FIRST to answer the user's question before using any other tools.`,
2607
+ `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
2608
+ `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
2609
+ ].join(" ");
2610
+ logger.info("[RAG] Tool injected into generate()", {
2611
+ toolName: ragResult.toolName,
2612
+ filesLoaded: ragResult.filesLoaded,
2613
+ chunksIndexed: ragResult.chunksIndexed,
2614
+ });
2615
+ }
2616
+ catch (error) {
2617
+ logger.warn("[RAG] Failed to prepare RAG tool, continuing without RAG", {
2618
+ error: error instanceof Error ? error.message : String(error),
2619
+ });
2620
+ }
2621
+ }
2622
+ if (!this.shouldReadMemory(options.memory, options.context?.userId) ||
2623
+ !options.context?.userId) {
2624
+ return;
2625
+ }
2626
+ try {
2627
+ options.input.text = await this.retrieveMemory(options.input.text, options.context.userId, options.memory?.additionalUsers);
2628
+ logger.debug("Memory retrieval successful (generate)");
2629
+ }
2630
+ catch (error) {
2631
+ logger.warn("Memory retrieval failed (generate):", error);
2632
+ }
2633
+ }
2634
+ async buildGenerateTextOptions(options, originalPrompt, factoryResult) {
2635
+ const baseOptions = {
2636
+ prompt: options.input.text,
2637
+ provider: options.provider,
2638
+ model: options.model,
2639
+ temperature: options.temperature,
2640
+ maxTokens: options.maxTokens,
2641
+ systemPrompt: options.systemPrompt,
2642
+ schema: options.schema,
2643
+ output: options.output,
2644
+ tools: options.tools,
2645
+ disableTools: options.disableTools,
2646
+ toolFilter: options.toolFilter,
2647
+ excludeTools: options.excludeTools,
2648
+ maxSteps: options.maxSteps,
2649
+ toolChoice: options.toolChoice,
2650
+ prepareStep: options.prepareStep,
2651
+ enableAnalytics: options.enableAnalytics,
2652
+ enableEvaluation: options.enableEvaluation,
2653
+ context: options.context,
2654
+ evaluationDomain: options.evaluationDomain,
2655
+ toolUsageContext: options.toolUsageContext,
2656
+ input: options.input,
2657
+ region: options.region,
2658
+ tts: options.tts,
2659
+ fileRegistry: this.fileRegistry,
2660
+ abortSignal: options.abortSignal,
2661
+ skipToolPromptInjection: options.skipToolPromptInjection,
2662
+ middleware: options.middleware,
2663
+ conversationMessages: options.conversationMessages,
2664
+ };
2665
+ const extraContext = options;
2666
+ if (extraContext.sessionId || extraContext.userId) {
2667
+ baseOptions.context = {
2668
+ ...baseOptions.context,
2669
+ ...(extraContext.sessionId && !baseOptions.context?.sessionId
2670
+ ? { sessionId: extraContext.sessionId }
2671
+ : {}),
2672
+ ...(extraContext.userId && !baseOptions.context?.userId
2673
+ ? { userId: extraContext.userId }
2674
+ : {}),
2675
+ };
2676
+ }
2677
+ const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
2678
+ if (this.conversationMemory) {
2679
+ textOptions.conversationMemoryConfig = this.conversationMemory.config;
2680
+ textOptions.originalPrompt = originalPrompt;
2681
+ }
2682
+ const { toolResults, enhancedPrompt } = await this.detectAndExecuteTools(textOptions.prompt || options.input.text, factoryResult.domainType);
2683
+ if (enhancedPrompt !== textOptions.prompt) {
2684
+ textOptions.prompt = enhancedPrompt;
2685
+ logger.debug("Enhanced prompt with tool results", {
2686
+ originalLength: options.input.text.length,
2687
+ enhancedLength: enhancedPrompt.length,
2688
+ toolResults: toolResults.length,
2689
+ });
2690
+ }
2691
+ return textOptions;
2692
+ }
2693
+ finalizeGenerateRequestResult(params) {
2694
+ const { generateSpan, options, textOptions, textResult, factoryResult, originalPrompt, startTime, } = params;
2695
+ this.emitter.emit("generation:end", {
2696
+ provider: textResult.provider,
2697
+ responseTime: Date.now() - startTime,
2698
+ toolsUsed: textResult.toolsUsed,
2699
+ timestamp: Date.now(),
2700
+ result: textResult,
2701
+ prompt: options.input?.text || options.prompt,
2702
+ temperature: textOptions.temperature,
2703
+ maxTokens: textOptions.maxTokens,
2704
+ });
2705
+ this.emitter.emit("response:end", textResult.content || "");
2706
+ this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
2707
+ const generateResult = {
2708
+ content: textResult.content,
2709
+ finishReason: textResult.finishReason,
2710
+ provider: textResult.provider,
2711
+ model: textResult.model,
2712
+ usage: textResult.usage
2713
+ ? {
2714
+ input: textResult.usage.input || 0,
2715
+ output: textResult.usage.output || 0,
2716
+ total: textResult.usage.total || 0,
2667
2717
  }
2668
- finally {
2669
- this._disableToolCacheForCurrentRequest = false;
2670
- generateSpan.end();
2718
+ : undefined,
2719
+ responseTime: textResult.responseTime,
2720
+ toolsUsed: textResult.toolsUsed,
2721
+ toolExecutions: transformToolExecutions(textResult.toolExecutions),
2722
+ enhancedWithTools: textResult.enhancedWithTools,
2723
+ availableTools: transformAvailableTools(textResult.availableTools),
2724
+ analytics: textResult.analytics,
2725
+ imageOutput: textResult.imageOutput,
2726
+ evaluation: textResult.evaluation
2727
+ ? {
2728
+ ...textResult.evaluation,
2729
+ isOffTopic: textResult.evaluation.isOffTopic ?? false,
2730
+ alertSeverity: textResult.evaluation.alertSeverity ?? "none",
2731
+ reasoning: textResult.evaluation.reasoning ?? "No evaluation provided",
2732
+ evaluationModel: textResult.evaluation.evaluationModel ?? "unknown",
2733
+ evaluationTime: textResult.evaluation.evaluationTime ?? Date.now(),
2734
+ evaluationDomain: textResult.evaluation.evaluationDomain ??
2735
+ textOptions.evaluationDomain ??
2736
+ factoryResult.domainType,
2671
2737
  }
2672
- }); // end metricsTraceContextStorage.run
2673
- });
2738
+ : undefined,
2739
+ audio: textResult.audio,
2740
+ video: textResult.video,
2741
+ ppt: textResult.ppt,
2742
+ ...(textResult.retries && { retries: textResult.retries }),
2743
+ };
2744
+ if (generateResult.analytics?.cost && generateResult.analytics.cost > 0) {
2745
+ this._sessionCostUsd += generateResult.analytics.cost;
2746
+ }
2747
+ this.scheduleGenerateMemoryStorage(options, originalPrompt, generateResult);
2748
+ generateSpan.setAttribute("neurolink.output_length", generateResult.content?.length || 0);
2749
+ generateSpan.setAttribute("neurolink.tokens.input", generateResult.usage?.input || 0);
2750
+ generateSpan.setAttribute("neurolink.tokens.output", generateResult.usage?.output || 0);
2751
+ generateSpan.setAttribute("neurolink.finish_reason", generateResult.finishReason || "unknown");
2752
+ generateSpan.setAttribute("neurolink.result_provider", generateResult.provider || "unknown");
2753
+ generateSpan.setAttribute("neurolink.result_model", generateResult.model || "unknown");
2754
+ generateSpan.setAttribute("generate.retry_count", generateResult.retries?.count || 0);
2755
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
2756
+ return generateResult;
2757
+ }
2758
+ emitGenerateErrorEvent(optionsOrPrompt, error) {
2759
+ const errProvider = typeof optionsOrPrompt === "object"
2760
+ ? optionsOrPrompt.provider || "unknown"
2761
+ : "unknown";
2762
+ const errModel = typeof optionsOrPrompt === "object"
2763
+ ? optionsOrPrompt.model || "unknown"
2764
+ : "unknown";
2765
+ try {
2766
+ this.emitter.emit("generation:end", {
2767
+ provider: errProvider,
2768
+ model: errModel,
2769
+ responseTime: 0,
2770
+ error: error instanceof Error ? error.message : String(error),
2771
+ success: false,
2772
+ });
2773
+ }
2774
+ catch (emitError) {
2775
+ void emitError;
2776
+ }
2674
2777
  }
2675
2778
  /**
2676
2779
  * Schedule non-blocking memory storage after generate completes.
@@ -2751,8 +2854,11 @@ Current user's request: ${currentInput}`;
2751
2854
  ?.filter((m) => m.role === "user" || m.role === "assistant")
2752
2855
  .map((m) => ({
2753
2856
  role: m.role,
2754
- content: typeof m.content === "string" ? m.content : JSON.stringify(m.content),
2755
- })) ?? options.conversationHistory,
2857
+ content: typeof m.content === "string"
2858
+ ? m.content
2859
+ : JSON.stringify(m.content),
2860
+ })) ??
2861
+ options.conversationHistory,
2756
2862
  timeout: options.timeout,
2757
2863
  verbose: false,
2758
2864
  metadata: options.context,
@@ -2762,8 +2868,10 @@ Current user's request: ${currentInput}`;
2762
2868
  // Primary output (backward compatible) - use the original best response
2763
2869
  content: workflowResult.content,
2764
2870
  // Provider info from selected response
2765
- provider: workflowResult.selectedResponse?.provider || workflowConfig.models[0]?.provider,
2766
- model: workflowResult.selectedResponse?.model || workflowConfig.models[0]?.model,
2871
+ provider: workflowResult.selectedResponse?.provider ||
2872
+ workflowConfig.models[0]?.provider,
2873
+ model: workflowResult.selectedResponse?.model ||
2874
+ workflowConfig.models[0]?.model,
2767
2875
  // Basic usage info
2768
2876
  usage: workflowResult.usage
2769
2877
  ? {
@@ -2845,8 +2953,11 @@ Current user's request: ${currentInput}`;
2845
2953
  ?.filter((m) => m.role === "user" || m.role === "assistant")
2846
2954
  .map((m) => ({
2847
2955
  role: m.role,
2848
- content: typeof m.content === "string" ? m.content : JSON.stringify(m.content),
2849
- })) ?? options.conversationHistory,
2956
+ content: typeof m.content === "string"
2957
+ ? m.content
2958
+ : JSON.stringify(m.content),
2959
+ })) ??
2960
+ options.conversationHistory,
2850
2961
  timeout: options.timeout,
2851
2962
  verbose: false,
2852
2963
  metadata: options.context,
@@ -2970,7 +3081,9 @@ Current user's request: ${currentInput}`;
2970
3081
  */
2971
3082
  async generateText(options) {
2972
3083
  // Validate required parameters for backward compatibility
2973
- if (!options.prompt || typeof options.prompt !== "string" || options.prompt.trim() === "") {
3084
+ if (!options.prompt ||
3085
+ typeof options.prompt !== "string" ||
3086
+ options.prompt.trim() === "") {
2974
3087
  throw new Error("GenerateText options must include prompt as a non-empty string");
2975
3088
  }
2976
3089
  // NL-004: Resolve model aliases/deprecations before processing
@@ -2989,239 +3102,247 @@ Current user's request: ${currentInput}`;
2989
3102
  * 5. Store conversation turn for future context
2990
3103
  */
2991
3104
  async generateTextInternal(options) {
2992
- return tracers.sdk.startActiveSpan("neurolink.generateTextInternal", { kind: SpanKind.INTERNAL }, async (internalSpan) => {
2993
- try {
2994
- const generateInternalId = `generate-internal-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
2995
- const existingRequestId = options.context?.requestId;
2996
- const requestId = typeof existingRequestId === "string" && existingRequestId
2997
- ? existingRequestId
2998
- : `req-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
2999
- options.context = { ...options.context, requestId };
3000
- const generateInternalStartTime = Date.now();
3001
- const generateInternalHrTimeStart = process.hrtime.bigint();
3002
- const functionTag = "NeuroLink.generateTextInternal";
3003
- // Set span attributes for internal generation
3004
- internalSpan.setAttribute("neurolink.request_id", requestId);
3005
- internalSpan.setAttribute("neurolink.has_conversation_memory", !!this.conversationMemory);
3006
- internalSpan.setAttribute("neurolink.provider", options.provider || "auto");
3007
- internalSpan.setAttribute("neurolink.model", options.model || "default");
3008
- this.logGenerateTextInternalStart(generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, options, functionTag);
3009
- this.emitGenerationStartEvents(options);
3010
- try {
3011
- await this.initializeConversationMemoryForGeneration(generateInternalId, generateInternalStartTime, generateInternalHrTimeStart);
3012
- const mcpResult = await this.attemptMCPGeneration(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag);
3013
- if (mcpResult) {
3014
- logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS (MCP path)`, {
3015
- provider: mcpResult.provider,
3016
- model: mcpResult.model,
3017
- responseTimeMs: Date.now() - generateInternalStartTime,
3018
- tokensUsed: mcpResult.usage?.total || 0,
3019
- toolsUsed: mcpResult.toolsUsed?.length || 0,
3020
- ...(mcpResult.usage?.cacheCreationTokens !== undefined && {
3021
- cacheCreationTokens: mcpResult.usage.cacheCreationTokens,
3022
- }),
3023
- ...(mcpResult.usage?.cacheReadTokens !== undefined && {
3024
- cacheReadTokens: mcpResult.usage.cacheReadTokens,
3025
- }),
3026
- ...(mcpResult.usage?.cacheSavingsPercent !== undefined && {
3027
- cacheSavingsPercent: mcpResult.usage.cacheSavingsPercent,
3028
- }),
3029
- });
3030
- {
3031
- const memStoreStart = Date.now();
3032
- try {
3033
- await storeConversationTurn(this.conversationMemory, options, mcpResult, new Date(generateInternalStartTime), requestId);
3034
- this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "mcp" }, Date.now() - memStoreStart, SpanStatus.OK);
3035
- }
3036
- catch (memErr) {
3037
- this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "mcp" }, Date.now() - memStoreStart, SpanStatus.ERROR, memErr instanceof Error ? memErr.message : String(memErr));
3038
- }
3039
- }
3040
- this.emitter.emit("response:end", mcpResult.content || "");
3041
- internalSpan.setAttribute("neurolink.path", "mcp");
3042
- internalSpan.setAttribute("neurolink.tokens.input", mcpResult.usage?.input || 0);
3043
- internalSpan.setAttribute("neurolink.tokens.output", mcpResult.usage?.output || 0);
3044
- internalSpan.setAttribute("neurolink.result_provider", mcpResult.provider || "unknown");
3045
- internalSpan.setStatus({ code: SpanStatusCode.OK });
3046
- return mcpResult;
3047
- }
3048
- if (options.abortSignal?.aborted) {
3049
- throw new DOMException("The operation was aborted", "AbortError");
3050
- }
3051
- // Save original messages for smart overflow recovery (Solution 6)
3052
- // directProviderGeneration may compact messages; if provider still rejects,
3053
- // the catch block needs the originals for a more effective retry
3054
- if (this.conversationMemory) {
3055
- const originalMessages = await getConversationMessages(this.conversationMemory, options);
3056
- options._originalConversationMessages = originalMessages ? [...originalMessages] : undefined;
3057
- }
3058
- const directResult = await this.directProviderGeneration(options);
3059
- logger.debug(`[${functionTag}] Direct generation successful`);
3060
- logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS`, {
3061
- provider: directResult.provider,
3062
- model: directResult.model,
3063
- responseTimeMs: Date.now() - generateInternalStartTime,
3064
- tokensUsed: directResult.usage?.total || 0,
3065
- toolsUsed: directResult.toolsUsed?.length || 0,
3066
- ...(directResult.usage?.cacheCreationTokens !== undefined && {
3067
- cacheCreationTokens: directResult.usage.cacheCreationTokens,
3068
- }),
3069
- ...(directResult.usage?.cacheReadTokens !== undefined && {
3070
- cacheReadTokens: directResult.usage.cacheReadTokens,
3071
- }),
3072
- ...(directResult.usage?.cacheSavingsPercent !== undefined && {
3073
- cacheSavingsPercent: directResult.usage.cacheSavingsPercent,
3074
- }),
3075
- });
3076
- {
3077
- const memStoreStart = Date.now();
3078
- try {
3079
- await storeConversationTurn(this.conversationMemory, options, directResult, new Date(generateInternalStartTime), requestId);
3080
- this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "direct" }, Date.now() - memStoreStart, SpanStatus.OK);
3081
- }
3082
- catch (memErr) {
3083
- this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "direct" }, Date.now() - memStoreStart, SpanStatus.ERROR, memErr instanceof Error ? memErr.message : String(memErr));
3084
- }
3085
- }
3086
- this.emitter.emit("response:end", directResult.content || "");
3087
- this.emitter.emit("message", `Text generation completed successfully`);
3088
- internalSpan.setAttribute("neurolink.path", "direct");
3089
- internalSpan.setAttribute("neurolink.tokens.input", directResult.usage?.input || 0);
3090
- internalSpan.setAttribute("neurolink.tokens.output", directResult.usage?.output || 0);
3091
- internalSpan.setAttribute("neurolink.result_provider", directResult.provider || "unknown");
3092
- internalSpan.setStatus({ code: SpanStatusCode.OK });
3093
- return directResult;
3094
- }
3095
- catch (error) {
3096
- // Check if this is a context overflow error - attempt recovery
3097
- if (isContextOverflowError(error) && this.conversationMemory) {
3098
- logger.warn(`[${functionTag}] Context overflow detected by provider, attempting smart recovery`, {
3099
- error: error instanceof Error ? error.message : String(error),
3100
- overflowProvider: getContextOverflowProvider(error),
3101
- });
3102
- try {
3103
- // IMPROVEMENT 1: Extract actual token count from provider error if available
3104
- const actualOverflow = parseProviderOverflowDetails(error);
3105
- // IMPROVEMENT 2: Use ORIGINAL messages (not already-compacted ones)
3106
- const originalMessages = options._originalConversationMessages ?? (await getConversationMessages(this.conversationMemory, options));
3107
- // IMPROVEMENT 3: Calculate precise reduction target
3108
- const recoveryBudget = checkContextBudget({
3109
- provider: options.provider || "openai",
3110
- model: options.model,
3111
- maxTokens: options.maxTokens,
3112
- currentPrompt: options.prompt,
3113
- systemPrompt: options.systemPrompt,
3114
- });
3115
- // Use provider's reported token count if available (more accurate than our estimate)
3116
- const actualTokens = actualOverflow?.actualTokens ?? recoveryBudget.estimatedInputTokens;
3117
- const budgetTokens = actualOverflow?.budgetTokens ?? recoveryBudget.availableInputTokens;
3118
- // Target = 70% of budget (aggressive safety margin for recovery)
3119
- const compactionTarget = Math.floor(budgetTokens * 0.7);
3120
- // IMPROVEMENT 4: Calculate adaptive truncation fraction from actual numbers
3121
- const requiredReduction = actualTokens > 0 ? (actualTokens - compactionTarget) / actualTokens : 0.5;
3122
- const compactor = new ContextCompactor({
3123
- enableSummarize: false, // Skip LLM call for recovery (speed)
3124
- enablePrune: true,
3125
- enableDeduplicate: true,
3126
- enableTruncate: true,
3127
- truncationFraction: Math.min(0.9, requiredReduction + 0.15),
3128
- });
3129
- const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
3130
- if (compactionResult.compacted) {
3131
- const repairedResult = repairToolPairs(compactionResult.messages);
3132
- // IMPROVEMENT 5: Verify BEFORE retrying
3133
- const verifyBudget = checkContextBudget({
3134
- provider: options.provider || "openai",
3135
- model: options.model,
3136
- maxTokens: options.maxTokens,
3137
- systemPrompt: options.systemPrompt,
3138
- currentPrompt: options.prompt,
3139
- conversationMessages: repairedResult.messages,
3140
- });
3141
- if (!verifyBudget.withinBudget) {
3142
- logger.error(`[${functionTag}] Recovery compaction insufficient, aborting retry`, {
3143
- estimatedTokens: verifyBudget.estimatedInputTokens,
3144
- availableTokens: verifyBudget.availableInputTokens,
3145
- });
3146
- throw new ContextBudgetExceededError(`Context overflow recovery failed. Provider rejected at ~${actualTokens} tokens, ` +
3147
- `recovery compaction achieved ${compactionResult.tokensAfter} tokens ` +
3148
- `but budget is ${budgetTokens} tokens.`, {
3149
- estimatedTokens: compactionResult.tokensAfter,
3150
- availableTokens: budgetTokens,
3151
- stagesUsed: compactionResult.stagesUsed,
3152
- breakdown: verifyBudget.breakdown,
3153
- });
3154
- }
3155
- logger.info(`[${functionTag}] Smart recovery verified, retrying generation`, {
3156
- tokensSaved: compactionResult.tokensSaved,
3157
- compactionTarget,
3158
- verifiedTokens: verifyBudget.estimatedInputTokens,
3159
- verifiedBudget: verifyBudget.availableInputTokens,
3160
- });
3161
- // Single verified retry
3162
- return await this.directProviderGeneration({
3163
- ...options,
3164
- conversationMessages: repairedResult.messages,
3165
- });
3166
- }
3167
- }
3168
- catch (retryError) {
3169
- // If the retry error is our own ContextBudgetExceededError, re-throw it
3170
- if (retryError instanceof ContextBudgetExceededError) {
3171
- throw retryError;
3172
- }
3173
- logger.error(`[${functionTag}] Recovery attempt failed`, {
3174
- error: retryError instanceof Error ? retryError.message : String(retryError),
3175
- });
3176
- }
3177
- }
3178
- // If the generation was aborted (e.g., coding task short-circuit via AbortController),
3179
- // still store the conversation turn so that:
3180
- // 1. The Redis conversation entry is created (if first turn)
3181
- // 2. setImmediate triggers generateConversationTitle() for the session
3182
- // 3. The caller's syncTitleFromRedis() can find the SDK-generated title
3183
- if (isAbortError(error)) {
3184
- logger.info(`[${functionTag}] Generation aborted — storing conversation turn for title generation`, {
3185
- hasMemory: !!this.conversationMemory,
3186
- memoryType: this.conversationMemory?.constructor?.name || "NONE",
3187
- sessionId: options.context?.sessionId || "unknown",
3188
- });
3189
- try {
3190
- const abortedResult = {
3191
- content: "[generation was interrupted]",
3192
- provider: options.provider || "unknown",
3193
- model: options.model || "unknown",
3194
- responseTime: Date.now() - generateInternalStartTime,
3195
- };
3196
- await withTimeout(storeConversationTurn(this.conversationMemory, options, abortedResult, new Date(generateInternalStartTime), requestId), 5000);
3197
- }
3198
- catch (storeError) {
3199
- logger.warn(`[${functionTag}] Failed to store conversation turn after abort`, {
3200
- error: storeError instanceof Error ? storeError.message : String(storeError),
3201
- });
3202
- }
3203
- }
3204
- else {
3205
- logger.error(`[${functionTag}] All generation methods failed`, {
3206
- error: error instanceof Error ? error.message : String(error),
3207
- });
3208
- }
3209
- this.emitter.emit("response:end", "");
3210
- this.emitter.emit("error", error instanceof Error ? error : new Error(String(error)));
3211
- throw error;
3212
- }
3213
- }
3214
- catch (spanError) {
3215
- internalSpan.setStatus({
3216
- code: SpanStatusCode.ERROR,
3217
- message: spanError instanceof Error ? spanError.message : String(spanError),
3105
+ return tracers.sdk.startActiveSpan("neurolink.generateTextInternal", { kind: SpanKind.INTERNAL }, (internalSpan) => this.executeGenerateTextInternalWithSpan(options, internalSpan));
3106
+ }
3107
+ async executeGenerateTextInternalWithSpan(options, internalSpan) {
3108
+ try {
3109
+ const context = this.initializeGenerateTextInternalContext(options);
3110
+ internalSpan.setAttribute("neurolink.request_id", context.requestId);
3111
+ internalSpan.setAttribute("neurolink.has_conversation_memory", !!this.conversationMemory);
3112
+ internalSpan.setAttribute("neurolink.provider", options.provider || "auto");
3113
+ internalSpan.setAttribute("neurolink.model", options.model || "default");
3114
+ this.logGenerateTextInternalStart(context.generateInternalId, context.generateInternalStartTime, context.generateInternalHrTimeStart, options, context.functionTag);
3115
+ this.emitGenerationStartEvents(options);
3116
+ return await this.runGenerateTextInternalFlow(options, internalSpan, context);
3117
+ }
3118
+ catch (error) {
3119
+ internalSpan.setStatus({
3120
+ code: SpanStatusCode.ERROR,
3121
+ message: error instanceof Error ? error.message : String(error),
3122
+ });
3123
+ throw error;
3124
+ }
3125
+ finally {
3126
+ internalSpan.end();
3127
+ }
3128
+ }
3129
+ initializeGenerateTextInternalContext(options) {
3130
+ const generateInternalId = `generate-internal-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3131
+ const existingRequestId = options.context?.requestId;
3132
+ const requestId = typeof existingRequestId === "string" && existingRequestId
3133
+ ? existingRequestId
3134
+ : `req-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3135
+ options.context = { ...options.context, requestId };
3136
+ return {
3137
+ generateInternalId,
3138
+ generateInternalStartTime: Date.now(),
3139
+ generateInternalHrTimeStart: process.hrtime.bigint(),
3140
+ functionTag: "NeuroLink.generateTextInternal",
3141
+ requestId,
3142
+ };
3143
+ }
3144
+ async runGenerateTextInternalFlow(options, internalSpan, context) {
3145
+ try {
3146
+ await this.initializeConversationMemoryForGeneration(context.generateInternalId, context.generateInternalStartTime, context.generateInternalHrTimeStart);
3147
+ const mcpResult = await this.attemptMCPGeneration(options, context.generateInternalId, context.generateInternalStartTime, context.generateInternalHrTimeStart, context.functionTag);
3148
+ if (mcpResult) {
3149
+ return this.finalizeGenerateTextInternalResult({
3150
+ path: "mcp",
3151
+ result: mcpResult,
3152
+ options,
3153
+ internalSpan,
3154
+ requestId: context.requestId,
3155
+ startTime: context.generateInternalStartTime,
3218
3156
  });
3219
- throw spanError;
3220
3157
  }
3221
- finally {
3222
- internalSpan.end();
3158
+ if (options.abortSignal?.aborted) {
3159
+ throw new DOMException("The operation was aborted", "AbortError");
3160
+ }
3161
+ await this.captureOriginalConversationMessagesForRecovery(options);
3162
+ const directResult = await this.directProviderGeneration(options);
3163
+ logger.debug(`[${context.functionTag}] Direct generation successful`);
3164
+ return this.finalizeGenerateTextInternalResult({
3165
+ path: "direct",
3166
+ result: directResult,
3167
+ options,
3168
+ internalSpan,
3169
+ requestId: context.requestId,
3170
+ startTime: context.generateInternalStartTime,
3171
+ });
3172
+ }
3173
+ catch (error) {
3174
+ const recoveredResult = await this.handleGenerateTextInternalFailure(options, context, error);
3175
+ if (recoveredResult) {
3176
+ return recoveredResult;
3223
3177
  }
3178
+ throw error;
3179
+ }
3180
+ }
3181
+ async captureOriginalConversationMessagesForRecovery(options) {
3182
+ if (!this.conversationMemory) {
3183
+ return;
3184
+ }
3185
+ const originalMessages = await getConversationMessages(this.conversationMemory, options);
3186
+ options._originalConversationMessages = originalMessages
3187
+ ? [...originalMessages]
3188
+ : undefined;
3189
+ }
3190
+ async finalizeGenerateTextInternalResult(params) {
3191
+ const { path, result, options, internalSpan, requestId, startTime } = params;
3192
+ logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS${path === "mcp" ? " (MCP path)" : ""}`, {
3193
+ provider: result.provider,
3194
+ model: result.model,
3195
+ responseTimeMs: Date.now() - startTime,
3196
+ tokensUsed: result.usage?.total || 0,
3197
+ toolsUsed: result.toolsUsed?.length || 0,
3198
+ ...(result.usage?.cacheCreationTokens !== undefined && {
3199
+ cacheCreationTokens: result.usage.cacheCreationTokens,
3200
+ }),
3201
+ ...(result.usage?.cacheReadTokens !== undefined && {
3202
+ cacheReadTokens: result.usage.cacheReadTokens,
3203
+ }),
3204
+ ...(result.usage?.cacheSavingsPercent !== undefined && {
3205
+ cacheSavingsPercent: result.usage.cacheSavingsPercent,
3206
+ }),
3224
3207
  });
3208
+ const memStoreStart = Date.now();
3209
+ try {
3210
+ await storeConversationTurn(this.conversationMemory, options, result, new Date(startTime), requestId);
3211
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": path }, Date.now() - memStoreStart, SpanStatus.OK);
3212
+ }
3213
+ catch (memoryError) {
3214
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": path }, Date.now() - memStoreStart, SpanStatus.ERROR, memoryError instanceof Error
3215
+ ? memoryError.message
3216
+ : String(memoryError));
3217
+ }
3218
+ this.emitter.emit("response:end", result.content || "");
3219
+ if (path === "direct") {
3220
+ this.emitter.emit("message", "Text generation completed successfully");
3221
+ }
3222
+ internalSpan.setAttribute("neurolink.path", path);
3223
+ internalSpan.setAttribute("neurolink.tokens.input", result.usage?.input || 0);
3224
+ internalSpan.setAttribute("neurolink.tokens.output", result.usage?.output || 0);
3225
+ internalSpan.setAttribute("neurolink.result_provider", result.provider || "unknown");
3226
+ internalSpan.setStatus({ code: SpanStatusCode.OK });
3227
+ return result;
3228
+ }
3229
+ async handleGenerateTextInternalFailure(options, context, error) {
3230
+ const recoveredResult = await this.tryRecoverGenerateTextOverflow(options, context.functionTag, error);
3231
+ if (recoveredResult) {
3232
+ return recoveredResult;
3233
+ }
3234
+ if (isAbortError(error)) {
3235
+ logger.info(`[${context.functionTag}] Generation aborted — storing conversation turn for title generation`, {
3236
+ hasMemory: !!this.conversationMemory,
3237
+ memoryType: this.conversationMemory?.constructor?.name || "NONE",
3238
+ sessionId: options.context?.sessionId ||
3239
+ "unknown",
3240
+ });
3241
+ try {
3242
+ const abortedResult = {
3243
+ content: "[generation was interrupted]",
3244
+ provider: options.provider || "unknown",
3245
+ model: options.model || "unknown",
3246
+ responseTime: Date.now() - context.generateInternalStartTime,
3247
+ };
3248
+ await withTimeout(storeConversationTurn(this.conversationMemory, options, abortedResult, new Date(context.generateInternalStartTime), context.requestId), 5000);
3249
+ }
3250
+ catch (storeError) {
3251
+ logger.warn(`[${context.functionTag}] Failed to store conversation turn after abort`, {
3252
+ error: storeError instanceof Error
3253
+ ? storeError.message
3254
+ : String(storeError),
3255
+ });
3256
+ }
3257
+ }
3258
+ else {
3259
+ logger.error(`[${context.functionTag}] All generation methods failed`, {
3260
+ error: error instanceof Error ? error.message : String(error),
3261
+ });
3262
+ }
3263
+ this.emitter.emit("response:end", "");
3264
+ this.emitter.emit("error", error instanceof Error ? error : new Error(String(error)));
3265
+ return null;
3266
+ }
3267
+ async tryRecoverGenerateTextOverflow(options, functionTag, error) {
3268
+ if (!isContextOverflowError(error) || !this.conversationMemory) {
3269
+ return null;
3270
+ }
3271
+ logger.warn(`[${functionTag}] Context overflow detected by provider, attempting smart recovery`, {
3272
+ error: error instanceof Error ? error.message : String(error),
3273
+ overflowProvider: getContextOverflowProvider(error),
3274
+ });
3275
+ try {
3276
+ const actualOverflow = parseProviderOverflowDetails(error);
3277
+ const originalMessages = options._originalConversationMessages ??
3278
+ (await getConversationMessages(this.conversationMemory, options));
3279
+ const recoveryBudget = checkContextBudget({
3280
+ provider: options.provider || "openai",
3281
+ model: options.model,
3282
+ maxTokens: options.maxTokens,
3283
+ currentPrompt: options.prompt,
3284
+ systemPrompt: options.systemPrompt,
3285
+ });
3286
+ const actualTokens = actualOverflow?.actualTokens ?? recoveryBudget.estimatedInputTokens;
3287
+ const budgetTokens = actualOverflow?.budgetTokens ?? recoveryBudget.availableInputTokens;
3288
+ const compactionTarget = Math.floor(budgetTokens * 0.7);
3289
+ const requiredReduction = actualTokens > 0
3290
+ ? (actualTokens - compactionTarget) / actualTokens
3291
+ : 0.5;
3292
+ const compactor = new ContextCompactor({
3293
+ enableSummarize: false,
3294
+ enablePrune: true,
3295
+ enableDeduplicate: true,
3296
+ enableTruncate: true,
3297
+ truncationFraction: Math.min(0.9, requiredReduction + 0.15),
3298
+ });
3299
+ const compactionResult = await compactor.compact(originalMessages, compactionTarget, undefined, options.context?.requestId);
3300
+ if (!compactionResult.compacted) {
3301
+ return null;
3302
+ }
3303
+ const repairedResult = repairToolPairs(compactionResult.messages);
3304
+ const verifyBudget = checkContextBudget({
3305
+ provider: options.provider || "openai",
3306
+ model: options.model,
3307
+ maxTokens: options.maxTokens,
3308
+ systemPrompt: options.systemPrompt,
3309
+ currentPrompt: options.prompt,
3310
+ conversationMessages: repairedResult.messages,
3311
+ });
3312
+ if (!verifyBudget.withinBudget) {
3313
+ logger.error(`[${functionTag}] Recovery compaction insufficient, aborting retry`, {
3314
+ estimatedTokens: verifyBudget.estimatedInputTokens,
3315
+ availableTokens: verifyBudget.availableInputTokens,
3316
+ });
3317
+ throw new ContextBudgetExceededError(`Context overflow recovery failed. Provider rejected at ~${actualTokens} tokens, ` +
3318
+ `recovery compaction achieved ${compactionResult.tokensAfter} tokens ` +
3319
+ `but budget is ${budgetTokens} tokens.`, {
3320
+ estimatedTokens: compactionResult.tokensAfter,
3321
+ availableTokens: budgetTokens,
3322
+ stagesUsed: compactionResult.stagesUsed,
3323
+ breakdown: verifyBudget.breakdown,
3324
+ });
3325
+ }
3326
+ logger.info(`[${functionTag}] Smart recovery verified, retrying generation`, {
3327
+ tokensSaved: compactionResult.tokensSaved,
3328
+ compactionTarget,
3329
+ verifiedTokens: verifyBudget.estimatedInputTokens,
3330
+ verifiedBudget: verifyBudget.availableInputTokens,
3331
+ });
3332
+ return this.directProviderGeneration({
3333
+ ...options,
3334
+ conversationMessages: repairedResult.messages,
3335
+ });
3336
+ }
3337
+ catch (retryError) {
3338
+ if (retryError instanceof ContextBudgetExceededError) {
3339
+ throw retryError;
3340
+ }
3341
+ logger.error(`[${functionTag}] Recovery attempt failed`, {
3342
+ error: retryError instanceof Error ? retryError.message : String(retryError),
3343
+ });
3344
+ return null;
3345
+ }
3225
3346
  }
3226
3347
  /**
3227
3348
  * Log generateTextInternal start with comprehensive analysis
@@ -3294,7 +3415,8 @@ Current user's request: ${currentInput}`;
3294
3415
  * Attempt MCP generation with retry logic
3295
3416
  */
3296
3417
  async attemptMCPGeneration(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag) {
3297
- if (!options.disableTools && !(options.tts?.enabled && !options.tts?.useAiResponse)) {
3418
+ if (!options.disableTools &&
3419
+ !(options.tts?.enabled && !options.tts?.useAiResponse)) {
3298
3420
  return await this.performMCPGenerationRetries(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag);
3299
3421
  }
3300
3422
  return null;
@@ -3316,7 +3438,9 @@ Current user's request: ${currentInput}`;
3316
3438
  try {
3317
3439
  logger.debug(`[${functionTag}] Attempting MCP generation (attempt ${attempt}/${maxAttempts})...`);
3318
3440
  const mcpResult = await this.tryMCPGeneration(options);
3319
- if (mcpResult && (mcpResult.content || (mcpResult.toolExecutions && mcpResult.toolExecutions.length > 0))) {
3441
+ if (mcpResult &&
3442
+ (mcpResult.content ||
3443
+ (mcpResult.toolExecutions && mcpResult.toolExecutions.length > 0))) {
3320
3444
  logger.debug(`[${functionTag}] MCP generation successful on attempt ${attempt}`, {
3321
3445
  contentLength: mcpResult.content?.length || 0,
3322
3446
  toolsUsed: mcpResult.toolsUsed?.length || 0,
@@ -3347,7 +3471,11 @@ Current user's request: ${currentInput}`;
3347
3471
  // NL-007: Record retry error for observability
3348
3472
  retryCount++;
3349
3473
  const errMsg = error instanceof Error ? error.message : String(error);
3350
- const errCode = error instanceof NeuroLinkError ? error.code : error instanceof Error ? error.name : "UNKNOWN";
3474
+ const errCode = error instanceof NeuroLinkError
3475
+ ? error.code
3476
+ : error instanceof Error
3477
+ ? error.name
3478
+ : "UNKNOWN";
3351
3479
  retryErrors.push({ code: errCode, message: errMsg.substring(0, 500) });
3352
3480
  logger.debug(`[${functionTag}] MCP generation failed on attempt ${attempt}/${maxAttempts}`, {
3353
3481
  error: errMsg,
@@ -3366,8 +3494,11 @@ Current user's request: ${currentInput}`;
3366
3494
  const isNonRetryable = isContextOverflowError(error) ||
3367
3495
  isToolError ||
3368
3496
  isNonRetryableProviderError(error) ||
3369
- (error instanceof Error && error.isRetryable === false) ||
3370
- (error instanceof Error && error.statusCode === 400);
3497
+ (error instanceof Error &&
3498
+ error.isRetryable ===
3499
+ false) ||
3500
+ (error instanceof Error &&
3501
+ error.statusCode === 400);
3371
3502
  if (isNonRetryable) {
3372
3503
  logger.debug(`[${functionTag}] Non-retryable error detected, skipping remaining retries`);
3373
3504
  break;
@@ -3403,288 +3534,28 @@ Current user's request: ${currentInput}`;
3403
3534
  throw new DOMException("The operation was aborted", "AbortError");
3404
3535
  }
3405
3536
  // 🚀 EXHAUSTIVE LOGGING POINT T001: TRY MCP GENERATION ENTRY
3406
- const requestId = options.context?.requestId || "unknown";
3537
+ const requestId = options.context?.requestId ||
3538
+ "unknown";
3407
3539
  const tryMCPId = `try-mcp-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3408
3540
  const tryMCPStartTime = Date.now();
3409
3541
  const tryMCPHrTimeStart = process.hrtime.bigint();
3410
3542
  const functionTag = "NeuroLink.tryMCPGeneration";
3411
3543
  try {
3412
- // Initialize MCP if needed
3413
- await this.initializeMCP();
3414
- if (!this.mcpInitialized) {
3415
- logger.warn(`[NeuroLink] ⚠️ LOG_POINT_T004_MCP_NOT_AVAILABLE`, {
3416
- logPoint: "T004_MCP_NOT_AVAILABLE",
3417
- tryMCPId,
3418
- timestamp: new Date().toISOString(),
3419
- elapsedMs: Date.now() - tryMCPStartTime,
3420
- elapsedNs: (process.hrtime.bigint() - tryMCPHrTimeStart).toString(),
3421
- mcpInitialized: this.mcpInitialized,
3422
- mcpComponents: {
3423
- hasExternalServerManager: !!this.externalServerManager,
3424
- hasToolRegistry: !!this.toolRegistry,
3425
- hasProviderRegistry: !!AIProviderFactory,
3426
- },
3427
- fallbackReason: "MCP_NOT_INITIALIZED",
3428
- message: "MCP not available - returning null for fallback to direct generation",
3429
- });
3430
- return null; // Skip MCP if not available
3431
- }
3432
- // Context creation removed - was never used
3433
- // Determine provider
3434
- const providerName = options.provider === "auto" || !options.provider ? await getBestProvider() : options.provider;
3435
- // Get available tools
3436
- let availableTools = await this.getAllAvailableTools();
3437
- // NL-001: Filter out tools with OPEN circuit breakers
3438
- const { tools: circuitBreakerFilteredTools, unavailableTools } = this.toolRegistry.getAvailableTools(this.toolCircuitBreakers);
3439
- // Intersect: keep only tools that pass both getAllAvailableTools and circuit breaker filtering
3440
- const cbFilteredNames = new Set(circuitBreakerFilteredTools.map((t) => t.name));
3441
- availableTools = availableTools.filter((t) => cbFilteredNames.has(t.name));
3442
- // Apply per-call tool filtering for system prompt tool descriptions
3443
- availableTools = this.applyToolInfoFiltering(availableTools, options);
3444
- const targetTool = availableTools.find((t) => t.name.includes("SuccessRateSRByTime") || t.name.includes("juspay-analytics"));
3445
- logger.debug("Available tools for AI prompt generation", {
3446
- toolsCount: availableTools.length,
3447
- toolNames: availableTools.map((t) => t.name),
3448
- unavailableToolsCount: unavailableTools.length,
3449
- unavailableTools: unavailableTools,
3450
- hasTargetTool: !!targetTool,
3451
- targetToolDetails: targetTool
3452
- ? {
3453
- name: targetTool.name,
3454
- description: targetTool.description,
3455
- server: targetTool.server,
3456
- }
3457
- : null,
3458
- });
3459
- // NL-001: Inject system note about unavailable tools
3460
- let circuitBreakerNote = "";
3461
- if (unavailableTools.length > 0) {
3462
- circuitBreakerNote = `\n\nNOTE: The following tools are temporarily unavailable due to repeated failures: ${unavailableTools.join(", ")}. Do not attempt to call these tools.`;
3463
- }
3464
- // Create tool-aware system prompt (skip if skipToolPromptInjection is true)
3465
- const enhancedSystemPrompt = options.skipToolPromptInjection
3466
- ? (options.systemPrompt || "") + circuitBreakerNote
3467
- : this.createToolAwareSystemPrompt(options.systemPrompt, availableTools) + circuitBreakerNote;
3468
- logger.debug("Tool-aware system prompt created", {
3469
- requestId,
3470
- originalPromptLength: options.systemPrompt?.length || 0,
3471
- enhancedPromptLength: enhancedSystemPrompt.length,
3472
- skippedToolInjection: !!options.skipToolPromptInjection,
3473
- enhancedPromptPreview: enhancedSystemPrompt.substring(0, 80) + "...",
3474
- });
3475
- logger.debug("[Observability] System prompt metadata", {
3476
- requestId,
3477
- systemPromptLength: enhancedSystemPrompt.length,
3478
- systemPromptHash: enhancedSystemPrompt.length > 0 ? `sha256:${enhancedSystemPrompt.slice(0, 8)}...` : "empty",
3479
- hasCustomSystemPrompt: !!options.systemPrompt,
3480
- });
3481
- // Get conversation messages for context
3482
- let conversationMessages = await getConversationMessages(this.conversationMemory, options);
3483
- if (logger.shouldLog("debug")) {
3484
- try {
3485
- logger.debug("[Observability] Conversation history summary", {
3486
- requestId,
3487
- messageCount: conversationMessages?.length || 0,
3488
- messages: conversationMessages?.map((msg, i) => {
3489
- let contentLength;
3490
- if (typeof msg.content === "string") {
3491
- contentLength = msg.content.length;
3492
- }
3493
- else {
3494
- try {
3495
- contentLength = JSON.stringify(msg.content).length;
3496
- }
3497
- catch {
3498
- contentLength = 0;
3499
- }
3500
- }
3501
- return {
3502
- index: i,
3503
- role: msg.role,
3504
- contentLength,
3505
- contentPreview: typeof msg.content === "string" ? msg.content.substring(0, 200) : "[multimodal]",
3506
- };
3507
- }),
3508
- });
3509
- }
3510
- catch {
3511
- // Ignore serialization errors in debug logging
3512
- }
3544
+ const generationContext = await this.prepareMCPGenerationContext(options, requestId, tryMCPId, tryMCPStartTime, tryMCPHrTimeStart);
3545
+ if (!generationContext) {
3546
+ return null;
3513
3547
  }
3514
- logger.debug("[Observability] Available tools for LLM", {
3515
- requestId,
3516
- toolCount: availableTools?.length || 0,
3517
- toolNames: availableTools?.map((t) => t.name) || [],
3518
- });
3519
- // Pre-generation budget check
3520
- const budgetResult = checkContextBudget({
3521
- provider: providerName,
3522
- model: options.model,
3523
- maxTokens: options.maxTokens,
3524
- systemPrompt: enhancedSystemPrompt,
3525
- conversationMessages: conversationMessages,
3526
- currentPrompt: options.prompt,
3527
- toolDefinitions: availableTools,
3528
- });
3529
- logger.info("[TokenBudget] Token breakdown", {
3548
+ const conversationMessages = await this.ensureMCPGenerationBudget(options, requestId, generationContext.providerName, generationContext.enhancedSystemPrompt, generationContext.availableTools, generationContext.conversationMessages);
3549
+ return this.generateWithMCPProvider({
3550
+ options,
3530
3551
  requestId,
3531
- system: budgetResult.breakdown?.systemPrompt || 0,
3532
- history: budgetResult.breakdown?.conversationHistory || 0,
3533
- tools: budgetResult.breakdown?.toolDefinitions || 0,
3534
- currentPrompt: budgetResult.breakdown?.currentPrompt || 0,
3535
- files: budgetResult.breakdown?.fileAttachments || 0,
3536
- total: budgetResult.estimatedInputTokens,
3537
- budget: budgetResult.availableInputTokens,
3538
- usagePercent: Math.round(budgetResult.usageRatio * 1000) / 10,
3539
- conversationMessageCount: conversationMessages?.length || 0,
3540
- shouldCompact: budgetResult.shouldCompact,
3541
- });
3542
- const messageCount = conversationMessages?.length || 0;
3543
- const compactionSessionId = this.getCompactionSessionId(options);
3544
- if (budgetResult.shouldCompact &&
3545
- this.conversationMemory &&
3546
- messageCount > (this.lastCompactionMessageCount.get(compactionSessionId) ?? 0)) {
3547
- logger.info("[NeuroLink] Context budget exceeded, triggering auto-compaction", {
3548
- usageRatio: budgetResult.usageRatio,
3549
- estimatedTokens: budgetResult.estimatedInputTokens,
3550
- availableTokens: budgetResult.availableInputTokens,
3551
- });
3552
- const compactor = new ContextCompactor({
3553
- provider: providerName,
3554
- summarizationProvider: this.conversationMemoryConfig?.conversationMemory?.summarizationProvider,
3555
- summarizationModel: this.conversationMemoryConfig?.conversationMemory?.summarizationModel,
3556
- });
3557
- const compactionResult = await compactor.compact(conversationMessages, budgetResult.availableInputTokens, this.conversationMemoryConfig?.conversationMemory, requestId);
3558
- if (compactionResult.compacted) {
3559
- const repairedResult = repairToolPairs(compactionResult.messages);
3560
- conversationMessages = repairedResult.messages;
3561
- this.lastCompactionMessageCount.set(compactionSessionId, conversationMessages.length);
3562
- logger.info("[NeuroLink] Context compacted successfully", {
3563
- stagesUsed: compactionResult.stagesUsed,
3564
- tokensSaved: compactionResult.tokensSaved,
3565
- });
3566
- }
3567
- // POST-COMPACTION BUDGET RE-CHECK (BUG-003 fix)
3568
- const postCompactBudget = checkContextBudget({
3569
- provider: providerName,
3570
- model: options.model,
3571
- maxTokens: options.maxTokens,
3572
- systemPrompt: enhancedSystemPrompt,
3573
- conversationMessages: conversationMessages,
3574
- currentPrompt: options.prompt,
3575
- toolDefinitions: availableTools,
3576
- });
3577
- if (!postCompactBudget.withinBudget) {
3578
- const overageRatio = postCompactBudget.usageRatio - 1.0;
3579
- logger.warn("[NeuroLink] Post-compaction still over budget, attempting emergency content truncation", {
3580
- requestId,
3581
- estimatedTokens: postCompactBudget.estimatedInputTokens,
3582
- availableTokens: postCompactBudget.availableInputTokens,
3583
- overagePercent: Math.round(overageRatio * 100),
3584
- stagesUsedInCompaction: compactionResult.stagesUsed,
3585
- });
3586
- // Emergency: truncate the content of the longest messages
3587
- conversationMessages = emergencyContentTruncation(conversationMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
3588
- // Final check after emergency truncation
3589
- const finalBudget = checkContextBudget({
3590
- provider: providerName,
3591
- model: options.model,
3592
- maxTokens: options.maxTokens,
3593
- systemPrompt: enhancedSystemPrompt,
3594
- conversationMessages: conversationMessages,
3595
- currentPrompt: options.prompt,
3596
- toolDefinitions: availableTools,
3597
- });
3598
- if (!finalBudget.withinBudget) {
3599
- throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
3600
- `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
3601
- `Budget: ${finalBudget.availableInputTokens} tokens. ` +
3602
- `Conversation is too large to fit in the model's context window.`, {
3603
- estimatedTokens: finalBudget.estimatedInputTokens,
3604
- availableTokens: finalBudget.availableInputTokens,
3605
- stagesUsed: compactionResult.stagesUsed,
3606
- breakdown: finalBudget.breakdown,
3607
- });
3608
- }
3609
- }
3610
- }
3611
- // Create provider and generate (with confidence that context fits)
3612
- const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
3613
- this, // Pass SDK instance
3614
- options.region);
3615
- // Propagate trace context for parent-child span hierarchy
3616
- provider.setTraceContext(this._metricsTraceContext);
3617
- // ADD: Emit connection events for all providers (Bedrock-compatible)
3618
- this.emitter.emit("connected");
3619
- this.emitter.emit("message", `${providerName} provider initialized successfully`);
3620
- // Enable tool execution for the provider using BaseProvider method
3621
- provider.setupToolExecutor({
3622
- customTools: this.getCustomTools(),
3623
- executeTool: (toolName, params) => this.executeTool(toolName, params, {
3624
- disableToolCache: options.disableToolCache,
3625
- }),
3626
- }, functionTag);
3627
- logger.debug("[Observability] User input to LLM", {
3628
- requestId,
3629
- promptPreview: options.prompt?.substring(0, 200),
3630
- promptLength: options.prompt?.length || 0,
3631
- model: options.model,
3632
- maxTokens: options.maxTokens,
3633
- temperature: options.temperature,
3634
- maxSteps: options.maxSteps,
3635
- skipToolPromptInjection: options.skipToolPromptInjection,
3636
- });
3637
- const result = await provider.generate({
3638
- ...options,
3639
- systemPrompt: enhancedSystemPrompt,
3640
- conversationMessages, // Inject conversation history
3641
- });
3642
- const responseTime = Date.now() - tryMCPStartTime;
3643
- // Enhanced result validation - consider tool executions as valid results
3644
- const hasContent = result && result.content && result.content.trim().length > 0;
3645
- const hasToolExecutions = result && result.toolExecutions && result.toolExecutions.length > 0;
3646
- // Log detailed result analysis for debugging
3647
- mcpLogger.debug(`[${functionTag}] Result validation:`, {
3648
- hasResult: !!result,
3649
- hasContent,
3650
- hasToolExecutions,
3651
- contentLength: result?.content?.length || 0,
3652
- toolExecutionsCount: result?.toolExecutions?.length || 0,
3653
- toolsUsedCount: result?.toolsUsed?.length || 0,
3654
- });
3655
- // Accept result if it has content OR successful tool executions
3656
- if (!hasContent && !hasToolExecutions) {
3657
- mcpLogger.debug(`[${functionTag}] Result rejected: no content and no tool executions`);
3658
- return null; // Let caller fall back to direct generation
3659
- }
3660
- // Transform tool executions with enhanced preservation
3661
- const transformedToolExecutions = transformToolExecutionsForMCP(result.toolExecutions);
3662
- // Log transformation results
3663
- mcpLogger.debug(`[${functionTag}] Tool execution transformation:`, {
3664
- originalCount: result?.toolExecutions?.length || 0,
3665
- transformedCount: transformedToolExecutions.length,
3666
- transformedTools: transformedToolExecutions.map((te) => te.toolName),
3552
+ functionTag,
3553
+ tryMCPStartTime,
3554
+ providerName: generationContext.providerName,
3555
+ availableTools: generationContext.availableTools,
3556
+ enhancedSystemPrompt: generationContext.enhancedSystemPrompt,
3557
+ conversationMessages,
3667
3558
  });
3668
- // Return enhanced result with preserved tool information
3669
- return {
3670
- content: result.content || "", // Ensure content is never undefined
3671
- provider: providerName,
3672
- model: result.model,
3673
- usage: result.usage,
3674
- responseTime,
3675
- finishReason: result.finishReason,
3676
- toolsUsed: result.toolsUsed || [],
3677
- toolExecutions: transformedToolExecutions,
3678
- enhancedWithTools: Boolean(hasToolExecutions), // Mark as enhanced if tools were actually used
3679
- availableTools: transformToolsForMCP(transformToolsToExpectedFormat(availableTools)),
3680
- audio: result.audio,
3681
- video: result.video,
3682
- ppt: result.ppt,
3683
- imageOutput: result.imageOutput,
3684
- // Include analytics and evaluation from BaseProvider
3685
- analytics: result.analytics,
3686
- evaluation: result.evaluation,
3687
- };
3688
3559
  }
3689
3560
  catch (error) {
3690
3561
  // Immediately propagate AbortError — never swallow aborted requests
@@ -3713,6 +3584,299 @@ Current user's request: ${currentInput}`;
3713
3584
  return null; // Let caller fall back
3714
3585
  }
3715
3586
  }
3587
+ async prepareMCPGenerationContext(options, requestId, tryMCPId, tryMCPStartTime, tryMCPHrTimeStart) {
3588
+ await this.initializeMCP();
3589
+ if (!this.mcpInitialized) {
3590
+ logger.warn(`[NeuroLink] ⚠️ LOG_POINT_T004_MCP_NOT_AVAILABLE`, {
3591
+ logPoint: "T004_MCP_NOT_AVAILABLE",
3592
+ tryMCPId,
3593
+ timestamp: new Date().toISOString(),
3594
+ elapsedMs: Date.now() - tryMCPStartTime,
3595
+ elapsedNs: (process.hrtime.bigint() - tryMCPHrTimeStart).toString(),
3596
+ mcpInitialized: this.mcpInitialized,
3597
+ mcpComponents: {
3598
+ hasExternalServerManager: !!this.externalServerManager,
3599
+ hasToolRegistry: !!this.toolRegistry,
3600
+ hasProviderRegistry: !!AIProviderFactory,
3601
+ },
3602
+ fallbackReason: "MCP_NOT_INITIALIZED",
3603
+ message: "MCP not available - returning null for fallback to direct generation",
3604
+ });
3605
+ return null;
3606
+ }
3607
+ const providerName = options.provider === "auto" || !options.provider
3608
+ ? await getBestProvider()
3609
+ : options.provider;
3610
+ let availableTools = await this.getAllAvailableTools();
3611
+ const { tools: circuitBreakerFilteredTools, unavailableTools } = this.toolRegistry.getAvailableTools(this.toolCircuitBreakers);
3612
+ const cbFilteredNames = new Set(circuitBreakerFilteredTools.map((tool) => tool.name));
3613
+ availableTools = availableTools.filter((tool) => cbFilteredNames.has(tool.name));
3614
+ availableTools = this.applyToolInfoFiltering(availableTools, options);
3615
+ const targetTool = availableTools.find((tool) => tool.name.includes("SuccessRateSRByTime") ||
3616
+ tool.name.includes("juspay-analytics"));
3617
+ logger.debug("Available tools for AI prompt generation", {
3618
+ toolsCount: availableTools.length,
3619
+ toolNames: availableTools.map((tool) => tool.name),
3620
+ unavailableToolsCount: unavailableTools.length,
3621
+ unavailableTools,
3622
+ hasTargetTool: !!targetTool,
3623
+ targetToolDetails: targetTool
3624
+ ? {
3625
+ name: targetTool.name,
3626
+ description: targetTool.description,
3627
+ server: targetTool.server,
3628
+ }
3629
+ : null,
3630
+ });
3631
+ const circuitBreakerNote = unavailableTools.length > 0
3632
+ ? `\n\nNOTE: The following tools are temporarily unavailable due to repeated failures: ${unavailableTools.join(", ")}. Do not attempt to call these tools.`
3633
+ : "";
3634
+ const enhancedSystemPrompt = options.skipToolPromptInjection
3635
+ ? (options.systemPrompt || "") + circuitBreakerNote
3636
+ : this.createToolAwareSystemPrompt(options.systemPrompt, availableTools) +
3637
+ circuitBreakerNote;
3638
+ logger.debug("Tool-aware system prompt created", {
3639
+ requestId,
3640
+ originalPromptLength: options.systemPrompt?.length || 0,
3641
+ enhancedPromptLength: enhancedSystemPrompt.length,
3642
+ skippedToolInjection: !!options.skipToolPromptInjection,
3643
+ enhancedPromptPreview: enhancedSystemPrompt.substring(0, 80) + "...",
3644
+ });
3645
+ logger.debug("[Observability] System prompt metadata", {
3646
+ requestId,
3647
+ systemPromptLength: enhancedSystemPrompt.length,
3648
+ systemPromptHash: enhancedSystemPrompt.length > 0
3649
+ ? `sha256:${enhancedSystemPrompt.slice(0, 8)}...`
3650
+ : "empty",
3651
+ hasCustomSystemPrompt: !!options.systemPrompt,
3652
+ });
3653
+ const conversationMessages = (await getConversationMessages(this.conversationMemory, options));
3654
+ this.logMCPConversationSummary(requestId, conversationMessages);
3655
+ logger.debug("[Observability] Available tools for LLM", {
3656
+ requestId,
3657
+ toolCount: availableTools.length,
3658
+ toolNames: availableTools.map((tool) => tool.name),
3659
+ });
3660
+ return {
3661
+ providerName,
3662
+ availableTools,
3663
+ enhancedSystemPrompt,
3664
+ conversationMessages,
3665
+ };
3666
+ }
3667
+ logMCPConversationSummary(requestId, conversationMessages) {
3668
+ if (!logger.shouldLog("debug")) {
3669
+ return;
3670
+ }
3671
+ try {
3672
+ logger.debug("[Observability] Conversation history summary", {
3673
+ requestId,
3674
+ messageCount: conversationMessages.length,
3675
+ messages: conversationMessages.map((message, index) => {
3676
+ let contentLength;
3677
+ if (typeof message.content === "string") {
3678
+ contentLength = message.content.length;
3679
+ }
3680
+ else {
3681
+ try {
3682
+ contentLength = JSON.stringify(message.content).length;
3683
+ }
3684
+ catch {
3685
+ contentLength = 0;
3686
+ }
3687
+ }
3688
+ return {
3689
+ index,
3690
+ role: message.role,
3691
+ contentLength,
3692
+ contentPreview: typeof message.content === "string"
3693
+ ? message.content.substring(0, 200)
3694
+ : "[multimodal]",
3695
+ };
3696
+ }),
3697
+ });
3698
+ }
3699
+ catch {
3700
+ // Ignore serialization errors in debug logging
3701
+ }
3702
+ }
3703
+ async ensureMCPGenerationBudget(options, requestId, providerName, enhancedSystemPrompt, availableTools, conversationMessages) {
3704
+ const budgetResult = checkContextBudget({
3705
+ provider: providerName,
3706
+ model: options.model,
3707
+ maxTokens: options.maxTokens,
3708
+ systemPrompt: enhancedSystemPrompt,
3709
+ conversationMessages: conversationMessages,
3710
+ currentPrompt: options.prompt,
3711
+ toolDefinitions: availableTools,
3712
+ });
3713
+ logger.info("[TokenBudget] Token breakdown", {
3714
+ requestId,
3715
+ system: budgetResult.breakdown?.systemPrompt || 0,
3716
+ history: budgetResult.breakdown?.conversationHistory || 0,
3717
+ tools: budgetResult.breakdown?.toolDefinitions || 0,
3718
+ currentPrompt: budgetResult.breakdown?.currentPrompt || 0,
3719
+ files: budgetResult.breakdown?.fileAttachments || 0,
3720
+ total: budgetResult.estimatedInputTokens,
3721
+ budget: budgetResult.availableInputTokens,
3722
+ usagePercent: Math.round(budgetResult.usageRatio * 1000) / 10,
3723
+ conversationMessageCount: conversationMessages.length,
3724
+ shouldCompact: budgetResult.shouldCompact,
3725
+ });
3726
+ const compactionSessionId = this.getCompactionSessionId(options);
3727
+ const lastCompactionCount = this.lastCompactionMessageCount.get(compactionSessionId) ?? 0;
3728
+ if (!budgetResult.shouldCompact ||
3729
+ !this.conversationMemory ||
3730
+ conversationMessages.length <= lastCompactionCount) {
3731
+ return conversationMessages;
3732
+ }
3733
+ return this.compactMCPConversationForBudget({
3734
+ options,
3735
+ requestId,
3736
+ providerName,
3737
+ enhancedSystemPrompt,
3738
+ availableTools,
3739
+ conversationMessages,
3740
+ availableInputTokens: budgetResult.availableInputTokens,
3741
+ usageRatio: budgetResult.usageRatio,
3742
+ estimatedInputTokens: budgetResult.estimatedInputTokens,
3743
+ compactionSessionId,
3744
+ });
3745
+ }
3746
+ async compactMCPConversationForBudget(context) {
3747
+ const { options, requestId, providerName, enhancedSystemPrompt, availableTools, conversationMessages, availableInputTokens, usageRatio, estimatedInputTokens, compactionSessionId, } = context;
3748
+ logger.info("[NeuroLink] Context budget exceeded, triggering auto-compaction", {
3749
+ usageRatio,
3750
+ estimatedTokens: estimatedInputTokens,
3751
+ availableTokens: availableInputTokens,
3752
+ });
3753
+ const compactor = new ContextCompactor({
3754
+ provider: providerName,
3755
+ summarizationProvider: this.conversationMemoryConfig?.conversationMemory
3756
+ ?.summarizationProvider,
3757
+ summarizationModel: this.conversationMemoryConfig?.conversationMemory?.summarizationModel,
3758
+ });
3759
+ const compactionResult = await compactor.compact(conversationMessages, availableInputTokens, this.conversationMemoryConfig?.conversationMemory, requestId);
3760
+ let compactedMessages = conversationMessages;
3761
+ if (compactionResult.compacted) {
3762
+ const repairedResult = repairToolPairs(compactionResult.messages);
3763
+ compactedMessages = repairedResult.messages;
3764
+ this.lastCompactionMessageCount.set(compactionSessionId, compactedMessages.length);
3765
+ logger.info("[NeuroLink] Context compacted successfully", {
3766
+ stagesUsed: compactionResult.stagesUsed,
3767
+ tokensSaved: compactionResult.tokensSaved,
3768
+ });
3769
+ }
3770
+ const postCompactBudget = checkContextBudget({
3771
+ provider: providerName,
3772
+ model: options.model,
3773
+ maxTokens: options.maxTokens,
3774
+ systemPrompt: enhancedSystemPrompt,
3775
+ conversationMessages: compactedMessages,
3776
+ currentPrompt: options.prompt,
3777
+ toolDefinitions: availableTools,
3778
+ });
3779
+ if (postCompactBudget.withinBudget) {
3780
+ return compactedMessages;
3781
+ }
3782
+ const overageRatio = postCompactBudget.usageRatio - 1.0;
3783
+ logger.warn("[NeuroLink] Post-compaction still over budget, attempting emergency content truncation", {
3784
+ requestId,
3785
+ estimatedTokens: postCompactBudget.estimatedInputTokens,
3786
+ availableTokens: postCompactBudget.availableInputTokens,
3787
+ overagePercent: Math.round(overageRatio * 100),
3788
+ stagesUsedInCompaction: compactionResult.stagesUsed,
3789
+ });
3790
+ compactedMessages = emergencyContentTruncation(compactedMessages, postCompactBudget.availableInputTokens, postCompactBudget.breakdown, providerName);
3791
+ const finalBudget = checkContextBudget({
3792
+ provider: providerName,
3793
+ model: options.model,
3794
+ maxTokens: options.maxTokens,
3795
+ systemPrompt: enhancedSystemPrompt,
3796
+ conversationMessages: compactedMessages,
3797
+ currentPrompt: options.prompt,
3798
+ toolDefinitions: availableTools,
3799
+ });
3800
+ if (!finalBudget.withinBudget) {
3801
+ throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
3802
+ `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
3803
+ `Budget: ${finalBudget.availableInputTokens} tokens. ` +
3804
+ `Conversation is too large to fit in the model's context window.`, {
3805
+ estimatedTokens: finalBudget.estimatedInputTokens,
3806
+ availableTokens: finalBudget.availableInputTokens,
3807
+ stagesUsed: compactionResult.stagesUsed,
3808
+ breakdown: finalBudget.breakdown,
3809
+ });
3810
+ }
3811
+ return compactedMessages;
3812
+ }
3813
+ async generateWithMCPProvider(context) {
3814
+ const { options, requestId, functionTag, tryMCPStartTime, providerName, availableTools, enhancedSystemPrompt, conversationMessages, } = context;
3815
+ const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, this, options.region);
3816
+ provider.setTraceContext(this._metricsTraceContext);
3817
+ this.emitter.emit("connected");
3818
+ this.emitter.emit("message", `${providerName} provider initialized successfully`);
3819
+ provider.setupToolExecutor({
3820
+ customTools: this.getCustomTools(),
3821
+ executeTool: (toolName, params) => this.executeTool(toolName, params, {
3822
+ disableToolCache: options.disableToolCache,
3823
+ }),
3824
+ }, functionTag);
3825
+ logger.debug("[Observability] User input to LLM", {
3826
+ requestId,
3827
+ promptPreview: options.prompt?.substring(0, 200),
3828
+ promptLength: options.prompt?.length || 0,
3829
+ model: options.model,
3830
+ maxTokens: options.maxTokens,
3831
+ temperature: options.temperature,
3832
+ maxSteps: options.maxSteps,
3833
+ skipToolPromptInjection: options.skipToolPromptInjection,
3834
+ });
3835
+ const result = await provider.generate({
3836
+ ...options,
3837
+ systemPrompt: enhancedSystemPrompt,
3838
+ conversationMessages,
3839
+ });
3840
+ const responseTime = Date.now() - tryMCPStartTime;
3841
+ const hasContent = !!(result?.content && result.content.trim().length > 0);
3842
+ const hasToolExecutions = !!(result?.toolExecutions && result.toolExecutions.length > 0);
3843
+ mcpLogger.debug(`[${functionTag}] Result validation:`, {
3844
+ hasResult: !!result,
3845
+ hasContent,
3846
+ hasToolExecutions,
3847
+ contentLength: result?.content?.length || 0,
3848
+ toolExecutionsCount: result?.toolExecutions?.length || 0,
3849
+ toolsUsedCount: result?.toolsUsed?.length || 0,
3850
+ });
3851
+ if (!hasContent && !hasToolExecutions) {
3852
+ mcpLogger.debug(`[${functionTag}] Result rejected: no content and no tool executions`);
3853
+ return null;
3854
+ }
3855
+ const transformedToolExecutions = transformToolExecutionsForMCP(result.toolExecutions);
3856
+ mcpLogger.debug(`[${functionTag}] Tool execution transformation:`, {
3857
+ originalCount: result?.toolExecutions?.length || 0,
3858
+ transformedCount: transformedToolExecutions.length,
3859
+ transformedTools: transformedToolExecutions.map((te) => te.toolName),
3860
+ });
3861
+ return {
3862
+ content: result.content || "",
3863
+ provider: providerName,
3864
+ model: result.model,
3865
+ usage: result.usage,
3866
+ responseTime,
3867
+ finishReason: result.finishReason,
3868
+ toolsUsed: result.toolsUsed || [],
3869
+ toolExecutions: transformedToolExecutions,
3870
+ enhancedWithTools: Boolean(hasToolExecutions),
3871
+ availableTools: transformToolsForMCP(transformToolsToExpectedFormat(availableTools)),
3872
+ audio: result.audio,
3873
+ video: result.video,
3874
+ ppt: result.ppt,
3875
+ imageOutput: result.imageOutput,
3876
+ analytics: result.analytics,
3877
+ evaluation: result.evaluation,
3878
+ };
3879
+ }
3716
3880
  /**
3717
3881
  * Direct provider generation (no MCP, no recursion)
3718
3882
  */
@@ -3732,12 +3896,18 @@ Current user's request: ${currentInput}`;
3732
3896
  ];
3733
3897
  const requestedProvider = options.provider === "auto" ? undefined : options.provider;
3734
3898
  // Check for orchestrated preferred provider in context
3735
- const preferredOrchestrated = options.context && typeof options.context === "object" && "__orchestratedPreferredProvider" in options.context
3736
- ? options.context.__orchestratedPreferredProvider
3899
+ const preferredOrchestrated = options.context &&
3900
+ typeof options.context === "object" &&
3901
+ "__orchestratedPreferredProvider" in options.context
3902
+ ? options.context
3903
+ .__orchestratedPreferredProvider
3737
3904
  : undefined;
3738
3905
  // Build provider list with orchestrated preference first, then fallback to full list
3739
3906
  const tryProviders = preferredOrchestrated
3740
- ? [preferredOrchestrated, ...providerPriority.filter((p) => p !== preferredOrchestrated)]
3907
+ ? [
3908
+ preferredOrchestrated,
3909
+ ...providerPriority.filter((p) => p !== preferredOrchestrated),
3910
+ ]
3741
3911
  : requestedProvider
3742
3912
  ? [requestedProvider]
3743
3913
  : providerPriority;
@@ -3757,7 +3927,8 @@ Current user's request: ${currentInput}`;
3757
3927
  logger.debug(`[${functionTag}] Attempting provider: ${providerName}`);
3758
3928
  // Get conversation messages for context (use pre-compacted if provided)
3759
3929
  const optionsWithMessages = options;
3760
- let conversationMessages = optionsWithMessages.conversationMessages?.length
3930
+ let conversationMessages = optionsWithMessages.conversationMessages
3931
+ ?.length
3761
3932
  ? optionsWithMessages.conversationMessages
3762
3933
  : await getConversationMessages(this.conversationMemory, options);
3763
3934
  // Pre-generation budget check
@@ -3768,17 +3939,22 @@ Current user's request: ${currentInput}`;
3768
3939
  systemPrompt: options.systemPrompt,
3769
3940
  conversationMessages: conversationMessages,
3770
3941
  currentPrompt: options.prompt,
3771
- toolDefinitions: options.tools ? Object.values(options.tools) : undefined,
3942
+ toolDefinitions: options.tools
3943
+ ? Object.values(options.tools)
3944
+ : undefined,
3772
3945
  });
3773
3946
  const dpgMessageCount = conversationMessages?.length || 0;
3774
3947
  const dpgCompactionSessionId = this.getCompactionSessionId(options);
3775
3948
  if (budgetCheck.shouldCompact &&
3776
3949
  this.conversationMemory &&
3777
- dpgMessageCount > (this.lastCompactionMessageCount.get(dpgCompactionSessionId) ?? 0)) {
3950
+ dpgMessageCount >
3951
+ (this.lastCompactionMessageCount.get(dpgCompactionSessionId) ?? 0)) {
3778
3952
  const compactor = new ContextCompactor({
3779
3953
  provider: providerName,
3780
- summarizationProvider: this.conversationMemoryConfig?.conversationMemory?.summarizationProvider,
3781
- summarizationModel: this.conversationMemoryConfig?.conversationMemory?.summarizationModel,
3954
+ summarizationProvider: this.conversationMemoryConfig?.conversationMemory
3955
+ ?.summarizationProvider,
3956
+ summarizationModel: this.conversationMemoryConfig?.conversationMemory
3957
+ ?.summarizationModel,
3782
3958
  });
3783
3959
  const compactionResult = await compactor.compact(conversationMessages, budgetCheck.availableInputTokens, this.conversationMemoryConfig?.conversationMemory, options.context?.requestId);
3784
3960
  if (compactionResult.compacted) {
@@ -3794,7 +3970,9 @@ Current user's request: ${currentInput}`;
3794
3970
  systemPrompt: options.systemPrompt,
3795
3971
  conversationMessages: conversationMessages,
3796
3972
  currentPrompt: options.prompt,
3797
- toolDefinitions: options.tools ? Object.values(options.tools) : undefined,
3973
+ toolDefinitions: options.tools
3974
+ ? Object.values(options.tools)
3975
+ : undefined,
3798
3976
  });
3799
3977
  if (!postCompactBudget.withinBudget) {
3800
3978
  logger.warn("[NeuroLink] directProviderGeneration: post-compaction still over budget, emergency truncation", {
@@ -3810,7 +3988,9 @@ Current user's request: ${currentInput}`;
3810
3988
  systemPrompt: options.systemPrompt,
3811
3989
  conversationMessages: conversationMessages,
3812
3990
  currentPrompt: options.prompt,
3813
- toolDefinitions: options.tools ? Object.values(options.tools) : undefined,
3991
+ toolDefinitions: options.tools
3992
+ ? Object.values(options.tools)
3993
+ : undefined,
3814
3994
  });
3815
3995
  if (!finalBudget.withinBudget) {
3816
3996
  throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
@@ -4064,367 +4244,285 @@ Current user's request: ${currentInput}`;
4064
4244
  * @throws {Error} When conversation memory operations fail (if enabled)
4065
4245
  */
4066
4246
  async stream(options) {
4067
- // Shallow-copy caller's object to avoid mutating their original reference
4068
- options = { ...options };
4069
- // Set metrics trace context for parent-child span linking
4070
- const metricsTraceId = crypto.randomUUID().replace(/-/g, "");
4071
- const metricsParentSpanId = crypto.randomUUID().replace(/-/g, "").substring(0, 16);
4072
- // Scope trace context to this request via AsyncLocalStorage
4073
- // so concurrent generate/stream calls don't race.
4074
- return metricsTraceContextStorage.run({ traceId: metricsTraceId, parentSpanId: metricsParentSpanId }, async () => {
4075
- // Manual span lifecycle: the span must stay open until the stream is fully consumed,
4076
- // NOT when the StreamResult object is returned. withSpan would end the span too early
4077
- // because streaming results resolve lazily via the async generator.
4078
- const streamSpan = tracers.sdk.startSpan("neurolink.stream", {
4079
- kind: SpanKind.INTERNAL,
4080
- attributes: {
4081
- [ATTR.NL_PROVIDER]: options.provider || "default",
4082
- [ATTR.GEN_AI_MODEL]: options.model || "default",
4083
- [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
4084
- [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
4085
- [ATTR.NL_STREAM_MODE]: true,
4086
- },
4247
+ return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeStreamRequest({ ...options }));
4248
+ }
4249
+ async executeStreamRequest(options) {
4250
+ const streamSpan = tracers.sdk.startSpan("neurolink.stream", {
4251
+ kind: SpanKind.INTERNAL,
4252
+ attributes: {
4253
+ [ATTR.NL_PROVIDER]: options.provider || "default",
4254
+ [ATTR.GEN_AI_MODEL]: options.model || "default",
4255
+ [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
4256
+ [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
4257
+ [ATTR.NL_STREAM_MODE]: true,
4258
+ },
4259
+ });
4260
+ const spanStartTime = Date.now();
4261
+ this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
4262
+ try {
4263
+ options.model = resolveModel(options.model, this.modelAliasConfig);
4264
+ const startTime = Date.now();
4265
+ const hrTimeStart = process.hrtime.bigint();
4266
+ const streamId = `neurolink-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
4267
+ const originalPrompt = options.input.text;
4268
+ options.fileRegistry = this.fileRegistry;
4269
+ await this.validateStreamRequestOptions(options, startTime);
4270
+ const workflowResult = await this.maybeHandleWorkflowStreamRequest({
4271
+ options,
4272
+ startTime,
4273
+ streamSpan,
4274
+ spanStartTime,
4087
4275
  });
4088
- const spanStartTime = Date.now();
4089
- // MCP Enhancement: propagate disableToolCache to tool execution
4090
- this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
4276
+ if (workflowResult) {
4277
+ return workflowResult;
4278
+ }
4279
+ return this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
4280
+ options,
4281
+ streamSpan,
4282
+ spanStartTime,
4283
+ startTime,
4284
+ hrTimeStart,
4285
+ streamId,
4286
+ originalPrompt,
4287
+ }));
4288
+ }
4289
+ catch (error) {
4290
+ streamSpan.setStatus({
4291
+ code: SpanStatusCode.ERROR,
4292
+ message: error instanceof Error ? error.message : String(error),
4293
+ });
4294
+ if (error instanceof Error) {
4295
+ streamSpan.recordException(error);
4296
+ }
4297
+ streamSpan.end();
4298
+ throw error;
4299
+ }
4300
+ }
4301
+ async validateStreamRequestOptions(options, startTime) {
4302
+ await this.validateStreamInput(options);
4303
+ this.enforceSessionBudget(options.maxBudgetUsd);
4304
+ await this.applyAuthenticatedRequestContext(options);
4305
+ this.emitStreamStartEvents(options, startTime);
4306
+ this.applyStreamLifecycleMiddleware(options);
4307
+ }
4308
+ async maybeHandleWorkflowStreamRequest(params) {
4309
+ if (!params.options.workflow && !params.options.workflowConfig) {
4310
+ return null;
4311
+ }
4312
+ const result = await this.streamWithWorkflow(params.options, params.startTime);
4313
+ const originalWorkflowStream = result.stream;
4314
+ const self = this;
4315
+ result.stream = (async function* () {
4091
4316
  try {
4092
- // NL-004: Resolve model aliases/deprecations before processing
4093
- options.model = resolveModel(options.model, this.modelAliasConfig);
4094
- const startTime = Date.now();
4095
- const hrTimeStart = process.hrtime.bigint();
4096
- const streamId = `neurolink-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
4097
- const originalPrompt = options.input.text; // Store the original prompt for memory storage
4098
- // Inject file registry for lazy on-demand file processing
4099
- options.fileRegistry = this.fileRegistry;
4100
- await this.validateStreamInput(options);
4101
- // Check budget limit before making API call
4102
- if (options.maxBudgetUsd !== undefined &&
4103
- options.maxBudgetUsd > 0 &&
4104
- this._sessionCostUsd >= options.maxBudgetUsd) {
4105
- throw new NeuroLinkError({
4106
- code: "SESSION_BUDGET_EXCEEDED",
4107
- message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
4108
- category: ErrorCategory.VALIDATION,
4109
- severity: ErrorSeverity.HIGH,
4110
- retriable: false,
4111
- context: {
4112
- spent: this._sessionCostUsd,
4113
- limit: options.maxBudgetUsd,
4114
- },
4115
- });
4317
+ for await (const chunk of originalWorkflowStream) {
4318
+ yield chunk;
4116
4319
  }
4117
- // Handle per-call auth token validation
4118
- if (options.auth?.token) {
4119
- const { AuthError } = await import("./auth/errors.js");
4120
- await this.ensureAuthProvider();
4121
- if (!this.authProvider) {
4122
- throw AuthError.create("PROVIDER_ERROR", "No auth provider configured. Set auth in constructor or via setAuthProvider() before using auth: { token }.");
4123
- }
4124
- let authResult;
4125
- try {
4126
- authResult = await withTimeout(this.authProvider.authenticateToken(options.auth.token), 5000, AuthError.create("PROVIDER_ERROR", "Auth token validation timed out after 5000ms"));
4127
- }
4128
- catch (err) {
4129
- // Rethrow auth errors as-is; wrap anything else
4130
- if (err instanceof Error && "feature" in err && err.feature === "Auth") {
4131
- throw err;
4320
+ params.streamSpan.setStatus({ code: SpanStatusCode.OK });
4321
+ }
4322
+ catch (error) {
4323
+ params.streamSpan.setStatus({
4324
+ code: SpanStatusCode.ERROR,
4325
+ message: error instanceof Error ? error.message : String(error),
4326
+ });
4327
+ throw error;
4328
+ }
4329
+ finally {
4330
+ self._disableToolCacheForCurrentRequest = false;
4331
+ params.streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - params.spanStartTime);
4332
+ params.streamSpan.end();
4333
+ }
4334
+ })();
4335
+ return result;
4336
+ }
4337
+ async runStandardStreamRequest(params) {
4338
+ const { options, streamSpan, spanStartTime, startTime, hrTimeStart, streamId, originalPrompt, } = params;
4339
+ try {
4340
+ const { enhancedOptions, factoryResult } = await this.prepareStreamOptions(options, streamId, startTime, hrTimeStart);
4341
+ const { stream: mcpStream, provider: providerName, usage: streamUsage, model: streamModel, finishReason: streamFinishReason, toolCalls: streamToolCalls, toolResults: streamToolResults, analytics: streamAnalytics, } = await this.createMCPStream(enhancedOptions);
4342
+ const streamState = {
4343
+ finishReason: streamFinishReason ?? "stop",
4344
+ toolCalls: streamToolCalls,
4345
+ toolResults: streamToolResults,
4346
+ };
4347
+ streamSpan.setAttribute(ATTR.NL_PROVIDER, providerName || "unknown");
4348
+ let accumulatedContent = "";
4349
+ let chunkCount = 0;
4350
+ const { eventSequence, cleanup: cleanupListeners } = this.setupStreamEventListeners();
4351
+ const metadata = {
4352
+ fallbackAttempted: false,
4353
+ guardrailsBlocked: false,
4354
+ error: undefined,
4355
+ fallbackProvider: undefined,
4356
+ fallbackModel: undefined,
4357
+ };
4358
+ const self = this;
4359
+ const streamStartTime = Date.now();
4360
+ const sessionId = enhancedOptions.context
4361
+ ?.sessionId;
4362
+ const processedStream = (async function* () {
4363
+ let streamError;
4364
+ try {
4365
+ for await (const chunk of mcpStream) {
4366
+ chunkCount++;
4367
+ if (chunk &&
4368
+ "content" in chunk &&
4369
+ typeof chunk.content === "string") {
4370
+ accumulatedContent += chunk.content;
4371
+ self.emitter.emit("response:chunk", chunk.content);
4372
+ self.emitter.emit("stream:chunk", {
4373
+ type: "stream:chunk",
4374
+ content: chunk.content,
4375
+ metadata: {
4376
+ chunkIndex: chunkCount,
4377
+ totalLength: accumulatedContent.length,
4378
+ },
4379
+ timestamp: Date.now(),
4380
+ });
4132
4381
  }
4133
- throw AuthError.create("PROVIDER_ERROR", `Auth token validation failed: ${err instanceof Error ? err.message : String(err)}`);
4134
- }
4135
- if (!authResult.valid) {
4136
- throw AuthError.create("INVALID_TOKEN", authResult.error || "Token validation failed");
4382
+ yield chunk;
4137
4383
  }
4138
- // Fail closed: token valid but no user identity is a provider bug
4139
- if (!authResult.user) {
4140
- throw AuthError.create("INVALID_TOKEN", "Token validated but no user identity returned");
4141
- }
4142
- if (!authResult.user.id) {
4143
- throw AuthError.create("INVALID_TOKEN", "Token validated but user identity missing required 'id' field");
4384
+ if (chunkCount === 0 &&
4385
+ !metadata.fallbackAttempted &&
4386
+ !enhancedOptions.disableInternalFallback &&
4387
+ streamState.toolCalls.length === 0 &&
4388
+ streamState.toolResults.length === 0) {
4389
+ yield* self.handleStreamFallback(metadata, streamState, originalPrompt, enhancedOptions, providerName, (content) => {
4390
+ accumulatedContent += content;
4391
+ });
4144
4392
  }
4145
- // Merge validated user into context
4146
- options.context = {
4147
- ...(options.context || {}),
4148
- userId: authResult.user.id,
4149
- userEmail: authResult.user.email,
4150
- userRoles: authResult.user.roles,
4151
- };
4152
- }
4153
- // Handle pre-validated requestContext
4154
- if (options.requestContext) {
4155
- // When auth token was validated, token-derived identity fields
4156
- // MUST take precedence over requestContext to prevent privilege escalation.
4157
- const tokenDerivedFields = options.auth?.token && this.authProvider
4158
- ? {
4159
- userId: options.context?.userId,
4160
- userEmail: options.context?.userEmail,
4161
- userRoles: options.context?.userRoles,
4162
- }
4163
- : {};
4164
- options.context = {
4165
- ...(options.context || {}),
4166
- ...options.requestContext,
4167
- ...tokenDerivedFields,
4168
- };
4169
- }
4170
- this.emitStreamStartEvents(options, startTime);
4171
- // Auto-inject lifecycle middleware when callbacks are provided
4172
- // (must happen before workflow early return so that path gets middleware too)
4173
- if (options.onFinish || options.onError || options.onChunk) {
4174
- options.middleware = {
4175
- ...options.middleware,
4176
- middlewareConfig: {
4177
- ...options.middleware?.middlewareConfig,
4178
- lifecycle: {
4179
- ...options.middleware?.middlewareConfig?.lifecycle,
4180
- enabled: true,
4181
- config: {
4182
- ...options.middleware?.middlewareConfig?.lifecycle?.config,
4183
- ...(options.onFinish !== undefined ? { onFinish: options.onFinish } : {}),
4184
- ...(options.onError !== undefined ? { onError: options.onError } : {}),
4185
- ...(options.onChunk !== undefined ? { onChunk: options.onChunk } : {}),
4186
- },
4187
- },
4188
- },
4189
- };
4190
- }
4191
- // Check if workflow is requested
4192
- if (options.workflow || options.workflowConfig) {
4193
- const result = await this.streamWithWorkflow(options, startTime);
4194
- // Wrap the workflow stream so the span stays open until fully consumed
4195
- const originalWorkflowStream = result.stream;
4196
- const selfWorkflow = this;
4197
- result.stream = (async function* () {
4393
+ let resolvedUsage = streamUsage;
4394
+ if (!resolvedUsage && streamAnalytics) {
4198
4395
  try {
4199
- for await (const chunk of originalWorkflowStream) {
4200
- yield chunk;
4396
+ const resolved = await Promise.resolve(streamAnalytics);
4397
+ if (resolved?.tokenUsage) {
4398
+ resolvedUsage = resolved.tokenUsage;
4201
4399
  }
4202
- streamSpan.setStatus({ code: SpanStatusCode.OK });
4203
- }
4204
- catch (error) {
4205
- streamSpan.setStatus({
4206
- code: SpanStatusCode.ERROR,
4207
- message: error instanceof Error ? error.message : String(error),
4208
- });
4209
- throw error;
4210
4400
  }
4211
- finally {
4212
- selfWorkflow._disableToolCacheForCurrentRequest = false;
4213
- streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
4214
- streamSpan.end();
4401
+ catch {
4402
+ // non-blocking
4215
4403
  }
4216
- })();
4217
- return result;
4404
+ }
4405
+ self.emitter.emit("stream:complete", {
4406
+ type: "stream:complete",
4407
+ content: accumulatedContent,
4408
+ provider: metadata.fallbackProvider ?? providerName,
4409
+ model: metadata.fallbackModel ?? streamModel ?? enhancedOptions.model,
4410
+ prompt: enhancedOptions.input?.text ||
4411
+ enhancedOptions.prompt,
4412
+ metadata: {
4413
+ chunkCount,
4414
+ totalLength: accumulatedContent.length,
4415
+ durationMs: Date.now() - streamStartTime,
4416
+ sessionId,
4417
+ usage: resolvedUsage,
4418
+ ...(metadata.fallbackAttempted && {
4419
+ primaryProvider: providerName,
4420
+ primaryModel: enhancedOptions.model,
4421
+ fallback: true,
4422
+ }),
4423
+ },
4424
+ timestamp: Date.now(),
4425
+ });
4218
4426
  }
4219
- // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
4220
- return await this.setLangfuseContextFromOptions(options, async () => {
4221
- try {
4222
- // Prepare options: init memory, MCP, orchestration, Ollama auto-disable, tool detection
4223
- const { enhancedOptions, factoryResult } = await this.prepareStreamOptions(options, streamId, startTime, hrTimeStart);
4224
- const { stream: mcpStream, provider: providerName, usage: streamUsage, model: streamModel, finishReason: streamFinishReason, toolCalls: streamToolCalls, toolResults: streamToolResults, analytics: streamAnalytics, } = await this.createMCPStream(enhancedOptions);
4225
- const streamState = {
4226
- finishReason: streamFinishReason ?? "stop",
4227
- toolCalls: streamToolCalls,
4228
- toolResults: streamToolResults,
4229
- };
4230
- // Update span with resolved provider name
4231
- streamSpan.setAttribute(ATTR.NL_PROVIDER, providerName || "unknown");
4232
- let accumulatedContent = "";
4233
- let chunkCount = 0;
4234
- // Set up event capture listeners
4235
- const { eventSequence, cleanup: cleanupListeners } = this.setupStreamEventListeners();
4236
- const metadata = {
4237
- fallbackAttempted: false,
4238
- guardrailsBlocked: false,
4239
- error: undefined,
4240
- fallbackProvider: undefined,
4241
- fallbackModel: undefined,
4242
- };
4243
- const self = this;
4244
- const streamStartTime = Date.now();
4245
- const sessionId = enhancedOptions.context?.sessionId;
4246
- const processedStream = (async function* () {
4247
- let streamError;
4248
- try {
4249
- for await (const chunk of mcpStream) {
4250
- chunkCount++;
4251
- if (chunk && "content" in chunk && typeof chunk.content === "string") {
4252
- accumulatedContent += chunk.content;
4253
- self.emitter.emit("response:chunk", chunk.content);
4254
- // Emit stream:chunk event (Observability Solution 8)
4255
- self.emitter.emit("stream:chunk", {
4256
- type: "stream:chunk",
4257
- content: chunk.content,
4258
- metadata: {
4259
- chunkIndex: chunkCount,
4260
- totalLength: accumulatedContent.length,
4261
- },
4262
- timestamp: Date.now(),
4263
- });
4264
- }
4265
- yield chunk;
4266
- }
4267
- if (chunkCount === 0 &&
4268
- !metadata.fallbackAttempted &&
4269
- !enhancedOptions.disableInternalFallback &&
4270
- streamState.toolCalls.length === 0 &&
4271
- streamState.toolResults.length === 0) {
4272
- yield* self.handleStreamFallback(metadata, streamState, originalPrompt, enhancedOptions, providerName, accumulatedContent, (content) => {
4273
- accumulatedContent += content;
4274
- });
4275
- }
4276
- // Emit stream:complete event (Observability Solution 8)
4277
- // When fallback took over, attribute the completion to the
4278
- // fallback provider so downstream telemetry reflects reality.
4279
- const effectiveProvider = metadata.fallbackProvider ?? providerName;
4280
- const effectiveModel = metadata.fallbackModel ?? streamModel ?? enhancedOptions.model;
4281
- // Resolve analytics promise to get final token usage
4282
- let resolvedUsage = streamUsage;
4283
- if (!resolvedUsage && streamAnalytics) {
4284
- try {
4285
- const resolved = await Promise.resolve(streamAnalytics);
4286
- if (resolved?.tokenUsage) {
4287
- resolvedUsage = resolved.tokenUsage;
4288
- }
4289
- }
4290
- catch {
4291
- /* non-blocking */
4292
- }
4293
- }
4294
- self.emitter.emit("stream:complete", {
4295
- type: "stream:complete",
4296
- content: accumulatedContent,
4297
- provider: effectiveProvider,
4298
- model: effectiveModel,
4299
- prompt: enhancedOptions.input?.text || enhancedOptions.prompt,
4300
- metadata: {
4301
- chunkCount,
4302
- totalLength: accumulatedContent.length,
4303
- durationMs: Date.now() - streamStartTime,
4304
- sessionId,
4305
- usage: resolvedUsage,
4306
- ...(metadata.fallbackAttempted && {
4307
- primaryProvider: providerName,
4308
- primaryModel: enhancedOptions.model,
4309
- fallback: true,
4310
- }),
4311
- },
4312
- timestamp: Date.now(),
4313
- });
4314
- }
4315
- catch (error) {
4316
- streamError = error;
4317
- // Emit stream:error event (Observability Solution 8)
4318
- self.emitter.emit("stream:error", {
4319
- type: "stream:error",
4320
- content: error instanceof Error ? error.message : String(error),
4321
- provider: providerName,
4322
- model: enhancedOptions.model,
4323
- metadata: {
4324
- chunkCount,
4325
- totalLength: accumulatedContent.length,
4326
- durationMs: Date.now() - streamStartTime,
4327
- errorName: error instanceof Error ? error.name : "UnknownError",
4328
- sessionId,
4329
- },
4330
- timestamp: Date.now(),
4331
- });
4332
- throw error;
4333
- }
4334
- finally {
4335
- self._disableToolCacheForCurrentRequest = false;
4336
- cleanupListeners();
4337
- // Finalize span now that the stream is fully consumed
4338
- streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
4339
- streamSpan.setAttribute(ATTR.NL_OUTPUT_LENGTH, accumulatedContent.length);
4340
- // When fallback took over, the primary provider's span must
4341
- // reflect that it failed — never mark it as successful.
4342
- const primaryFailed = !!(metadata.error || streamError);
4343
- streamSpan.setAttribute(ATTR.GEN_AI_FINISH_REASON, primaryFailed ? "error" : "stop");
4344
- if (metadata.fallbackAttempted) {
4345
- streamSpan.setAttribute("neurolink.fallback_triggered", true);
4346
- if (metadata.fallbackProvider) {
4347
- streamSpan.setAttribute("neurolink.fallback_provider", metadata.fallbackProvider);
4348
- }
4349
- }
4350
- if (primaryFailed) {
4351
- streamSpan.setStatus({
4352
- code: SpanStatusCode.ERROR,
4353
- message: metadata.error || (streamError instanceof Error ? streamError.message : String(streamError)),
4354
- });
4355
- }
4356
- else {
4357
- streamSpan.setStatus({ code: SpanStatusCode.OK });
4358
- }
4359
- streamSpan.end();
4360
- if (accumulatedContent.trim()) {
4361
- logger.info(`[NeuroLink.stream] stream() - COMPLETE SUCCESS`, {
4362
- provider: providerName,
4363
- model: enhancedOptions.model,
4364
- responseTimeMs: Date.now() - startTime,
4365
- contentLength: accumulatedContent.length,
4366
- fallback: metadata.fallbackAttempted,
4367
- });
4368
- }
4369
- await self.storeStreamConversationMemory({
4370
- enhancedOptions,
4371
- providerName,
4372
- originalPrompt,
4373
- accumulatedContent,
4374
- startTime,
4375
- eventSequence,
4376
- });
4377
- }
4378
- })();
4379
- const streamResult = await this.processStreamResult(processedStream, enhancedOptions, factoryResult);
4380
- streamResult.finishReason = streamState.finishReason || streamResult.finishReason;
4381
- streamResult.toolCalls = streamState.toolCalls;
4382
- streamResult.toolResults = streamState.toolResults;
4383
- if (!streamResult.usage) {
4384
- streamResult.usage = streamUsage;
4385
- }
4386
- if (!streamResult.analytics) {
4387
- streamResult.analytics = streamAnalytics instanceof Promise ? await streamAnalytics : streamAnalytics;
4388
- }
4389
- const responseTime = Date.now() - startTime;
4390
- // Accumulate session cost for budget tracking
4391
- if (streamResult.analytics?.cost && streamResult.analytics.cost > 0) {
4392
- this._sessionCostUsd += streamResult.analytics.cost;
4427
+ catch (error) {
4428
+ streamError = error;
4429
+ self.emitter.emit("stream:error", {
4430
+ type: "stream:error",
4431
+ content: error instanceof Error ? error.message : String(error),
4432
+ provider: providerName,
4433
+ model: enhancedOptions.model,
4434
+ metadata: {
4435
+ chunkCount,
4436
+ totalLength: accumulatedContent.length,
4437
+ durationMs: Date.now() - streamStartTime,
4438
+ errorName: error instanceof Error ? error.name : "UnknownError",
4439
+ sessionId,
4440
+ },
4441
+ timestamp: Date.now(),
4442
+ });
4443
+ throw error;
4444
+ }
4445
+ finally {
4446
+ self._disableToolCacheForCurrentRequest = false;
4447
+ cleanupListeners();
4448
+ streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
4449
+ streamSpan.setAttribute(ATTR.NL_OUTPUT_LENGTH, accumulatedContent.length);
4450
+ const primaryFailed = !!(metadata.error || streamError);
4451
+ streamSpan.setAttribute(ATTR.GEN_AI_FINISH_REASON, primaryFailed ? "error" : "stop");
4452
+ if (metadata.fallbackAttempted) {
4453
+ streamSpan.setAttribute("neurolink.fallback_triggered", true);
4454
+ if (metadata.fallbackProvider) {
4455
+ streamSpan.setAttribute("neurolink.fallback_provider", metadata.fallbackProvider);
4393
4456
  }
4394
- this.emitStreamEndEvents(streamResult);
4395
- return this.createStreamResponse(streamResult, processedStream, {
4396
- providerName,
4397
- options,
4398
- startTime,
4399
- responseTime,
4400
- streamId,
4401
- fallback: metadata.fallbackAttempted,
4402
- guardrailsBlocked: metadata.guardrailsBlocked,
4403
- error: metadata.error,
4404
- events: eventSequence,
4457
+ }
4458
+ if (primaryFailed) {
4459
+ streamSpan.setStatus({
4460
+ code: SpanStatusCode.ERROR,
4461
+ message: metadata.error ||
4462
+ (streamError instanceof Error
4463
+ ? streamError.message
4464
+ : String(streamError)),
4405
4465
  });
4406
4466
  }
4407
- catch (error) {
4408
- if (options.disableInternalFallback) {
4409
- throw error;
4410
- }
4411
- return this.handleStreamError(error, options, startTime, streamId, undefined, undefined);
4467
+ else {
4468
+ streamSpan.setStatus({ code: SpanStatusCode.OK });
4412
4469
  }
4413
- });
4414
- }
4415
- catch (error) {
4416
- // End span on error before re-throwing
4417
- streamSpan.setStatus({
4418
- code: SpanStatusCode.ERROR,
4419
- message: error instanceof Error ? error.message : String(error),
4420
- });
4421
- if (error instanceof Error) {
4422
- streamSpan.recordException(error);
4470
+ streamSpan.end();
4471
+ if (accumulatedContent.trim()) {
4472
+ logger.info(`[NeuroLink.stream] stream() - COMPLETE SUCCESS`, {
4473
+ provider: providerName,
4474
+ model: enhancedOptions.model,
4475
+ responseTimeMs: Date.now() - startTime,
4476
+ contentLength: accumulatedContent.length,
4477
+ fallback: metadata.fallbackAttempted,
4478
+ });
4479
+ }
4480
+ await self.storeStreamConversationMemory({
4481
+ enhancedOptions,
4482
+ providerName,
4483
+ originalPrompt,
4484
+ accumulatedContent,
4485
+ startTime,
4486
+ eventSequence,
4487
+ });
4423
4488
  }
4424
- streamSpan.end();
4489
+ })();
4490
+ const streamResult = await this.processStreamResult(processedStream, enhancedOptions, factoryResult);
4491
+ streamResult.finishReason =
4492
+ streamState.finishReason || streamResult.finishReason;
4493
+ streamResult.toolCalls = streamState.toolCalls;
4494
+ streamResult.toolResults = streamState.toolResults;
4495
+ if (!streamResult.usage) {
4496
+ streamResult.usage = streamUsage;
4497
+ }
4498
+ if (!streamResult.analytics) {
4499
+ streamResult.analytics =
4500
+ streamAnalytics instanceof Promise
4501
+ ? await streamAnalytics
4502
+ : streamAnalytics;
4503
+ }
4504
+ if (streamResult.analytics?.cost && streamResult.analytics.cost > 0) {
4505
+ this._sessionCostUsd += streamResult.analytics.cost;
4506
+ }
4507
+ this.emitStreamEndEvents(streamResult);
4508
+ return this.createStreamResponse(streamResult, processedStream, {
4509
+ providerName,
4510
+ options,
4511
+ startTime,
4512
+ responseTime: Date.now() - startTime,
4513
+ streamId,
4514
+ fallback: metadata.fallbackAttempted,
4515
+ guardrailsBlocked: metadata.guardrailsBlocked,
4516
+ error: metadata.error,
4517
+ events: eventSequence,
4518
+ });
4519
+ }
4520
+ catch (error) {
4521
+ if (options.disableInternalFallback) {
4425
4522
  throw error;
4426
4523
  }
4427
- }); // end metricsTraceContextStorage.run
4524
+ return this.handleStreamError(error, options, startTime, streamId, undefined, undefined);
4525
+ }
4428
4526
  }
4429
4527
  /**
4430
4528
  * Prepare stream options: initialize memory, MCP, retrieval, orchestration,
@@ -4436,7 +4534,8 @@ Current user's request: ${currentInput}`;
4436
4534
  // Initialize MCP
4437
4535
  await this.initializeMCP();
4438
4536
  // Memory retrieval
4439
- if (this.shouldReadMemory(options.memory, options.context?.userId) && options.context?.userId) {
4537
+ if (this.shouldReadMemory(options.memory, options.context?.userId) &&
4538
+ options.context?.userId) {
4440
4539
  try {
4441
4540
  options.input.text = await this.retrieveMemory(options.input.text, options.context.userId, options.memory?.additionalUsers);
4442
4541
  logger.debug("Memory retrieval successful");
@@ -4481,7 +4580,8 @@ Current user's request: ${currentInput}`;
4481
4580
  if (!options.tools) {
4482
4581
  options.tools = {};
4483
4582
  }
4484
- options.tools[ragResult.toolName] = ragResult.tool;
4583
+ options.tools[ragResult.toolName] =
4584
+ ragResult.tool;
4485
4585
  // Inject RAG-aware system prompt so the AI uses the RAG tool first
4486
4586
  const ragSystemInstruction = [
4487
4587
  `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
@@ -4490,7 +4590,8 @@ Current user's request: ${currentInput}`;
4490
4590
  `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
4491
4591
  `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
4492
4592
  ].join(" ");
4493
- options.systemPrompt = (options.systemPrompt || "") + ragSystemInstruction;
4593
+ options.systemPrompt =
4594
+ (options.systemPrompt || "") + ragSystemInstruction;
4494
4595
  logger.info("[RAG] Tool injected into stream()", {
4495
4596
  toolName: ragResult.toolName,
4496
4597
  filesLoaded: ragResult.filesLoaded,
@@ -4518,7 +4619,8 @@ Current user's request: ${currentInput}`;
4518
4619
  * Prevents overwhelming smaller models with massive tool descriptions in the system message.
4519
4620
  */
4520
4621
  async autoDisableOllamaStreamTools(options) {
4521
- if ((options.provider === "ollama" || options.provider?.toLowerCase().includes("ollama")) &&
4622
+ if ((options.provider === "ollama" ||
4623
+ options.provider?.toLowerCase().includes("ollama")) &&
4522
4624
  !options.disableTools) {
4523
4625
  const { ModelConfigurationManager } = await import("./core/modelConfiguration.js");
4524
4626
  const modelConfig = ModelConfigurationManager.getInstance();
@@ -4602,7 +4704,7 @@ Current user's request: ${currentInput}`;
4602
4704
  * Handle fallback when the primary stream returns 0 chunks.
4603
4705
  * Yields chunks from a fallback provider and updates metadata accordingly.
4604
4706
  */
4605
- async *handleStreamFallback(metadata, streamState, originalPrompt, enhancedOptions, providerName, _accumulatedContent, appendContent) {
4707
+ async *handleStreamFallback(metadata, streamState, originalPrompt, enhancedOptions, providerName, appendContent) {
4606
4708
  metadata.fallbackAttempted = true;
4607
4709
  const errorMsg = "Stream completed with 0 chunks (possible guardrails block)";
4608
4710
  metadata.error = errorMsg;
@@ -4665,18 +4767,23 @@ Current user's request: ${currentInput}`;
4665
4767
  if (fallbackToolCalls.length > 0 || fallbackToolResults.length > 0) {
4666
4768
  streamState.toolCalls = fallbackToolCalls;
4667
4769
  streamState.toolResults = fallbackToolResults;
4668
- streamState.finishReason = fallbackResult.finishReason ?? streamState.finishReason;
4770
+ streamState.finishReason =
4771
+ fallbackResult.finishReason ?? streamState.finishReason;
4669
4772
  }
4670
4773
  let fallbackChunkCount = 0;
4671
4774
  for await (const fallbackChunk of fallbackResult.stream) {
4672
4775
  fallbackChunkCount++;
4673
- if (fallbackChunk && "content" in fallbackChunk && typeof fallbackChunk.content === "string") {
4776
+ if (fallbackChunk &&
4777
+ "content" in fallbackChunk &&
4778
+ typeof fallbackChunk.content === "string") {
4674
4779
  appendContent(fallbackChunk.content);
4675
4780
  this.emitter.emit("response:chunk", fallbackChunk.content);
4676
4781
  }
4677
4782
  yield fallbackChunk;
4678
4783
  }
4679
- if (fallbackChunkCount === 0 && fallbackToolCalls.length === 0 && fallbackToolResults.length === 0) {
4784
+ if (fallbackChunkCount === 0 &&
4785
+ fallbackToolCalls.length === 0 &&
4786
+ fallbackToolResults.length === 0) {
4680
4787
  throw new Error(`Fallback provider ${fallbackRoute.provider} also returned 0 chunks`);
4681
4788
  }
4682
4789
  // Fallback succeeded - likely guardrails blocked primary
@@ -4685,7 +4792,9 @@ Current user's request: ${currentInput}`;
4685
4792
  metadata.guardrailsBlocked = true;
4686
4793
  }
4687
4794
  catch (fallbackError) {
4688
- const fallbackErrorMsg = fallbackError instanceof Error ? fallbackError.message : String(fallbackError);
4795
+ const fallbackErrorMsg = fallbackError instanceof Error
4796
+ ? fallbackError.message
4797
+ : String(fallbackError);
4689
4798
  metadata.error = `${errorMsg}; Fallback failed: ${fallbackErrorMsg}`;
4690
4799
  logger.error("Fallback provider failed", {
4691
4800
  fallbackProvider: fallbackRoute.provider,
@@ -4699,19 +4808,22 @@ Current user's request: ${currentInput}`;
4699
4808
  * Handles conversation memory storage in the background.
4700
4809
  */
4701
4810
  async storeStreamConversationMemory(params) {
4702
- const { enhancedOptions, providerName, originalPrompt, accumulatedContent, startTime, eventSequence } = params;
4811
+ const { enhancedOptions, providerName, originalPrompt, accumulatedContent, startTime, eventSequence, } = params;
4703
4812
  // Guard: skip storing if no meaningful content was produced (no text AND no tool activity)
4704
4813
  const hasToolEvents = eventSequence.some((e) => e.type === "tool:start" || e.type === "tool:end");
4705
4814
  if (!accumulatedContent.trim() && !hasToolEvents) {
4706
4815
  logger.warn("[NeuroLink.stream] Skipping conversation turn storage — no text content or tool activity", {
4707
- sessionId: enhancedOptions.context?.sessionId,
4816
+ sessionId: enhancedOptions.context
4817
+ ?.sessionId,
4708
4818
  });
4709
4819
  return;
4710
4820
  }
4711
4821
  // Store memory after stream consumption is complete
4712
4822
  if (this.conversationMemory && enhancedOptions.context?.sessionId) {
4713
- const sessionId = enhancedOptions.context?.sessionId;
4714
- const userId = enhancedOptions.context?.userId;
4823
+ const sessionId = enhancedOptions.context
4824
+ ?.sessionId;
4825
+ const userId = enhancedOptions.context
4826
+ ?.userId;
4715
4827
  let providerDetails;
4716
4828
  if (enhancedOptions.model) {
4717
4829
  providerDetails = {
@@ -4730,7 +4842,8 @@ Current user's request: ${currentInput}`;
4730
4842
  providerDetails,
4731
4843
  enableSummarization: enhancedOptions.enableSummarization,
4732
4844
  events: eventSequence.length > 0 ? eventSequence : undefined,
4733
- requestId: enhancedOptions.context?.requestId,
4845
+ requestId: enhancedOptions.context
4846
+ ?.requestId,
4734
4847
  });
4735
4848
  this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "stream" }, Date.now() - memStoreStart, SpanStatus.OK);
4736
4849
  logger.debug("[NeuroLink.stream] Stored conversation turn with events", {
@@ -4760,7 +4873,8 @@ Current user's request: ${currentInput}`;
4760
4873
  validationStartTimeNs: validationStartTime.toString(),
4761
4874
  message: "Starting comprehensive input validation process",
4762
4875
  });
4763
- const hasText = typeof options?.input?.text === "string" && options.input.text.trim().length > 0;
4876
+ const hasText = typeof options?.input?.text === "string" &&
4877
+ options.input.text.trim().length > 0;
4764
4878
  // Accept audio when frames are present; sampleRateHz is optional (defaults applied later)
4765
4879
  const hasAudio = !!(options?.input?.audio &&
4766
4880
  options.input.audio.frames &&
@@ -4839,10 +4953,12 @@ Current user's request: ${currentInput}`;
4839
4953
  const streamCompactionSessionId = this.getCompactionSessionId(options);
4840
4954
  if (streamBudget.shouldCompact &&
4841
4955
  (hasCallerConversationHistory || this.conversationMemory) &&
4842
- streamMessageCount > (this.lastCompactionMessageCount.get(streamCompactionSessionId) ?? 0)) {
4956
+ streamMessageCount >
4957
+ (this.lastCompactionMessageCount.get(streamCompactionSessionId) ?? 0)) {
4843
4958
  const compactor = new ContextCompactor({
4844
4959
  provider: providerName,
4845
- summarizationProvider: this.conversationMemoryConfig?.conversationMemory?.summarizationProvider,
4960
+ summarizationProvider: this.conversationMemoryConfig?.conversationMemory
4961
+ ?.summarizationProvider,
4846
4962
  summarizationModel: this.conversationMemoryConfig?.conversationMemory?.summarizationModel,
4847
4963
  });
4848
4964
  const compactionResult = await compactor.compact(conversationMessages, streamBudget.availableInputTokens, this.conversationMemoryConfig?.conversationMemory, options.context?.requestId);
@@ -4987,7 +5103,8 @@ Current user's request: ${currentInput}`;
4987
5103
  parentSpanId: traceCtx?.parentSpanId,
4988
5104
  });
4989
5105
  failedSpan = SpanSerializer.endSpan(failedSpan, SpanStatus.ERROR);
4990
- failedSpan.statusMessage = error instanceof Error ? error.message : String(error);
5106
+ failedSpan.statusMessage =
5107
+ error instanceof Error ? error.message : String(error);
4991
5108
  failedSpan.durationMs = Date.now() - startTime;
4992
5109
  this.metricsAggregator.recordSpan(failedSpan);
4993
5110
  getMetricsAggregator().recordSpan(failedSpan);
@@ -5011,7 +5128,9 @@ Current user's request: ${currentInput}`;
5011
5128
  const fallbackProcessedStream = (async function* (self) {
5012
5129
  try {
5013
5130
  for await (const chunk of fallbackStreamResult.stream) {
5014
- if (chunk && "content" in chunk && typeof chunk.content === "string") {
5131
+ if (chunk &&
5132
+ "content" in chunk &&
5133
+ typeof chunk.content === "string") {
5015
5134
  fallbackAccumulatedContent += chunk.content;
5016
5135
  // Emit chunk event
5017
5136
  self.emitter.emit("response:chunk", chunk.content);
@@ -5030,9 +5149,12 @@ Current user's request: ${currentInput}`;
5030
5149
  }
5031
5150
  // Store memory after fallback stream consumption is complete
5032
5151
  // Guard: skip storing if fallback accumulated content is empty
5033
- if (self.conversationMemory && enhancedOptions?.context?.sessionId && fallbackAccumulatedContent.trim()) {
5152
+ if (self.conversationMemory &&
5153
+ enhancedOptions?.context?.sessionId &&
5154
+ fallbackAccumulatedContent.trim()) {
5034
5155
  const sessionId = enhancedOptions?.context?.sessionId;
5035
- const userId = enhancedOptions?.context?.userId;
5156
+ const userId = enhancedOptions?.context
5157
+ ?.userId;
5036
5158
  let providerDetails;
5037
5159
  if (options.model) {
5038
5160
  providerDetails = {
@@ -5051,7 +5173,8 @@ Current user's request: ${currentInput}`;
5051
5173
  providerDetails,
5052
5174
  enableSummarization: enhancedOptions?.enableSummarization,
5053
5175
  requestId: enhancedOptions?.context?.requestId ||
5054
- options.context?.requestId,
5176
+ options.context
5177
+ ?.requestId,
5055
5178
  });
5056
5179
  self.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "fallback-stream" }, Date.now() - memStoreStart, SpanStatus.OK);
5057
5180
  }
@@ -5460,7 +5583,9 @@ Current user's request: ${currentInput}`;
5460
5583
  // Compose with any parent abortSignal from ToolExecutionOptions
5461
5584
  const execOptions = args[1];
5462
5585
  const parentSignal = execOptions?.abortSignal;
5463
- const composedSignal = parentSignal ? AbortSignal.any([parentSignal, timeoutSignal]) : timeoutSignal;
5586
+ const composedSignal = parentSignal
5587
+ ? AbortSignal.any([parentSignal, timeoutSignal])
5588
+ : timeoutSignal;
5464
5589
  // Replace the abortSignal in execution options
5465
5590
  const augmentedContext = {
5466
5591
  ...execOptions,
@@ -5517,7 +5642,9 @@ Current user's request: ${currentInput}`;
5517
5642
  * @returns Current context or undefined if not set
5518
5643
  */
5519
5644
  getToolContext() {
5520
- return this.toolExecutionContext ? { ...this.toolExecutionContext } : undefined;
5645
+ return this.toolExecutionContext
5646
+ ? { ...this.toolExecutionContext }
5647
+ : undefined;
5521
5648
  }
5522
5649
  /**
5523
5650
  * Clear the tool execution context
@@ -5621,7 +5748,8 @@ Current user's request: ${currentInput}`;
5621
5748
  typeof this.conversationMemory.updateAgenticLoopReport !== "function") {
5622
5749
  throw new ConversationMemoryError("updateAgenticLoopReport is only supported with Redis conversation memory.", "CONFIG_ERROR");
5623
5750
  }
5624
- await withTimeout(this.conversationMemory.updateAgenticLoopReport(sessionId, userId, report), 5000);
5751
+ await withTimeout(this
5752
+ .conversationMemory.updateAgenticLoopReport(sessionId, userId, report), 5000);
5625
5753
  }
5626
5754
  /**
5627
5755
  * Get all registered custom tools
@@ -5639,10 +5767,14 @@ Current user's request: ${currentInput}`;
5639
5767
  description: tool.description,
5640
5768
  hasParameters: !!tool.parameters,
5641
5769
  parametersType: typeof tool.parameters,
5642
- parametersKeys: tool.parameters && typeof tool.parameters === "object" ? Object.keys(tool.parameters) : "NOT_OBJECT",
5770
+ parametersKeys: tool.parameters && typeof tool.parameters === "object"
5771
+ ? Object.keys(tool.parameters)
5772
+ : "NOT_OBJECT",
5643
5773
  hasInputSchema: !!tool.inputSchema,
5644
5774
  inputSchemaType: typeof tool.inputSchema,
5645
- inputSchemaKeys: tool.inputSchema && typeof tool.inputSchema === "object" ? Object.keys(tool.inputSchema) : "NOT_OBJECT",
5775
+ inputSchemaKeys: tool.inputSchema && typeof tool.inputSchema === "object"
5776
+ ? Object.keys(tool.inputSchema)
5777
+ : "NOT_OBJECT",
5646
5778
  hasEffectiveSchema: !!effectiveSchema,
5647
5779
  effectiveSchemaType: typeof effectiveSchema,
5648
5780
  effectiveSchemaHasProperties: !!effectiveSchema?.properties,
@@ -5663,14 +5795,18 @@ Current user's request: ${currentInput}`;
5663
5795
  execute: async (params, context) => {
5664
5796
  // CONTEXT MERGING: Combine all available contexts for maximum information
5665
5797
  const storedContext = this.toolExecutionContext || {};
5666
- const runtimeContext = context && isNonNullObject(context) ? context : {};
5798
+ const runtimeContext = context && isNonNullObject(context)
5799
+ ? context
5800
+ : {};
5667
5801
  // Merge contexts with runtime context taking precedence
5668
5802
  // This ensures we have the richest possible context for tool execution
5669
5803
  const executionContext = {
5670
5804
  ...storedContext, // Base context from setToolContext (session, tokens, etc.)
5671
5805
  ...runtimeContext, // Runtime context from AI model (if any)
5672
5806
  // Ensure we always have at least a sessionId for tracing
5673
- sessionId: runtimeContext.sessionId || storedContext.sessionId || `fallback-${Date.now()}`,
5807
+ sessionId: runtimeContext.sessionId ||
5808
+ storedContext.sessionId ||
5809
+ `fallback-${Date.now()}`,
5674
5810
  };
5675
5811
  // Enhanced logging for context debugging
5676
5812
  logger.debug("Tool execution context merged", {
@@ -5678,7 +5814,8 @@ Current user's request: ${currentInput}`;
5678
5814
  storedContextKeys: Object.keys(storedContext),
5679
5815
  runtimeContextKeys: Object.keys(runtimeContext),
5680
5816
  finalContextKeys: Object.keys(executionContext),
5681
- hasJuspayToken: !!executionContext.juspayToken,
5817
+ hasJuspayToken: !!executionContext
5818
+ .juspayToken,
5682
5819
  hasShopId: !!executionContext.shopId,
5683
5820
  sessionId: executionContext.sessionId,
5684
5821
  });
@@ -5706,7 +5843,9 @@ Current user's request: ${currentInput}`;
5706
5843
  toolMap.set(toolName, {
5707
5844
  name: toolName,
5708
5845
  description: toolDef.description || `File tool: ${toolName}`,
5709
- inputSchema: typeof toolParams === "object" && toolParams !== null ? toolParams : { type: "object", properties: {} },
5846
+ inputSchema: typeof toolParams === "object" && toolParams !== null
5847
+ ? toolParams
5848
+ : { type: "object", properties: {} },
5710
5849
  execute: async (params) => {
5711
5850
  return await toolDef.execute(params, {
5712
5851
  toolCallId: `file-tool-${Date.now()}`,
@@ -5802,361 +5941,346 @@ Current user's request: ${currentInput}`;
5802
5941
  * @returns Tool execution result
5803
5942
  */
5804
5943
  async executeTool(toolName, params = {}, options) {
5805
- const functionTag = "NeuroLink.executeTool";
5806
- const executionStartTime = Date.now();
5807
- // === MCP ENHANCEMENT: RequestBatcher — batch programmatic tool calls ===
5808
- // LIMITATION: When the request batcher is enabled, per-tool timeout and retry
5809
- // settings (from registration options or call-site options) are NOT applied.
5810
- // The batcher uses its own hardcoded defaults for timeout and retry behavior.
5811
- // Use `bypassBatcher: true` to ensure per-tool timeout/retry is respected.
5812
- // Additionally, note that executeToolInternal's safe-tool retry logic may still
5813
- // trigger even when maxRetries is set to 0, since it operates independently.
5814
5944
  if (this.mcpToolBatcher && !options?.bypassBatcher) {
5815
5945
  return this.mcpToolBatcher.execute(toolName, params);
5816
5946
  }
5817
- // Determine tool type for span attributes
5818
- const externalTools = this.externalServerManager.getAllTools();
5819
- const externalTool = externalTools.find((tool) => tool.name === toolName);
5820
- const toolType = externalTool ? "mcp" : this.getCustomTools().has(toolName) ? "custom" : "external";
5821
- // Compute truncated input size for the span
5822
- const inputStr = typeof params === "string" ? params : params ? JSON.stringify(params) : "";
5823
- const inputSize = inputStr.length;
5824
- const truncatedInput = inputStr.length > 2048 ? inputStr.substring(0, 2048) : inputStr;
5947
+ const executionContext = this.createToolExecutionContext(toolName, params, options);
5825
5948
  return tracers.mcp.startActiveSpan("neurolink.tool.execute", {
5826
5949
  attributes: {
5827
5950
  "tool.name": toolName,
5828
- "tool.type": toolType,
5829
- "tool.input_size": inputSize,
5830
- "tool.input_preview": truncatedInput,
5951
+ "tool.type": executionContext.toolType,
5952
+ "tool.input_size": executionContext.inputSize,
5953
+ "tool.input_preview": executionContext.truncatedInput,
5831
5954
  },
5832
- }, async (toolSpan) => {
5833
- try {
5834
- // Debug: Log tool execution attempt
5835
- logger.debug(`[${functionTag}] Tool execution requested:`, {
5836
- toolName,
5837
- params: isNonNullObject(params) ? transformParamsForLogging(params) : params,
5838
- hasExternalManager: !!this.externalServerManager,
5839
- });
5840
- // 🔧 PARAMETER TRACE: Log tool execution details for debugging
5841
- logger.debug(`Tool execution detailed analysis`, {
5842
- toolName,
5843
- executionStartTime,
5844
- paramsAnalysis: {
5845
- type: typeof params,
5846
- isNull: params === null,
5847
- isUndefined: params === undefined,
5848
- isEmpty: params && typeof params === "object" && Object.keys(params).length === 0,
5849
- keys: params && typeof params === "object" ? Object.keys(params) : "NOT_OBJECT",
5850
- keysLength: params && typeof params === "object" ? Object.keys(params).length : 0,
5955
+ }, (toolSpan) => this.executeToolWithSpan(toolName, params, options, executionContext, toolSpan));
5956
+ }
5957
+ createToolExecutionContext(toolName, params, options) {
5958
+ const externalTool = this.externalServerManager
5959
+ .getAllTools()
5960
+ .find((tool) => tool.name === toolName);
5961
+ const toolType = externalTool
5962
+ ? "mcp"
5963
+ : this.getCustomTools().has(toolName)
5964
+ ? "custom"
5965
+ : "external";
5966
+ const inputStr = typeof params === "string"
5967
+ ? params
5968
+ : params
5969
+ ? JSON.stringify(params)
5970
+ : "";
5971
+ return {
5972
+ functionTag: "NeuroLink.executeTool",
5973
+ executionStartTime: Date.now(),
5974
+ externalTool,
5975
+ toolType,
5976
+ inputSize: inputStr.length,
5977
+ truncatedInput: inputStr.length > 2048 ? inputStr.substring(0, 2048) : inputStr,
5978
+ options,
5979
+ };
5980
+ }
5981
+ async executeToolWithSpan(toolName, params, options, executionContext, toolSpan) {
5982
+ try {
5983
+ const prepared = await this.prepareToolExecutionState(toolName, params, options, executionContext);
5984
+ return await this.runPreparedToolExecution(toolName, params, prepared, executionContext, toolSpan);
5985
+ }
5986
+ catch (outerError) {
5987
+ if (!(outerError instanceof NeuroLinkError)) {
5988
+ const errMsg = outerError instanceof Error ? outerError.message : String(outerError);
5989
+ toolSpan.recordException(outerError instanceof Error ? outerError : new Error(errMsg));
5990
+ toolSpan.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
5991
+ }
5992
+ throw outerError;
5993
+ }
5994
+ finally {
5995
+ toolSpan.end();
5996
+ }
5997
+ }
5998
+ async prepareToolExecutionState(toolName, params, options, executionContext) {
5999
+ logger.debug(`[${executionContext.functionTag}] Tool execution requested:`, {
6000
+ toolName,
6001
+ params: isNonNullObject(params)
6002
+ ? transformParamsForLogging(params)
6003
+ : params,
6004
+ hasExternalManager: !!this.externalServerManager,
6005
+ });
6006
+ logger.debug(`Tool execution detailed analysis`, {
6007
+ toolName,
6008
+ executionStartTime: executionContext.executionStartTime,
6009
+ paramsAnalysis: {
6010
+ type: typeof params,
6011
+ isNull: params === null,
6012
+ isUndefined: params === undefined,
6013
+ isEmpty: params &&
6014
+ typeof params === "object" &&
6015
+ Object.keys(params).length === 0,
6016
+ keys: params && typeof params === "object"
6017
+ ? Object.keys(params)
6018
+ : "NOT_OBJECT",
6019
+ keysLength: params && typeof params === "object"
6020
+ ? Object.keys(params).length
6021
+ : 0,
6022
+ },
6023
+ isTargetTool: toolName === "juspay-analytics_SuccessRateSRByTime",
6024
+ options,
6025
+ hasExternalManager: !!this.externalServerManager,
6026
+ });
6027
+ this.emitter.emit("tool:start", {
6028
+ toolName,
6029
+ timestamp: executionContext.executionStartTime,
6030
+ input: params,
6031
+ });
6032
+ const toolInfo = this.toolRegistry.getToolInfo(toolName);
6033
+ const finalOptions = {
6034
+ timeout: options?.timeout ??
6035
+ toolInfo?.tool?.timeoutMs ??
6036
+ TOOL_TIMEOUTS.EXECUTION_DEFAULT_MS,
6037
+ maxRetries: options?.maxRetries ??
6038
+ toolInfo?.tool?.maxRetries ??
6039
+ RETRY_ATTEMPTS.DEFAULT,
6040
+ retryDelayMs: options?.retryDelayMs || RETRY_DELAYS.BASE_MS,
6041
+ authContext: options?.authContext,
6042
+ disableToolCache: options?.disableToolCache,
6043
+ };
6044
+ const { MemoryManager } = await import("./utils/performance.js");
6045
+ const startMemory = MemoryManager.getMemoryUsageMB();
6046
+ const breakerServerId = executionContext.externalTool?.serverId ||
6047
+ toolInfo?.tool?.serverId ||
6048
+ "unknown";
6049
+ const breakerKey = `${breakerServerId}.${toolName}`;
6050
+ let circuitBreaker = this.toolCircuitBreakers.get(breakerKey);
6051
+ if (!circuitBreaker) {
6052
+ circuitBreaker = new CircuitBreaker(CIRCUIT_BREAKER.FAILURE_THRESHOLD, CIRCUIT_BREAKER_RESET_MS);
6053
+ this.toolCircuitBreakers.set(breakerKey, circuitBreaker);
6054
+ }
6055
+ let metrics = this.toolExecutionMetrics.get(toolName);
6056
+ if (!metrics) {
6057
+ metrics = {
6058
+ totalExecutions: 0,
6059
+ successfulExecutions: 0,
6060
+ failedExecutions: 0,
6061
+ averageExecutionTime: 0,
6062
+ lastExecutionTime: 0,
6063
+ errorCategories: {},
6064
+ };
6065
+ this.toolExecutionMetrics.set(toolName, metrics);
6066
+ }
6067
+ metrics.totalExecutions++;
6068
+ return {
6069
+ finalOptions,
6070
+ startMemory,
6071
+ circuitBreaker,
6072
+ breakerKey,
6073
+ metrics,
6074
+ };
6075
+ }
6076
+ async runPreparedToolExecution(toolName, params, prepared, executionContext, toolSpan) {
6077
+ try {
6078
+ mcpLogger.debug(`[${executionContext.functionTag}] Executing tool: ${toolName}`, {
6079
+ toolName,
6080
+ params,
6081
+ options: prepared.finalOptions,
6082
+ circuitBreakerState: prepared.circuitBreaker.getState(),
6083
+ });
6084
+ const result = await prepared.circuitBreaker.execute(async () => {
6085
+ return withRetry(async () => withTimeout(this.executeToolInternal(toolName, params, prepared.finalOptions), prepared.finalOptions.timeout, ErrorFactory.toolTimeout(toolName, prepared.finalOptions.timeout)), {
6086
+ maxAttempts: prepared.finalOptions.maxRetries + 1,
6087
+ delayMs: prepared.finalOptions.retryDelayMs,
6088
+ isRetriable: isRetriableError,
6089
+ onRetry: (attempt, error) => {
6090
+ mcpLogger.warn(`[${executionContext.functionTag}] Retrying tool execution (attempt ${attempt})`, {
6091
+ toolName,
6092
+ error: error.message,
6093
+ attempt,
6094
+ });
5851
6095
  },
5852
- isTargetTool: toolName === "juspay-analytics_SuccessRateSRByTime",
5853
- options,
5854
- hasExternalManager: !!this.externalServerManager,
5855
6096
  });
5856
- // Emit tool start event (NeuroLink format - keep existing)
5857
- this.emitter.emit("tool:start", {
5858
- toolName,
5859
- timestamp: executionStartTime,
5860
- input: params, // Enhanced: add input parameters
6097
+ });
6098
+ return await this.handleSuccessfulToolExecution(toolName, result, prepared, executionContext, toolSpan);
6099
+ }
6100
+ catch (error) {
6101
+ return this.handleFailedToolExecution(toolName, params, error, prepared, executionContext, toolSpan);
6102
+ }
6103
+ }
6104
+ async handleSuccessfulToolExecution(toolName, result, prepared, executionContext, toolSpan) {
6105
+ const executionTime = Date.now() - executionContext.executionStartTime;
6106
+ prepared.metrics.successfulExecutions++;
6107
+ prepared.metrics.lastExecutionTime = executionTime;
6108
+ prepared.metrics.averageExecutionTime =
6109
+ (prepared.metrics.averageExecutionTime *
6110
+ (prepared.metrics.successfulExecutions - 1) +
6111
+ executionTime) /
6112
+ prepared.metrics.successfulExecutions;
6113
+ const { MemoryManager } = await import("./utils/performance.js");
6114
+ const endMemory = MemoryManager.getMemoryUsageMB();
6115
+ const memoryDelta = endMemory.heapUsed - prepared.startMemory.heapUsed;
6116
+ if (memoryDelta > 20) {
6117
+ mcpLogger.warn(`Tool '${toolName}' used excessive memory: ${memoryDelta}MB`, {
6118
+ toolName,
6119
+ memoryDelta,
6120
+ executionTime,
6121
+ });
6122
+ }
6123
+ mcpLogger.debug(`[${executionContext.functionTag}] Tool executed successfully`, {
6124
+ toolName,
6125
+ executionTime,
6126
+ memoryDelta,
6127
+ circuitBreakerState: prepared.circuitBreaker.getState(),
6128
+ });
6129
+ const resultObj = result && typeof result === "object"
6130
+ ? result
6131
+ : undefined;
6132
+ const isToolError = (resultObj && "isError" in resultObj && resultObj.isError === true) ||
6133
+ (resultObj && "success" in resultObj && resultObj.success === false);
6134
+ if (isToolError) {
6135
+ try {
6136
+ await prepared.circuitBreaker.execute(async () => {
6137
+ throw new Error(`Tool ${toolName} returned isError:true`);
5861
6138
  });
5862
- // NL-004: Use composite key (serverId.toolName) to avoid cross-server collisions
5863
- // Fetch toolInfo early so per-tool timeout is available for finalOptions
5864
- const toolInfo = this.toolRegistry.getToolInfo(toolName);
5865
- // Set default options — per-tool values from registration take precedence over global defaults.
5866
- // When not explicitly set at registration, global defaults are preserved for backward compatibility.
5867
- const registeredTimeout = toolInfo?.tool?.timeoutMs;
5868
- const registeredMaxRetries = toolInfo?.tool?.maxRetries;
5869
- const finalOptions = {
5870
- timeout: options?.timeout ?? registeredTimeout ?? TOOL_TIMEOUTS.EXECUTION_DEFAULT_MS,
5871
- maxRetries: options?.maxRetries ?? registeredMaxRetries ?? RETRY_ATTEMPTS.DEFAULT,
5872
- retryDelayMs: options?.retryDelayMs || RETRY_DELAYS.BASE_MS,
5873
- authContext: options?.authContext,
5874
- disableToolCache: options?.disableToolCache,
5875
- };
5876
- // Track memory usage for tool execution
5877
- const { MemoryManager } = await import("./utils/performance.js");
5878
- const startMemory = MemoryManager.getMemoryUsageMB();
5879
- const breakerServerId = externalTool?.serverId || toolInfo?.tool?.serverId || "unknown";
5880
- const breakerKey = `${breakerServerId}.${toolName}`;
5881
- // Get or create circuit breaker for this tool
5882
- if (!this.toolCircuitBreakers.has(breakerKey)) {
5883
- this.toolCircuitBreakers.set(breakerKey, new CircuitBreaker(CIRCUIT_BREAKER.FAILURE_THRESHOLD, CIRCUIT_BREAKER_RESET_MS));
5884
- }
5885
- const circuitBreaker = this.toolCircuitBreakers.get(breakerKey);
5886
- // Initialize metrics for this tool if not exists
5887
- if (!this.toolExecutionMetrics.has(toolName)) {
5888
- this.toolExecutionMetrics.set(toolName, {
5889
- totalExecutions: 0,
5890
- successfulExecutions: 0,
5891
- failedExecutions: 0,
5892
- averageExecutionTime: 0,
5893
- lastExecutionTime: 0,
5894
- errorCategories: {},
5895
- });
5896
- }
5897
- const metrics = this.toolExecutionMetrics.get(toolName);
5898
- if (metrics) {
5899
- metrics.totalExecutions++;
5900
- }
5901
- try {
5902
- mcpLogger.debug(`[${functionTag}] Executing tool: ${toolName}`, {
5903
- toolName,
5904
- params,
5905
- options: finalOptions,
5906
- circuitBreakerState: circuitBreaker?.getState(),
5907
- });
5908
- // Execute with circuit breaker, timeout, and retry logic
5909
- if (!circuitBreaker) {
5910
- throw new Error(`Circuit breaker not initialized for tool: ${toolName}`);
5911
- }
5912
- const result = await circuitBreaker.execute(async () => {
5913
- return await withRetry(async () => {
5914
- return await withTimeout(this.executeToolInternal(toolName, params, finalOptions), finalOptions.timeout, ErrorFactory.toolTimeout(toolName, finalOptions.timeout));
5915
- }, {
5916
- maxAttempts: finalOptions.maxRetries + 1, // +1 for initial attempt
5917
- delayMs: finalOptions.retryDelayMs,
5918
- isRetriable: isRetriableError,
5919
- onRetry: (attempt, error) => {
5920
- mcpLogger.warn(`[${functionTag}] Retrying tool execution (attempt ${attempt})`, {
5921
- toolName,
5922
- error: error.message,
5923
- attempt,
5924
- });
5925
- },
5926
- });
5927
- });
5928
- // Update success metrics
5929
- const executionTime = Date.now() - executionStartTime;
5930
- if (metrics) {
5931
- metrics.successfulExecutions++;
5932
- metrics.lastExecutionTime = executionTime;
5933
- metrics.averageExecutionTime =
5934
- (metrics.averageExecutionTime * (metrics.successfulExecutions - 1) + executionTime) /
5935
- metrics.successfulExecutions;
5936
- }
5937
- // Track memory usage
5938
- const endMemory = MemoryManager.getMemoryUsageMB();
5939
- const memoryDelta = endMemory.heapUsed - startMemory.heapUsed;
5940
- if (memoryDelta > 20) {
5941
- mcpLogger.warn(`Tool '${toolName}' used excessive memory: ${memoryDelta}MB`, {
5942
- toolName,
5943
- memoryDelta,
5944
- executionTime,
5945
- });
5946
- }
5947
- mcpLogger.debug(`[${functionTag}] Tool executed successfully`, {
5948
- toolName,
5949
- executionTime,
5950
- memoryDelta,
5951
- circuitBreakerState: circuitBreaker?.getState(),
5952
- });
5953
- // Set span success attributes
5954
- // Check if result has isError flag (MCP tool error result)
5955
- // Also detect toolRegistry-wrapped errors that return { success: false }
5956
- const resultObj = result && typeof result === "object" ? result : undefined;
5957
- const isToolError = (resultObj && "isError" in resultObj && resultObj.isError === true) ||
5958
- (resultObj && "success" in resultObj && resultObj.success === false);
5959
- // NL-001: Count isError:true results as circuit breaker failures
5960
- // This ensures tools that return error results (not just thrown errors) are tracked
5961
- // TODO(NL-009): This records a failure AFTER the circuit breaker already recorded
5962
- // success inside `circuitBreaker.execute()`. The correct fix is to check `isToolError`
5963
- // inside the execute callback and throw before returning, so the breaker never sees
5964
- // success. Deferred because moving the check inside the callback requires restructuring
5965
- // the retry/timeout wrapper chain and is high-risk for a hot-path change.
5966
- if (isToolError && circuitBreaker) {
5967
- // Record a failure by executing a rejected promise through the breaker
5968
- try {
5969
- await circuitBreaker.execute(async () => {
5970
- throw new Error(`Tool ${toolName} returned isError:true`);
5971
- });
5972
- }
5973
- catch {
5974
- // Expected — we intentionally triggered the failure recording
5975
- }
5976
- mcpLogger.debug(`[${functionTag}] Circuit breaker failure recorded for isError result`, {
5977
- toolName,
5978
- circuitBreakerState: circuitBreaker.getState(),
5979
- circuitBreakerFailures: circuitBreaker.getFailureCount(),
5980
- });
5981
- }
5982
- // NL-002 + NL-003: Format and capture MCP error results
5983
- if (isToolError) {
5984
- const resultObj = result;
5985
- const contentArr = resultObj.content;
5986
- const errorText = contentArr
5987
- ?.filter((c) => c.type === "text" && c.text)
5988
- .map((c) => c.text)
5989
- .join(" ") || (typeof resultObj.error === "string" ? resultObj.error : "Unknown error");
5990
- const errorCategory = classifyMcpErrorMessage(errorText);
5991
- const prefix = `[TOOL_ERROR: ${toolName} failed (${errorCategory})] `;
5992
- // NL-002: Clone content array to avoid mutating shared objects, then prefix error
5993
- if (contentArr && Array.isArray(contentArr)) {
5994
- const clonedContent = contentArr.map((c) => ({ ...c }));
5995
- for (const content of clonedContent) {
5996
- if (content.type === "text" && content.text) {
5997
- content.text = prefix + content.text;
5998
- break; // Only prefix the first text content
5999
- }
6000
- }
6001
- resultObj.content = clonedContent;
6002
- }
6003
- // NL-003: Capture error details in span attributes for telemetry
6004
- toolSpan.setAttribute("tool.error.message", errorText.substring(0, 500));
6005
- toolSpan.setAttribute("tool.error.category", errorCategory);
6006
- toolSpan.setStatus({
6007
- code: SpanStatusCode.ERROR,
6008
- message: `MCP tool returned isError: ${errorText.substring(0, 200)}`,
6009
- });
6010
- if (metrics) {
6011
- metrics.failedExecutions++;
6012
- const prevSuccessful = metrics.successfulExecutions;
6013
- metrics.successfulExecutions = Math.max(0, metrics.successfulExecutions - 1);
6014
- // Recompute averageExecutionTime: back out this execution's duration
6015
- // which was incorrectly included as a success
6016
- if (prevSuccessful > 1) {
6017
- metrics.averageExecutionTime =
6018
- (metrics.averageExecutionTime * prevSuccessful - executionTime) / (prevSuccessful - 1);
6019
- }
6020
- else {
6021
- // No remaining successful executions, reset to 0
6022
- metrics.averageExecutionTime = 0;
6023
- }
6024
- const mappedCategory = mcpCategoryToErrorCategory(errorCategory);
6025
- metrics.errorCategories[mappedCategory] = (metrics.errorCategories[mappedCategory] || 0) + 1;
6026
- }
6027
- }
6028
- // Emit tool end event AFTER isError check so success flag is correct
6029
- this.emitToolEndEvent(toolName, executionStartTime, !isToolError, result);
6030
- toolSpan.setAttribute("tool.result.status", isToolError ? "error" : "success");
6031
- toolSpan.setAttribute("tool.duration_ms", executionTime);
6032
- return result;
6033
- }
6034
- catch (error) {
6035
- // Update failure metrics
6036
- if (metrics) {
6037
- metrics.failedExecutions++;
6038
- }
6039
- const executionTime = Date.now() - executionStartTime;
6040
- // Circuit breaker open: return a structured non-retryable isError result
6041
- // so the AI model understands the tool is temporarily unavailable.
6042
- // Log at warn (not error) since this is expected circuit breaker behavior.
6043
- if (error instanceof CircuitBreakerOpenError) {
6044
- mcpLogger.warn(`[${functionTag}] Tool blocked by circuit breaker: ${toolName}`, {
6045
- toolName,
6046
- breakerState: error.breakerState,
6047
- retryAfter: error.retryAfter,
6048
- retryAfterMs: error.retryAfterMs,
6049
- failureCount: error.failureCount,
6050
- executionTime,
6051
- });
6052
- if (metrics) {
6053
- const category = ErrorCategory.EXECUTION;
6054
- metrics.errorCategories[category] = (metrics.errorCategories[category] || 0) + 1;
6055
- }
6056
- // Emit tool end event for circuit breaker open
6057
- this.emitToolEndEvent(toolName, executionStartTime, false, undefined);
6058
- toolSpan.setAttribute("tool.result.status", "circuit_breaker_open");
6059
- toolSpan.setAttribute("tool.duration_ms", executionTime);
6060
- toolSpan.setAttribute("tool.circuit_breaker.state", error.breakerState);
6061
- toolSpan.setAttribute("tool.circuit_breaker.retry_after_ms", error.retryAfterMs);
6062
- toolSpan.setAttribute("tool.circuit_breaker.failure_count", error.failureCount);
6063
- toolSpan.setStatus({
6064
- code: SpanStatusCode.ERROR,
6065
- message: `Circuit breaker open for ${toolName}: ${error.message}`,
6066
- });
6067
- // Return an isError tool result so the AI can inform the user
6068
- // instead of throwing, which would cause a generic retry
6069
- return {
6070
- isError: true,
6071
- content: [
6072
- {
6073
- type: "text",
6074
- text: `TOOL TEMPORARILY UNAVAILABLE: "${toolName}" has been disabled after ` +
6075
- `${error.failureCount} failures. ` +
6076
- `This is a circuit breaker protection — do NOT retry this tool. ` +
6077
- `It will become available again after ${Math.ceil(error.retryAfterMs / 1000)} seconds ` +
6078
- `(at ${error.retryAfter}). ` +
6079
- `Instead, inform the user that the operation failed and suggest trying again later.`,
6080
- },
6081
- ],
6082
- };
6083
- }
6084
- // Create structured error
6085
- let structuredError;
6086
- if (error instanceof NeuroLinkError) {
6087
- structuredError = error;
6088
- }
6089
- else if (error instanceof Error) {
6090
- // Categorize the error based on the message
6091
- if (error.message.includes("timeout")) {
6092
- structuredError = ErrorFactory.toolTimeout(toolName, finalOptions.timeout);
6093
- }
6094
- else if (error.message.includes("not found")) {
6095
- const availableTools = await this.getAllAvailableTools();
6096
- structuredError = ErrorFactory.toolNotFound(toolName, extractToolNames(availableTools.map((t) => ({ name: t.name }))));
6097
- }
6098
- else if (error.message.includes("validation") || error.message.includes("parameter")) {
6099
- structuredError = ErrorFactory.invalidParameters(toolName, error, params);
6100
- }
6101
- else if (error.message.includes("network") || error.message.includes("connection")) {
6102
- structuredError = ErrorFactory.networkError(toolName, error);
6103
- }
6104
- else {
6105
- structuredError = ErrorFactory.toolExecutionFailed(toolName, error);
6106
- }
6107
- }
6108
- else {
6109
- structuredError = ErrorFactory.toolExecutionFailed(toolName, new Error(String(error)));
6110
- }
6111
- if (metrics) {
6112
- const category = structuredError.category || ErrorCategory.EXECUTION;
6113
- metrics.errorCategories[category] = (metrics.errorCategories[category] || 0) + 1;
6139
+ }
6140
+ catch {
6141
+ // Expected intentionally records the failure
6142
+ }
6143
+ mcpLogger.debug(`[${executionContext.functionTag}] Circuit breaker failure recorded for isError result`, {
6144
+ toolName,
6145
+ circuitBreakerState: prepared.circuitBreaker.getState(),
6146
+ circuitBreakerFailures: prepared.circuitBreaker.getFailureCount(),
6147
+ });
6148
+ const contentArr = resultObj?.content;
6149
+ const errorText = contentArr
6150
+ ?.filter((content) => content.type === "text" && content.text)
6151
+ .map((content) => content.text)
6152
+ .join(" ") ||
6153
+ (typeof resultObj?.error === "string"
6154
+ ? resultObj.error
6155
+ : "Unknown error");
6156
+ const errorCategory = classifyMcpErrorMessage(errorText);
6157
+ const prefix = `[TOOL_ERROR: ${toolName} failed (${errorCategory})] `;
6158
+ if (resultObj && Array.isArray(contentArr)) {
6159
+ const clonedContent = contentArr.map((content) => ({ ...content }));
6160
+ for (const content of clonedContent) {
6161
+ if (content.type === "text" && content.text) {
6162
+ content.text = prefix + content.text;
6163
+ break;
6114
6164
  }
6115
- // Emit tool end event BEFORE the error event.
6116
- // Node.js EventEmitter throws on unhandled 'error' events,
6117
- // which would prevent tool:end from being emitted.
6118
- this.emitToolEndEvent(toolName, executionStartTime, false, undefined, structuredError);
6119
- // Centralized error event emission
6120
- this.emitter.emit("error", structuredError);
6121
- // Add execution context to structured error
6122
- structuredError = new NeuroLinkError({
6123
- ...structuredError,
6124
- context: {
6125
- ...structuredError.context,
6126
- executionTime,
6127
- params,
6128
- options: finalOptions,
6129
- circuitBreakerState: circuitBreaker?.getState(),
6130
- circuitBreakerFailures: circuitBreaker?.getFailureCount(),
6131
- metrics: { ...metrics },
6132
- },
6133
- });
6134
- // Log structured error
6135
- logStructuredError(structuredError);
6136
- // Record error on span
6137
- toolSpan.setAttribute("tool.result.status", "error");
6138
- toolSpan.setAttribute("tool.duration_ms", executionTime);
6139
- toolSpan.recordException(structuredError);
6140
- toolSpan.setStatus({
6141
- code: SpanStatusCode.ERROR,
6142
- message: structuredError.message,
6143
- });
6144
- throw structuredError;
6145
6165
  }
6166
+ resultObj.content = clonedContent;
6146
6167
  }
6147
- catch (outerError) {
6148
- // If the error was not already recorded on the span (from inner catch), record it
6149
- if (!(outerError instanceof NeuroLinkError)) {
6150
- const errMsg = outerError instanceof Error ? outerError.message : String(outerError);
6151
- toolSpan.recordException(outerError instanceof Error ? outerError : new Error(errMsg));
6152
- toolSpan.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
6153
- }
6154
- throw outerError;
6168
+ toolSpan.setAttribute("tool.error.message", errorText.substring(0, 500));
6169
+ toolSpan.setAttribute("tool.error.category", errorCategory);
6170
+ toolSpan.setStatus({
6171
+ code: SpanStatusCode.ERROR,
6172
+ message: `MCP tool returned isError: ${errorText.substring(0, 200)}`,
6173
+ });
6174
+ prepared.metrics.failedExecutions++;
6175
+ const prevSuccessful = prepared.metrics.successfulExecutions;
6176
+ prepared.metrics.successfulExecutions = Math.max(0, prepared.metrics.successfulExecutions - 1);
6177
+ prepared.metrics.averageExecutionTime =
6178
+ prevSuccessful > 1
6179
+ ? (prepared.metrics.averageExecutionTime * prevSuccessful -
6180
+ executionTime) /
6181
+ (prevSuccessful - 1)
6182
+ : 0;
6183
+ const mappedCategory = mcpCategoryToErrorCategory(errorCategory);
6184
+ prepared.metrics.errorCategories[mappedCategory] =
6185
+ (prepared.metrics.errorCategories[mappedCategory] || 0) + 1;
6186
+ }
6187
+ this.emitToolEndEvent(toolName, executionContext.executionStartTime, !isToolError, result);
6188
+ toolSpan.setAttribute("tool.result.status", isToolError ? "error" : "success");
6189
+ toolSpan.setAttribute("tool.duration_ms", executionTime);
6190
+ return result;
6191
+ }
6192
+ async handleFailedToolExecution(toolName, params, error, prepared, executionContext, toolSpan) {
6193
+ prepared.metrics.failedExecutions++;
6194
+ const executionTime = Date.now() - executionContext.executionStartTime;
6195
+ if (error instanceof CircuitBreakerOpenError) {
6196
+ mcpLogger.warn(`[${executionContext.functionTag}] Tool blocked by circuit breaker: ${toolName}`, {
6197
+ toolName,
6198
+ breakerState: error.breakerState,
6199
+ retryAfter: error.retryAfter,
6200
+ retryAfterMs: error.retryAfterMs,
6201
+ failureCount: error.failureCount,
6202
+ executionTime,
6203
+ });
6204
+ prepared.metrics.errorCategories[ErrorCategory.EXECUTION] =
6205
+ (prepared.metrics.errorCategories[ErrorCategory.EXECUTION] || 0) + 1;
6206
+ this.emitToolEndEvent(toolName, executionContext.executionStartTime, false, undefined);
6207
+ toolSpan.setAttribute("tool.result.status", "circuit_breaker_open");
6208
+ toolSpan.setAttribute("tool.duration_ms", executionTime);
6209
+ toolSpan.setAttribute("tool.circuit_breaker.state", error.breakerState);
6210
+ toolSpan.setAttribute("tool.circuit_breaker.retry_after_ms", error.retryAfterMs);
6211
+ toolSpan.setAttribute("tool.circuit_breaker.failure_count", error.failureCount);
6212
+ toolSpan.setStatus({
6213
+ code: SpanStatusCode.ERROR,
6214
+ message: `Circuit breaker open for ${toolName}: ${error.message}`,
6215
+ });
6216
+ return {
6217
+ isError: true,
6218
+ content: [
6219
+ {
6220
+ type: "text",
6221
+ text: `TOOL TEMPORARILY UNAVAILABLE: "${toolName}" has been disabled after ` +
6222
+ `${error.failureCount} failures. ` +
6223
+ `This is a circuit breaker protection — do NOT retry this tool. ` +
6224
+ `It will become available again after ${Math.ceil(error.retryAfterMs / 1000)} seconds ` +
6225
+ `(at ${error.retryAfter}). ` +
6226
+ `Instead, inform the user that the operation failed and suggest trying again later.`,
6227
+ },
6228
+ ],
6229
+ };
6230
+ }
6231
+ let structuredError;
6232
+ if (error instanceof NeuroLinkError) {
6233
+ structuredError = error;
6234
+ }
6235
+ else if (error instanceof Error) {
6236
+ if (error.message.includes("timeout")) {
6237
+ structuredError = ErrorFactory.toolTimeout(toolName, prepared.finalOptions.timeout);
6155
6238
  }
6156
- finally {
6157
- toolSpan.end();
6239
+ else if (error.message.includes("not found")) {
6240
+ const availableTools = await this.getAllAvailableTools();
6241
+ structuredError = ErrorFactory.toolNotFound(toolName, extractToolNames(availableTools.map((tool) => ({ name: tool.name }))));
6242
+ }
6243
+ else if (error.message.includes("validation") ||
6244
+ error.message.includes("parameter")) {
6245
+ structuredError = ErrorFactory.invalidParameters(toolName, error, params);
6246
+ }
6247
+ else if (error.message.includes("network") ||
6248
+ error.message.includes("connection")) {
6249
+ structuredError = ErrorFactory.networkError(toolName, error);
6250
+ }
6251
+ else {
6252
+ structuredError = ErrorFactory.toolExecutionFailed(toolName, error);
6158
6253
  }
6254
+ }
6255
+ else {
6256
+ structuredError = ErrorFactory.toolExecutionFailed(toolName, new Error(String(error)));
6257
+ }
6258
+ const category = structuredError.category || ErrorCategory.EXECUTION;
6259
+ prepared.metrics.errorCategories[category] =
6260
+ (prepared.metrics.errorCategories[category] || 0) + 1;
6261
+ this.emitToolEndEvent(toolName, executionContext.executionStartTime, false, undefined, structuredError);
6262
+ this.emitter.emit("error", structuredError);
6263
+ structuredError = new NeuroLinkError({
6264
+ ...structuredError,
6265
+ context: {
6266
+ ...structuredError.context,
6267
+ executionTime,
6268
+ params,
6269
+ options: prepared.finalOptions,
6270
+ circuitBreakerState: prepared.circuitBreaker.getState(),
6271
+ circuitBreakerFailures: prepared.circuitBreaker.getFailureCount(),
6272
+ metrics: { ...prepared.metrics },
6273
+ },
6159
6274
  });
6275
+ logStructuredError(structuredError);
6276
+ toolSpan.setAttribute("tool.result.status", "error");
6277
+ toolSpan.setAttribute("tool.duration_ms", executionTime);
6278
+ toolSpan.recordException(structuredError);
6279
+ toolSpan.setStatus({
6280
+ code: SpanStatusCode.ERROR,
6281
+ message: structuredError.message,
6282
+ });
6283
+ throw structuredError;
6160
6284
  }
6161
6285
  /**
6162
6286
  * Internal tool execution method with MCP enhancements wired in:
@@ -6234,7 +6358,9 @@ Current user's request: ${currentInput}`;
6234
6358
  inputSchema: {},
6235
6359
  };
6236
6360
  const decision = this.mcpToolRouter.route(mcpTool);
6237
- externalTool = matchingTools.find((t) => t.serverId === decision.serverId) || matchingTools[0];
6361
+ externalTool =
6362
+ matchingTools.find((t) => t.serverId === decision.serverId) ||
6363
+ matchingTools[0];
6238
6364
  logger.debug(`[${functionTag}] Router selected server: ${decision.serverId}`, {
6239
6365
  strategy: decision.strategy,
6240
6366
  confidence: decision.confidence,
@@ -6290,7 +6416,10 @@ Current user's request: ${currentInput}`;
6290
6416
  });
6291
6417
  const result = (await this.toolRegistry.executeTool(toolName, params, context));
6292
6418
  // Check if result indicates a failure and emit error event
6293
- if (result && typeof result === "object" && "success" in result && result.success === false) {
6419
+ if (result &&
6420
+ typeof result === "object" &&
6421
+ "success" in result &&
6422
+ result.success === false) {
6294
6423
  const errorMessage = result.error || "Tool execution failed";
6295
6424
  const errorToEmit = new Error(errorMessage);
6296
6425
  this.emitter.emit("error", errorToEmit);
@@ -6328,7 +6457,10 @@ Current user's request: ${currentInput}`;
6328
6457
  execute: async () => ({}),
6329
6458
  }
6330
6459
  : undefined;
6331
- if (toolStubForRetry && isSafeToRetry(toolStubForRetry) && error instanceof Error && isRetriableError(error)) {
6460
+ if (toolStubForRetry &&
6461
+ isSafeToRetry(toolStubForRetry) &&
6462
+ error instanceof Error &&
6463
+ isRetriableError(error)) {
6332
6464
  logger.debug(`[${functionTag}] Tool ${toolName} is safe to retry, attempting once more`);
6333
6465
  try {
6334
6466
  const retryResult = await executeWithMiddleware(executeCore);
@@ -6373,7 +6505,8 @@ Current user's request: ${currentInput}`;
6373
6505
  }
6374
6506
  async getAllAvailableTools() {
6375
6507
  // Return from cache if available and not stale
6376
- if (this.toolCache && Date.now() - this.toolCache.timestamp < this.toolCacheDuration) {
6508
+ if (this.toolCache &&
6509
+ Date.now() - this.toolCache.timestamp < this.toolCacheDuration) {
6377
6510
  logger.debug("Returning available tools from cache");
6378
6511
  return this.toolCache.tools;
6379
6512
  }
@@ -6454,7 +6587,9 @@ Current user's request: ${currentInput}`;
6454
6587
  if (!allTools.has(tool.name)) {
6455
6588
  const optimizedTool = optimizeToolForCollection(tool, {
6456
6589
  category: detectCategory({
6457
- existingCategory: typeof tool.metadata?.category === "string" ? tool.metadata.category : undefined,
6590
+ existingCategory: typeof tool.metadata?.category === "string"
6591
+ ? tool.metadata.category
6592
+ : undefined,
6458
6593
  isExternal: true,
6459
6594
  serverId: tool.serverId,
6460
6595
  }),
@@ -6610,7 +6745,9 @@ Current user's request: ${currentInput}`;
6610
6745
  status: "failed",
6611
6746
  configured: false,
6612
6747
  authenticated: false,
6613
- error: error instanceof Error ? error.message : "Ollama service not running",
6748
+ error: error instanceof Error
6749
+ ? error.message
6750
+ : "Ollama service not running",
6614
6751
  responseTime: Date.now() - startTime,
6615
6752
  };
6616
6753
  }
@@ -6733,7 +6870,9 @@ Current user's request: ${currentInput}`;
6733
6870
  inMemoryServerInfos.length +
6734
6871
  builtInServerInfos.length +
6735
6872
  autoDiscoveredServerInfos.length;
6736
- const availableServers = externalStats.connectedServers + inMemoryServerInfos.length + builtInServerInfos.length; // in-memory and built-in always available
6873
+ const availableServers = externalStats.connectedServers +
6874
+ inMemoryServerInfos.length +
6875
+ builtInServerInfos.length; // in-memory and built-in always available
6737
6876
  const totalTools = allTools.length + externalStats.totalTools;
6738
6877
  return {
6739
6878
  mcpInitialized: this.mcpInitialized,
@@ -6802,7 +6941,8 @@ Current user's request: ${currentInput}`;
6802
6941
  // Test external MCP servers
6803
6942
  const externalServer = this.externalServerManager.getServer(serverId);
6804
6943
  if (externalServer) {
6805
- return externalServer.status === "connected" && externalServer.client !== null;
6944
+ return (externalServer.status === "connected" &&
6945
+ externalServer.client !== null);
6806
6946
  }
6807
6947
  return false;
6808
6948
  }
@@ -6922,7 +7062,9 @@ Current user's request: ${currentInput}`;
6922
7062
  metrics[toolName] = {
6923
7063
  ...toolMetrics,
6924
7064
  errorCategories: { ...toolMetrics.errorCategories },
6925
- successRate: toolMetrics.totalExecutions > 0 ? toolMetrics.successfulExecutions / toolMetrics.totalExecutions : 0,
7065
+ successRate: toolMetrics.totalExecutions > 0
7066
+ ? toolMetrics.successfulExecutions / toolMetrics.totalExecutions
7067
+ : 0,
6926
7068
  };
6927
7069
  }
6928
7070
  return metrics;
@@ -6942,7 +7084,7 @@ Current user's request: ${currentInput}`;
6942
7084
  */
6943
7085
  getToolCircuitBreakerStatus() {
6944
7086
  const status = {};
6945
- for (const [toolName, circuitBreaker] of this.toolCircuitBreakers.entries()) {
7087
+ for (const [toolName, circuitBreaker,] of this.toolCircuitBreakers.entries()) {
6946
7088
  status[toolName] = {
6947
7089
  state: circuitBreaker.getState(),
6948
7090
  failureCount: circuitBreaker.getFailureCount(),
@@ -6995,7 +7137,8 @@ Current user's request: ${currentInput}`;
6995
7137
  ? metrics.successfulExecutions / metrics.totalExecutions
6996
7138
  : 0
6997
7139
  : 0;
6998
- const isHealthy = (!circuitBreaker || circuitBreaker.getState() === "closed") && successRate >= 0.8;
7140
+ const isHealthy = (!circuitBreaker || circuitBreaker.getState() === "closed") &&
7141
+ successRate >= 0.8;
6999
7142
  if (isHealthy) {
7000
7143
  healthyCount++;
7001
7144
  }
@@ -7036,7 +7179,9 @@ Current user's request: ${currentInput}`;
7036
7179
  successRate,
7037
7180
  averageExecutionTime: metrics?.averageExecutionTime || 0,
7038
7181
  lastExecutionTime: metrics?.lastExecutionTime || 0,
7039
- errorCategories: metrics?.errorCategories ? { ...metrics.errorCategories } : {},
7182
+ errorCategories: metrics?.errorCategories
7183
+ ? { ...metrics.errorCategories }
7184
+ : {},
7040
7185
  },
7041
7186
  circuitBreaker: {
7042
7187
  state: circuitBreaker?.getState() || "closed",
@@ -7188,7 +7333,8 @@ Current user's request: ${currentInput}`;
7188
7333
  */
7189
7334
  async storeToolExecutions(sessionId, userId, toolCalls, toolResults, currentTime) {
7190
7335
  // Check if tools are not empty
7191
- const hasToolData = (toolCalls && toolCalls.length > 0) || (toolResults && toolResults.length > 0);
7336
+ const hasToolData = (toolCalls && toolCalls.length > 0) ||
7337
+ (toolResults && toolResults.length > 0);
7192
7338
  if (!hasToolData) {
7193
7339
  logger.debug("Tool execution storage skipped", {
7194
7340
  hasToolData,
@@ -7198,7 +7344,8 @@ Current user's request: ${currentInput}`;
7198
7344
  return;
7199
7345
  }
7200
7346
  // Type guard to ensure it's Redis conversation memory manager
7201
- const redisMemory = this.conversationMemory;
7347
+ const redisMemory = this
7348
+ .conversationMemory;
7202
7349
  try {
7203
7350
  await redisMemory.storeToolExecution(sessionId, userId, toolCalls, toolResults, currentTime);
7204
7351
  }
@@ -7217,7 +7364,9 @@ Current user's request: ${currentInput}`;
7217
7364
  */
7218
7365
  isToolExecutionStorageAvailable() {
7219
7366
  const isRedisStorage = process.env.STORAGE_TYPE === "redis";
7220
- const hasRedisConversationMemory = this.conversationMemory && this.conversationMemory.constructor.name === "RedisConversationMemoryManager";
7367
+ const hasRedisConversationMemory = this.conversationMemory &&
7368
+ this.conversationMemory.constructor.name ===
7369
+ "RedisConversationMemoryManager";
7221
7370
  return !!(isRedisStorage && hasRedisConversationMemory);
7222
7371
  }
7223
7372
  /**
@@ -7736,7 +7885,8 @@ Current user's request: ${currentInput}`;
7736
7885
  return null;
7737
7886
  }
7738
7887
  // Check for explicit annotations set on the tool first
7739
- const explicitAnnotations = toolInfo.tool.annotations;
7888
+ const explicitAnnotations = toolInfo.tool
7889
+ .annotations;
7740
7890
  // Infer annotations from the tool name/description as fallback
7741
7891
  const inferredAnnotations = inferAnnotations({
7742
7892
  name: toolInfo.tool.name,
@@ -7768,7 +7918,9 @@ Current user's request: ${currentInput}`;
7768
7918
  const result = await this.externalServerManager.executeTool(tool.serverId, tool.name, params, { timeout: 30000 });
7769
7919
  mcpLogger.debug(`[NeuroLink] External MCP tool execution result: ${tool.name}`, {
7770
7920
  success: !!result,
7771
- hasData: !!(result && typeof result === "object" && "content" in result),
7921
+ hasData: !!(result &&
7922
+ typeof result === "object" &&
7923
+ "content" in result),
7772
7924
  });
7773
7925
  return result;
7774
7926
  }
@@ -8184,7 +8336,9 @@ Current user's request: ${currentInput}`;
8184
8336
  logger.debug("[NeuroLink] OpenTelemetry shutdown successfully");
8185
8337
  }
8186
8338
  catch (error) {
8187
- const err = error instanceof Error ? error : new Error(`OpenTelemetry shutdown error: ${String(error)}`);
8339
+ const err = error instanceof Error
8340
+ ? error
8341
+ : new Error(`OpenTelemetry shutdown error: ${String(error)}`);
8188
8342
  cleanupErrors.push(err);
8189
8343
  logger.warn("[NeuroLink] Error shutting down OpenTelemetry:", error);
8190
8344
  }
@@ -8196,7 +8350,9 @@ Current user's request: ${currentInput}`;
8196
8350
  logger.debug("[NeuroLink] External MCP servers shutdown successfully");
8197
8351
  }
8198
8352
  catch (error) {
8199
- const err = error instanceof Error ? error : new Error(`External server shutdown error: ${String(error)}`);
8353
+ const err = error instanceof Error
8354
+ ? error
8355
+ : new Error(`External server shutdown error: ${String(error)}`);
8200
8356
  cleanupErrors.push(err);
8201
8357
  logger.warn("[NeuroLink] Error shutting down external MCP servers:", error);
8202
8358
  }
@@ -8210,7 +8366,9 @@ Current user's request: ${currentInput}`;
8210
8366
  logger.debug("[NeuroLink] Event listeners removed successfully");
8211
8367
  }
8212
8368
  catch (error) {
8213
- const err = error instanceof Error ? error : new Error(`Event emitter cleanup error: ${String(error)}`);
8369
+ const err = error instanceof Error
8370
+ ? error
8371
+ : new Error(`Event emitter cleanup error: ${String(error)}`);
8214
8372
  cleanupErrors.push(err);
8215
8373
  logger.warn("[NeuroLink] Error removing event listeners:", error);
8216
8374
  }
@@ -8223,7 +8381,9 @@ Current user's request: ${currentInput}`;
8223
8381
  logger.debug("[NeuroLink] Circuit breakers cleared successfully");
8224
8382
  }
8225
8383
  catch (error) {
8226
- const err = error instanceof Error ? error : new Error(`Circuit breaker cleanup error: ${String(error)}`);
8384
+ const err = error instanceof Error
8385
+ ? error
8386
+ : new Error(`Circuit breaker cleanup error: ${String(error)}`);
8227
8387
  cleanupErrors.push(err);
8228
8388
  logger.warn("[NeuroLink] Error clearing circuit breakers:", error);
8229
8389
  }
@@ -8260,7 +8420,9 @@ Current user's request: ${currentInput}`;
8260
8420
  logger.debug("[NeuroLink] Maps and caches cleared successfully");
8261
8421
  }
8262
8422
  catch (error) {
8263
- const err = error instanceof Error ? error : new Error(`Cache cleanup error: ${String(error)}`);
8423
+ const err = error instanceof Error
8424
+ ? error
8425
+ : new Error(`Cache cleanup error: ${String(error)}`);
8264
8426
  cleanupErrors.push(err);
8265
8427
  logger.warn("[NeuroLink] Error clearing caches:", error);
8266
8428
  }
@@ -8286,7 +8448,9 @@ Current user's request: ${currentInput}`;
8286
8448
  logger.debug("[NeuroLink] Initialization state reset successfully");
8287
8449
  }
8288
8450
  catch (error) {
8289
- const err = error instanceof Error ? error : new Error(`State reset error: ${String(error)}`);
8451
+ const err = error instanceof Error
8452
+ ? error
8453
+ : new Error(`State reset error: ${String(error)}`);
8290
8454
  cleanupErrors.push(err);
8291
8455
  logger.warn("[NeuroLink] Error resetting state:", error);
8292
8456
  }
@@ -8330,8 +8494,11 @@ Current user's request: ${currentInput}`;
8330
8494
  }
8331
8495
  const compactor = new ContextCompactor({
8332
8496
  ...config,
8333
- summarizationProvider: config?.summarizationProvider ?? this.conversationMemoryConfig?.conversationMemory?.summarizationProvider,
8334
- summarizationModel: config?.summarizationModel ?? this.conversationMemoryConfig?.conversationMemory?.summarizationModel,
8497
+ summarizationProvider: config?.summarizationProvider ??
8498
+ this.conversationMemoryConfig?.conversationMemory
8499
+ ?.summarizationProvider,
8500
+ summarizationModel: config?.summarizationModel ??
8501
+ this.conversationMemoryConfig?.conversationMemory?.summarizationModel,
8335
8502
  });
8336
8503
  // Use actual context window to determine target, not arbitrary heuristic
8337
8504
  const budgetInfo = checkContextBudget({
@@ -8406,7 +8573,8 @@ Current user's request: ${currentInput}`;
8406
8573
  let provider;
8407
8574
  let providerType;
8408
8575
  // Duck-type check: direct MastraAuthProvider instance
8409
- if ("authenticateToken" in config && typeof config.authenticateToken === "function") {
8576
+ if ("authenticateToken" in config &&
8577
+ typeof config.authenticateToken === "function") {
8410
8578
  provider = config;
8411
8579
  providerType = provider.type;
8412
8580
  }
@@ -8450,7 +8618,8 @@ Current user's request: ${currentInput}`;
8450
8618
  }
8451
8619
  finally {
8452
8620
  if (this.authInitPromise &&
8453
- (this.pendingAuthConfig === undefined || this.pendingAuthConfig === pendingAuthConfig)) {
8621
+ (this.pendingAuthConfig === undefined ||
8622
+ this.pendingAuthConfig === pendingAuthConfig)) {
8454
8623
  this.authInitPromise = undefined;
8455
8624
  }
8456
8625
  }