@juspay/neurolink 9.23.0 → 9.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225)
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +10 -13
  3. package/dist/adapters/tts/googleTTSHandler.js +26 -1
  4. package/dist/adapters/video/vertexVideoHandler.js +23 -17
  5. package/dist/cli/commands/config.d.ts +3 -3
  6. package/dist/cli/commands/observability.d.ts +53 -0
  7. package/dist/cli/commands/observability.js +453 -0
  8. package/dist/cli/commands/telemetry.d.ts +63 -0
  9. package/dist/cli/commands/telemetry.js +689 -0
  10. package/dist/cli/factories/commandFactory.d.ts +34 -0
  11. package/dist/cli/factories/commandFactory.js +321 -116
  12. package/dist/cli/parser.js +6 -9
  13. package/dist/cli/utils/formatters.d.ts +13 -0
  14. package/dist/cli/utils/formatters.js +23 -0
  15. package/dist/constants/contextWindows.js +6 -0
  16. package/dist/constants/enums.d.ts +6 -0
  17. package/dist/constants/enums.js +8 -2
  18. package/dist/context/budgetChecker.js +75 -48
  19. package/dist/context/contextCompactor.js +135 -127
  20. package/dist/core/baseProvider.d.ts +5 -0
  21. package/dist/core/baseProvider.js +158 -102
  22. package/dist/core/conversationMemoryInitializer.js +7 -4
  23. package/dist/core/conversationMemoryManager.d.ts +2 -0
  24. package/dist/core/conversationMemoryManager.js +6 -2
  25. package/dist/core/modules/GenerationHandler.d.ts +2 -2
  26. package/dist/core/modules/GenerationHandler.js +12 -12
  27. package/dist/evaluation/ragasEvaluator.js +39 -19
  28. package/dist/evaluation/scoring.js +46 -20
  29. package/dist/features/ppt/index.d.ts +1 -1
  30. package/dist/features/ppt/index.js +1 -1
  31. package/dist/features/ppt/presentationOrchestrator.js +23 -0
  32. package/dist/features/ppt/slideGenerator.js +13 -0
  33. package/dist/features/ppt/slideRenderers.d.ts +1 -1
  34. package/dist/features/ppt/slideRenderers.js +6 -4
  35. package/dist/features/ppt/slideTypeInference.d.ts +1 -1
  36. package/dist/features/ppt/slideTypeInference.js +75 -73
  37. package/dist/files/fileTools.d.ts +6 -6
  38. package/dist/index.d.ts +46 -12
  39. package/dist/index.js +79 -17
  40. package/dist/lib/adapters/tts/googleTTSHandler.js +26 -1
  41. package/dist/lib/adapters/video/vertexVideoHandler.js +23 -17
  42. package/dist/lib/constants/contextWindows.js +6 -0
  43. package/dist/lib/constants/enums.d.ts +6 -0
  44. package/dist/lib/constants/enums.js +8 -2
  45. package/dist/lib/context/budgetChecker.js +75 -48
  46. package/dist/lib/context/contextCompactor.js +135 -127
  47. package/dist/lib/core/baseProvider.d.ts +5 -0
  48. package/dist/lib/core/baseProvider.js +158 -102
  49. package/dist/lib/core/conversationMemoryInitializer.js +7 -4
  50. package/dist/lib/core/conversationMemoryManager.d.ts +2 -0
  51. package/dist/lib/core/conversationMemoryManager.js +6 -2
  52. package/dist/lib/core/modules/GenerationHandler.d.ts +2 -2
  53. package/dist/lib/core/modules/GenerationHandler.js +12 -12
  54. package/dist/lib/evaluation/ragasEvaluator.js +39 -19
  55. package/dist/lib/evaluation/scoring.js +46 -20
  56. package/dist/lib/features/ppt/index.d.ts +1 -1
  57. package/dist/lib/features/ppt/index.js +1 -1
  58. package/dist/lib/features/ppt/presentationOrchestrator.js +23 -0
  59. package/dist/lib/features/ppt/slideGenerator.js +13 -0
  60. package/dist/lib/features/ppt/slideRenderers.d.ts +1 -1
  61. package/dist/lib/features/ppt/slideRenderers.js +6 -4
  62. package/dist/lib/features/ppt/slideTypeInference.d.ts +1 -1
  63. package/dist/lib/features/ppt/slideTypeInference.js +75 -73
  64. package/dist/lib/files/fileTools.d.ts +6 -6
  65. package/dist/lib/index.d.ts +46 -12
  66. package/dist/lib/index.js +79 -17
  67. package/dist/lib/mcp/httpRateLimiter.js +39 -12
  68. package/dist/lib/mcp/httpRetryHandler.js +22 -1
  69. package/dist/lib/mcp/mcpClientFactory.js +13 -15
  70. package/dist/lib/memory/memoryRetrievalTools.js +22 -0
  71. package/dist/lib/neurolink.d.ts +64 -72
  72. package/dist/lib/neurolink.js +984 -566
  73. package/dist/lib/observability/exporterRegistry.d.ts +152 -0
  74. package/dist/lib/observability/exporterRegistry.js +414 -0
  75. package/dist/lib/observability/exporters/arizeExporter.d.ts +32 -0
  76. package/dist/lib/observability/exporters/arizeExporter.js +139 -0
  77. package/dist/lib/observability/exporters/baseExporter.d.ts +117 -0
  78. package/dist/lib/observability/exporters/baseExporter.js +191 -0
  79. package/dist/lib/observability/exporters/braintrustExporter.d.ts +30 -0
  80. package/dist/lib/observability/exporters/braintrustExporter.js +155 -0
  81. package/dist/lib/observability/exporters/datadogExporter.d.ts +37 -0
  82. package/dist/lib/observability/exporters/datadogExporter.js +197 -0
  83. package/dist/lib/observability/exporters/index.d.ts +13 -0
  84. package/dist/lib/observability/exporters/index.js +14 -0
  85. package/dist/lib/observability/exporters/laminarExporter.d.ts +48 -0
  86. package/dist/lib/observability/exporters/laminarExporter.js +303 -0
  87. package/dist/lib/observability/exporters/langfuseExporter.d.ts +47 -0
  88. package/dist/lib/observability/exporters/langfuseExporter.js +200 -0
  89. package/dist/lib/observability/exporters/langsmithExporter.d.ts +26 -0
  90. package/dist/lib/observability/exporters/langsmithExporter.js +124 -0
  91. package/dist/lib/observability/exporters/otelExporter.d.ts +39 -0
  92. package/dist/lib/observability/exporters/otelExporter.js +165 -0
  93. package/dist/lib/observability/exporters/posthogExporter.d.ts +48 -0
  94. package/dist/lib/observability/exporters/posthogExporter.js +288 -0
  95. package/dist/lib/observability/exporters/sentryExporter.d.ts +32 -0
  96. package/dist/lib/observability/exporters/sentryExporter.js +166 -0
  97. package/dist/lib/observability/index.d.ts +25 -0
  98. package/dist/lib/observability/index.js +32 -0
  99. package/dist/lib/observability/metricsAggregator.d.ts +260 -0
  100. package/dist/lib/observability/metricsAggregator.js +553 -0
  101. package/dist/lib/observability/otelBridge.d.ts +49 -0
  102. package/dist/lib/observability/otelBridge.js +132 -0
  103. package/dist/lib/observability/retryPolicy.d.ts +192 -0
  104. package/dist/lib/observability/retryPolicy.js +384 -0
  105. package/dist/lib/observability/sampling/index.d.ts +4 -0
  106. package/dist/lib/observability/sampling/index.js +5 -0
  107. package/dist/lib/observability/sampling/samplers.d.ts +116 -0
  108. package/dist/lib/observability/sampling/samplers.js +217 -0
  109. package/dist/lib/observability/spanProcessor.d.ts +129 -0
  110. package/dist/lib/observability/spanProcessor.js +288 -0
  111. package/dist/lib/observability/tokenTracker.d.ts +156 -0
  112. package/dist/lib/observability/tokenTracker.js +414 -0
  113. package/dist/lib/observability/types/exporterTypes.d.ts +250 -0
  114. package/dist/lib/observability/types/exporterTypes.js +6 -0
  115. package/dist/lib/observability/types/index.d.ts +6 -0
  116. package/dist/lib/observability/types/index.js +5 -0
  117. package/dist/lib/observability/types/spanTypes.d.ts +244 -0
  118. package/dist/lib/observability/types/spanTypes.js +93 -0
  119. package/dist/lib/observability/utils/index.d.ts +4 -0
  120. package/dist/lib/observability/utils/index.js +5 -0
  121. package/dist/lib/observability/utils/spanSerializer.d.ts +115 -0
  122. package/dist/lib/observability/utils/spanSerializer.js +287 -0
  123. package/dist/lib/providers/amazonSagemaker.d.ts +5 -4
  124. package/dist/lib/providers/amazonSagemaker.js +3 -4
  125. package/dist/lib/providers/googleVertex.d.ts +7 -0
  126. package/dist/lib/providers/googleVertex.js +80 -2
  127. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +0 -5
  128. package/dist/lib/rag/pipeline/RAGPipeline.js +122 -87
  129. package/dist/lib/rag/ragIntegration.js +30 -0
  130. package/dist/lib/rag/retrieval/hybridSearch.js +22 -0
  131. package/dist/lib/server/abstract/baseServerAdapter.js +51 -19
  132. package/dist/lib/server/middleware/common.js +44 -12
  133. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +2 -2
  134. package/dist/lib/services/server/ai/observability/instrumentation.js +10 -5
  135. package/dist/lib/types/cli.d.ts +18 -2
  136. package/dist/lib/types/conversationMemoryInterface.d.ts +2 -0
  137. package/dist/lib/types/generateTypes.d.ts +2 -2
  138. package/dist/lib/types/modelTypes.d.ts +18 -18
  139. package/dist/lib/types/providers.d.ts +5 -0
  140. package/dist/lib/utils/pricing.js +25 -1
  141. package/dist/lib/utils/ttsProcessor.js +74 -59
  142. package/dist/lib/workflow/config.d.ts +36 -36
  143. package/dist/lib/workflow/core/ensembleExecutor.js +10 -0
  144. package/dist/lib/workflow/core/judgeScorer.js +20 -2
  145. package/dist/lib/workflow/core/workflowRunner.js +34 -1
  146. package/dist/mcp/httpRateLimiter.js +39 -12
  147. package/dist/mcp/httpRetryHandler.js +22 -1
  148. package/dist/mcp/mcpClientFactory.js +13 -15
  149. package/dist/memory/memoryRetrievalTools.js +22 -0
  150. package/dist/neurolink.d.ts +64 -72
  151. package/dist/neurolink.js +984 -566
  152. package/dist/observability/FEATURE-STATUS.md +269 -0
  153. package/dist/observability/exporterRegistry.d.ts +152 -0
  154. package/dist/observability/exporterRegistry.js +413 -0
  155. package/dist/observability/exporters/arizeExporter.d.ts +32 -0
  156. package/dist/observability/exporters/arizeExporter.js +138 -0
  157. package/dist/observability/exporters/baseExporter.d.ts +117 -0
  158. package/dist/observability/exporters/baseExporter.js +190 -0
  159. package/dist/observability/exporters/braintrustExporter.d.ts +30 -0
  160. package/dist/observability/exporters/braintrustExporter.js +154 -0
  161. package/dist/observability/exporters/datadogExporter.d.ts +37 -0
  162. package/dist/observability/exporters/datadogExporter.js +196 -0
  163. package/dist/observability/exporters/index.d.ts +13 -0
  164. package/dist/observability/exporters/index.js +13 -0
  165. package/dist/observability/exporters/laminarExporter.d.ts +48 -0
  166. package/dist/observability/exporters/laminarExporter.js +302 -0
  167. package/dist/observability/exporters/langfuseExporter.d.ts +47 -0
  168. package/dist/observability/exporters/langfuseExporter.js +199 -0
  169. package/dist/observability/exporters/langsmithExporter.d.ts +26 -0
  170. package/dist/observability/exporters/langsmithExporter.js +123 -0
  171. package/dist/observability/exporters/otelExporter.d.ts +39 -0
  172. package/dist/observability/exporters/otelExporter.js +164 -0
  173. package/dist/observability/exporters/posthogExporter.d.ts +48 -0
  174. package/dist/observability/exporters/posthogExporter.js +287 -0
  175. package/dist/observability/exporters/sentryExporter.d.ts +32 -0
  176. package/dist/observability/exporters/sentryExporter.js +165 -0
  177. package/dist/observability/index.d.ts +25 -0
  178. package/dist/observability/index.js +31 -0
  179. package/dist/observability/metricsAggregator.d.ts +260 -0
  180. package/dist/observability/metricsAggregator.js +552 -0
  181. package/dist/observability/otelBridge.d.ts +49 -0
  182. package/dist/observability/otelBridge.js +131 -0
  183. package/dist/observability/retryPolicy.d.ts +192 -0
  184. package/dist/observability/retryPolicy.js +383 -0
  185. package/dist/observability/sampling/index.d.ts +4 -0
  186. package/dist/observability/sampling/index.js +4 -0
  187. package/dist/observability/sampling/samplers.d.ts +116 -0
  188. package/dist/observability/sampling/samplers.js +216 -0
  189. package/dist/observability/spanProcessor.d.ts +129 -0
  190. package/dist/observability/spanProcessor.js +287 -0
  191. package/dist/observability/tokenTracker.d.ts +156 -0
  192. package/dist/observability/tokenTracker.js +413 -0
  193. package/dist/observability/types/exporterTypes.d.ts +250 -0
  194. package/dist/observability/types/exporterTypes.js +5 -0
  195. package/dist/observability/types/index.d.ts +6 -0
  196. package/dist/observability/types/index.js +4 -0
  197. package/dist/observability/types/spanTypes.d.ts +244 -0
  198. package/dist/observability/types/spanTypes.js +92 -0
  199. package/dist/observability/utils/index.d.ts +4 -0
  200. package/dist/observability/utils/index.js +4 -0
  201. package/dist/observability/utils/spanSerializer.d.ts +115 -0
  202. package/dist/observability/utils/spanSerializer.js +286 -0
  203. package/dist/providers/amazonSagemaker.d.ts +5 -4
  204. package/dist/providers/amazonSagemaker.js +3 -4
  205. package/dist/providers/googleVertex.d.ts +7 -0
  206. package/dist/providers/googleVertex.js +80 -2
  207. package/dist/rag/pipeline/RAGPipeline.d.ts +0 -5
  208. package/dist/rag/pipeline/RAGPipeline.js +122 -87
  209. package/dist/rag/ragIntegration.js +30 -0
  210. package/dist/rag/retrieval/hybridSearch.js +22 -0
  211. package/dist/server/abstract/baseServerAdapter.js +51 -19
  212. package/dist/server/middleware/common.js +44 -12
  213. package/dist/services/server/ai/observability/instrumentation.d.ts +2 -2
  214. package/dist/services/server/ai/observability/instrumentation.js +10 -5
  215. package/dist/types/cli.d.ts +18 -2
  216. package/dist/types/conversationMemoryInterface.d.ts +2 -0
  217. package/dist/types/generateTypes.d.ts +2 -2
  218. package/dist/types/providers.d.ts +5 -0
  219. package/dist/utils/pricing.js +25 -1
  220. package/dist/utils/ttsProcessor.js +74 -59
  221. package/dist/workflow/config.d.ts +52 -52
  222. package/dist/workflow/core/ensembleExecutor.js +10 -0
  223. package/dist/workflow/core/judgeScorer.js +20 -2
  224. package/dist/workflow/core/workflowRunner.js +34 -1
  225. package/package.json +1 -1
@@ -13,15 +13,17 @@ try {
  catch {
  // Environment variables should be set externally in production
  }
+ import { SpanKind, SpanStatusCode } from "@opentelemetry/api";
+ import { AsyncLocalStorage } from "async_hooks";
  import { EventEmitter } from "events";
  import pLimit from "p-limit";
  import { ErrorCategory, ErrorSeverity } from "./constants/enums.js";
  import { CIRCUIT_BREAKER, CIRCUIT_BREAKER_RESET_MS, MEMORY_THRESHOLDS, NANOSECOND_TO_MS_DIVISOR, PERFORMANCE_THRESHOLDS, PROVIDER_TIMEOUTS, RETRY_ATTEMPTS, RETRY_DELAYS, TOOL_TIMEOUTS, } from "./constants/index.js";
  import { checkContextBudget } from "./context/budgetChecker.js";
  import { ContextCompactor, } from "./context/contextCompactor.js";
- import { isContextOverflowError, getContextOverflowProvider, parseProviderOverflowDetails, } from "./context/errorDetection.js";
- import { ContextBudgetExceededError } from "./context/errors.js";
  import { emergencyContentTruncation } from "./context/emergencyTruncation.js";
+ import { getContextOverflowProvider, isContextOverflowError, parseProviderOverflowDetails, } from "./context/errorDetection.js";
+ import { ContextBudgetExceededError } from "./context/errors.js";
  import { repairToolPairs } from "./context/toolPairRepair.js";
  import { SYSTEM_LIMITS } from "./core/constants.js";
  import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
@@ -34,11 +36,17 @@ import { ExternalServerManager } from "./mcp/externalServerManager.js";
  // Import direct tools server for automatic registration
  import { directToolsServer } from "./mcp/servers/agent/directToolsServer.js";
  import { MCPToolRegistry } from "./mcp/toolRegistry.js";
+ import { initializeHippocampus, } from "./memory/hippocampusInitializer.js";
  import { initializeMem0 } from "./memory/mem0Initializer.js";
  import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
- import { initializeHippocampus, } from "./memory/hippocampusInitializer.js";
+ import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
+ import { SpanStatus, SpanType } from "./observability/types/spanTypes.js";
+ import { SpanSerializer } from "./observability/utils/spanSerializer.js";
  import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
+ import { ATTR } from "./telemetry/attributes.js";
+ import { tracers } from "./telemetry/tracers.js";
  import { ConversationMemoryError } from "./types/conversation.js";
+ import { AuthenticationError, AuthorizationError, InvalidModelError, } from "./types/errors.js";
  import { getConversationMessages, storeConversationTurn, } from "./utils/conversationMemory.js";
  // Enhanced error handling imports
  import { CircuitBreaker, ERROR_CODES, ErrorFactory, isAbortError, isRetriableError, logStructuredError, NeuroLinkError, withRetry, withTimeout, } from "./utils/errorHandling.js";
@@ -49,17 +57,13 @@ import { createCustomToolServerInfo, detectCategory, } from "./utils/mcpDefaults
  // Import orchestration components
  import { ModelRouter } from "./utils/modelRouter.js";
  import { getBestProvider } from "./utils/providerUtils.js";
+ import { NON_RETRYABLE_HTTP_STATUS_CODES } from "./utils/retryability.js";
  import { isZodSchema } from "./utils/schemaConversion.js";
  import { BinaryTaskClassifier } from "./utils/taskClassifier.js";
  // Tool detection and execution imports
  // Transformation utilities
  import { extractToolNames, optimizeToolForCollection, transformAvailableTools, transformParamsForLogging, transformToolExecutions, transformToolExecutionsForMCP, transformToolsForMCP, transformToolsToDescriptions, transformToolsToExpectedFormat, } from "./utils/transformationUtils.js";
- import { InvalidModelError, AuthenticationError, AuthorizationError, } from "./types/errors.js";
  import { isNonNullObject } from "./utils/typeUtils.js";
- import { NON_RETRYABLE_HTTP_STATUS_CODES } from "./utils/retryability.js";
- import { SpanKind, SpanStatusCode } from "@opentelemetry/api";
- import { tracers } from "./telemetry/tracers.js";
- import { ATTR } from "./telemetry/attributes.js";
  import { getWorkflow } from "./workflow/core/workflowRegistry.js";
  import { runWorkflow } from "./workflow/core/workflowRunner.js";
  /**
@@ -109,77 +113,11 @@ function isNonRetryableProviderError(error) {
  return false;
  }
  /**
- * NeuroLink - Universal AI Development Platform
- *
- * Main SDK class providing unified access to 14+ AI providers with enterprise features:
- * - Multi-provider support (OpenAI, Anthropic, Google AI Studio, Google Vertex, AWS Bedrock, etc.)
- * - MCP (Model Context Protocol) tool integration with 58+ external servers
- * - Human-in-the-Loop (HITL) security workflows for regulated industries
- * - Redis-based conversation memory and persistence
- * - Enterprise middleware system for monitoring and control
- * - Automatic provider fallback and retry logic
- * - Streaming with real-time token delivery
- * - Multimodal support (text, images, PDFs, CSV)
- *
- * @category Core
- *
- * @example Basic usage
- * ```typescript
- * import { NeuroLink } from '@juspay/neurolink';
- *
- * const neurolink = new NeuroLink();
- *
- * const result = await neurolink.generate({
- * input: { text: 'Explain quantum computing' },
- * provider: 'vertex',
- * model: 'gemini-3-flash'
- * });
- *
- * console.log(result.content);
- * ```
- *
- * @example With HITL security
- * ```typescript
- * const neurolink = new NeuroLink({
- * hitl: {
- * enabled: true,
- * requireApproval: ['writeFile', 'executeCode'],
- * confidenceThreshold: 0.85
- * }
- * });
- * ```
- *
- * @example With Redis memory
- * ```typescript
- * const neurolink = new NeuroLink({
- * conversationMemory: {
- * enabled: true,
- * redis: {
- * url: 'redis://localhost:6379'
- * }
- * }
- * });
- * ```
- *
- * @example With MCP tools
- * ```typescript
- * const neurolink = new NeuroLink();
- *
- * // Discover available tools
- * const tools = await neurolink.getAvailableTools();
- *
- * // Use tools in generation
- * const result = await neurolink.generate({
- * input: { text: 'Read the README.md file' },
- * tools: ['readFile']
- * });
- * ```
- *
- * @see {@link GenerateOptions} for generation options
- * @see {@link StreamOptions} for streaming options
- * @see {@link NeurolinkConstructorConfig} for configuration options
- * @since 1.0.0
+ * Module-level AsyncLocalStorage for per-request metrics trace context.
+ * Eliminates the race condition where overlapping generate/stream calls on the
+ * same NeuroLink instance would clobber each other's trace context.
  */
+ const metricsTraceContextStorage = new AsyncLocalStorage();
  export class NeuroLink {
  mcpInitialized = false;
  mcpInitPromise = null;
@@ -422,6 +360,15 @@ export class NeuroLink {
  * @throws {Error} When HITL configuration is invalid (if enabled)
  */
  observabilityConfig;
+ metricsAggregator = new MetricsAggregator();
+ /**
+ * Per-request metrics trace context backed by AsyncLocalStorage.
+ * Safe for concurrent requests on the same SDK instance.
+ * Context is set via metricsTraceContextStorage.run() in generate/stream.
+ */
+ get _metricsTraceContext() {
+ return metricsTraceContextStorage.getStore() ?? null;
+ }
  constructor(config) {
  this.toolRegistry = config?.toolRegistry || new MCPToolRegistry();
  this.fileRegistry = new FileReferenceRegistry();
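The getter above reads the trace context from a module-level AsyncLocalStorage instead of an instance field, so each in-flight call sees only the context it was started with. A minimal sketch of that pattern, with simplified names and types that are not the package's own:

```typescript
// Minimal sketch of the AsyncLocalStorage pattern used above (illustrative names only).
import { AsyncLocalStorage } from "async_hooks";

type MetricsTraceContext = { traceId: string; parentSpanId: string };

const storage = new AsyncLocalStorage<MetricsTraceContext>();

function currentTraceContext(): MetricsTraceContext | null {
  // Returns the context bound to the calling async chain, or null outside a run().
  return storage.getStore() ?? null;
}

async function handleRequest(id: string): Promise<string | undefined> {
  return storage.run({ traceId: id, parentSpanId: id.slice(0, 16) }, async () => {
    await new Promise((resolve) => setTimeout(resolve, 10));
    // Still this request's context, even if another request started meanwhile.
    return currentTraceContext()?.traceId;
  });
}

// Two overlapping calls no longer clobber each other's context.
Promise.all([handleRequest("trace-a"), handleRequest("trace-b")]).then(console.log);
```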
@@ -444,6 +391,7 @@ export class NeuroLink {
  this.registerFileTools();
  this.registerMemoryRetrievalTools();
  this.initializeLangfuse(constructorId, constructorStartTime, constructorHrTimeStart);
+ this.initializeMetricsListeners();
  this.logConstructorComplete(constructorId, constructorStartTime, constructorHrTimeStart);
  }
  /**
@@ -666,9 +614,6 @@ export class NeuroLink {
  logger.debug("[NeuroLink] Skipping memory retrieval tools — requires Redis conversation memory");
  return;
  }
- // Defer registration until conversation memory is actually initialized
- // We register a placeholder that will use the lazy-initialized memory manager
- const self = this;
  const tools = {
  retrieve_context: {
  description: "Retrieve messages from conversation memory. Use this to access full tool " +
  "or search through conversation history.",
  execute: async (params) => {
  // Lazy access: conversationMemory is initialized on first generate() call
- const memoryManager = self.conversationMemory;
+ const memoryManager = this.conversationMemory;
  if (!memoryManager || !("getSessionRaw" in memoryManager)) {
  return {
  success: false,
@@ -1576,6 +1521,88 @@ Current user's request: ${currentInput}`;
  // Check if OpenTelemetry was initialized (by this or external app)
  return isOpenTelemetryInitialized();
  }
+ /**
+ * Get comprehensive telemetry status including Langfuse, OTel, and exporter health
+ */
+ getTelemetryStatus() {
+ const langfuseConfig = this.observabilityConfig?.langfuse;
+ const otelConfig = this.observabilityConfig?.openTelemetry;
+ return {
+ enabled: this.isTelemetryEnabled(),
+ langfuse: langfuseConfig
+ ? {
+ enabled: langfuseConfig.enabled ?? false,
+ baseUrl: langfuseConfig.baseUrl,
+ environment: langfuseConfig.environment,
+ }
+ : undefined,
+ openTelemetry: otelConfig
+ ? {
+ enabled: otelConfig.enabled ?? false,
+ endpoint: otelConfig.endpoint,
+ serviceName: otelConfig.serviceName,
+ }
+ : isOpenTelemetryInitialized() ||
+ process.env.OTEL_EXPORTER_OTLP_ENDPOINT
+ ? {
+ enabled: isOpenTelemetryInitialized(),
+ endpoint: process.env.OTEL_EXPORTER_OTLP_ENDPOINT,
+ serviceName: process.env.OTEL_SERVICE_NAME,
+ }
+ : undefined,
+ exporters: [],
+ };
+ }
+ /**
+ * Get aggregated observability metrics (latency, tokens, cost, success rate)
+ */
+ getMetrics() {
+ return this.metricsAggregator.getMetrics();
+ }
+ /**
+ * Get all recorded spans
+ */
+ getSpans() {
+ return this.metricsAggregator.getSpans();
+ }
+ /**
+ * Get traces (spans grouped by traceId with parent-child hierarchy)
+ */
+ getTraces() {
+ return this.metricsAggregator.getTraces();
+ }
+ /**
+ * Reset all collected metrics and spans
+ */
+ resetMetrics() {
+ this.metricsAggregator.reset();
+ }
+ /**
+ * Record a span for metrics tracking
+ */
+ recordMetricsSpan(span) {
+ this.metricsAggregator.recordSpan(span);
+ }
+ /**
+ * Record a memory operation span to both instance and global metrics aggregators.
+ * This ensures memory spans are visible via sdk.getSpans() and getMetricsAggregator().getSpans().
+ */
+ recordMemorySpan(operationName, attributes, durationMs, status, statusMessage) {
+ const traceCtx = this._metricsTraceContext;
+ const span = SpanSerializer.createSpan(SpanType.MEMORY, operationName, attributes, traceCtx?.parentSpanId, traceCtx?.traceId);
+ span.durationMs = durationMs;
+ const endedSpan = SpanSerializer.endSpan(span, status);
+ if (statusMessage) {
+ endedSpan.statusMessage = statusMessage;
+ }
+ this.metricsAggregator.recordSpan(endedSpan);
+ try {
+ getMetricsAggregator().recordSpan(endedSpan);
+ }
+ catch {
+ /* ignore */
+ }
+ }
  /**
  * Public method to initialize Langfuse observability
  * This method can be called externally to ensure Langfuse is properly initialized
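Taken together, this hunk gives each NeuroLink instance an in-process metrics surface. A hedged usage sketch, using only the method names that appear in the diff; the exact return shapes are defined in the new metricsAggregator typings and are assumed here:

```typescript
import { NeuroLink } from "@juspay/neurolink";

const neurolink = new NeuroLink();

await neurolink.generate({ input: { text: "Hello" } });

// Aggregated view: latency, tokens, cost, success rate (shape assumed).
const metrics = neurolink.getMetrics();

// Raw spans and trace trees recorded by this instance's aggregator.
const spans = neurolink.getSpans();
const traces = neurolink.getTraces();

// Langfuse / OpenTelemetry / exporter wiring as this instance sees it.
const status = neurolink.getTelemetryStatus();
console.log(status.enabled, metrics, spans.length, traces.length);

// Clear collected data, e.g. between test cases.
neurolink.resetMetrics();
```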
@@ -1618,6 +1645,16 @@ Current user's request: ${currentInput}`;
  logger.warn("[NeuroLink] MCP servers shutdown failed:", error);
  }
  }
+ // Close conversation memory manager (release Redis connections, etc.)
+ if (this.conversationMemory?.close) {
+ try {
+ await this.conversationMemory.close();
+ logger.debug("[NeuroLink] Conversation memory shutdown completed");
+ }
+ catch (error) {
+ logger.warn("[NeuroLink] Conversation memory shutdown failed:", error);
+ }
+ }
  logger.debug("[NeuroLink] Graceful shutdown completed");
  }
  catch (error) {
@@ -1625,6 +1662,229 @@ Current user's request: ${currentInput}`;
  throw error;
  }
  }
+ /**
+ * Initialize event listeners that feed span data to MetricsAggregator.
+ * Listens to generation:end, stream:complete, and tool:end events.
+ */
+ initializeMetricsListeners() {
+ this.emitter.on("generation:end", ((...args) => {
+ const data = args[0];
+ try {
+ const result = data.result;
+ const usage = result?.usage;
+ const analytics = result?.analytics;
+ const provider = data.provider ||
+ result?.provider ||
+ "unknown";
+ const model = result?.model || "unknown";
+ const responseTime = data.responseTime || 0;
+ const traceCtx = this._metricsTraceContext;
+ let span = SpanSerializer.createGenerationSpan({
+ provider,
+ model,
+ name: `gen_ai.${provider}.chat`,
+ traceId: traceCtx?.traceId,
+ input: data.prompt,
+ temperature: data.temperature,
+ maxTokens: data.maxTokens,
+ });
+ // Make this the root span by using the pre-generated rootSpanId
+ if (traceCtx) {
+ span.spanId = traceCtx.parentSpanId;
+ span.parentSpanId = undefined;
+ }
+ span = SpanSerializer.endSpan(span, SpanStatus.OK);
+ span.durationMs = responseTime;
+ if (usage) {
+ span = SpanSerializer.enrichWithTokenUsage(span, {
+ promptTokens: usage.input || 0,
+ completionTokens: usage.output || 0,
+ totalTokens: usage.total || (usage.input || 0) + (usage.output || 0),
+ });
+ }
+ if (analytics?.cost && analytics.cost > 0) {
+ span = SpanSerializer.enrichWithCost(span, {
+ totalCost: analytics.cost,
+ });
+ }
+ else if (usage && model !== "unknown") {
+ // Fallback: compute cost from token usage + built-in pricing
+ const tokenTracker = this.metricsAggregator.getTokenTracker();
+ const pricing = tokenTracker.getModelPricing(model);
+ if (pricing) {
+ const inputCost = ((usage.input || 0) / 1_000_000) * pricing.inputPricePerMillion;
+ const outputCost = ((usage.output || 0) / 1_000_000) * pricing.outputPricePerMillion;
+ const totalCost = inputCost + outputCost;
+ if (totalCost > 0) {
+ span = SpanSerializer.enrichWithCost(span, {
+ inputCost,
+ outputCost,
+ totalCost,
+ });
+ }
+ }
+ }
+ // Record output (truncated for safety)
+ const content = result?.content || result?.text;
+ if (content) {
+ span = SpanSerializer.updateAttributes(span, {
+ output: content.length > 5000
+ ? content.substring(0, 5000) + "...[truncated]"
+ : content,
+ });
+ }
+ this.metricsAggregator.recordSpan(span);
+ getMetricsAggregator().recordSpan(span);
+ }
+ catch {
+ // Non-blocking
+ }
+ }));
+ this.emitter.on("stream:complete", ((...args) => {
+ const data = args[0];
+ try {
+ const metadata = data.metadata;
+ const durationMs = metadata?.durationMs || 0;
+ const chunkCount = metadata?.chunkCount || 0;
+ const totalLength = metadata?.totalLength || 0;
+ const provider = data.provider || "unknown";
+ const model = data.model || "unknown";
+ const traceCtx = this._metricsTraceContext;
+ let span = SpanSerializer.createGenerationSpan({
+ provider,
+ model,
+ name: `gen_ai.${provider}.stream`,
+ traceId: traceCtx?.traceId,
+ });
+ // Make this the root span by using the pre-generated rootSpanId
+ if (traceCtx) {
+ span.spanId = traceCtx.parentSpanId;
+ span.parentSpanId = undefined;
+ }
+ span = SpanSerializer.endSpan(span, SpanStatus.OK);
+ span.durationMs = durationMs;
+ span.attributes["stream.chunk_count"] = chunkCount;
+ span.attributes["stream.content_length"] = totalLength;
+ // Record stream input prompt
+ if (data.prompt) {
+ const promptStr = String(data.prompt);
+ span = SpanSerializer.updateAttributes(span, {
+ input: promptStr.length > 5000
+ ? promptStr.substring(0, 5000) + "...[truncated]"
+ : promptStr,
+ });
+ }
+ // Record streamed output (truncated for safety)
+ const streamContent = data.content;
+ if (streamContent) {
+ span = SpanSerializer.updateAttributes(span, {
+ output: streamContent.length > 5000
+ ? streamContent.substring(0, 5000) + "...[truncated]"
+ : streamContent,
+ });
+ }
+ // Enrich stream span with token usage if available
+ const usage = metadata?.usage;
+ if (usage) {
+ span = SpanSerializer.enrichWithTokenUsage(span, {
+ promptTokens: usage.input || 0,
+ completionTokens: usage.output || 0,
+ totalTokens: usage.total || (usage.input || 0) + (usage.output || 0),
+ });
+ // Compute cost from token usage
+ if (model !== "unknown") {
+ const tokenTracker = this.metricsAggregator.getTokenTracker();
+ const pricing = tokenTracker.getModelPricing(model);
+ if (pricing) {
+ const inputCost = ((usage.input || 0) / 1_000_000) * pricing.inputPricePerMillion;
+ const outputCost = ((usage.output || 0) / 1_000_000) *
+ pricing.outputPricePerMillion;
+ const totalCost = inputCost + outputCost;
+ if (totalCost > 0) {
+ span = SpanSerializer.enrichWithCost(span, {
+ inputCost,
+ outputCost,
+ totalCost,
+ });
+ }
+ }
+ }
+ }
+ this.metricsAggregator.recordSpan(span);
+ getMetricsAggregator().recordSpan(span);
+ }
+ catch {
+ // Non-blocking
+ }
+ }));
+ this.emitter.on("tool:end", ((...args) => {
+ const data = args[0];
+ try {
+ // Handle both event formats: {toolName} (from emitToolEnd) and {tool} (from executeToolInternal)
+ const toolName = data.toolName || data.tool || "unknown";
+ const responseTime = data.responseTime || data.duration || 0;
+ // success is explicit in one format; infer from error presence in the other
+ const success = data.success !== undefined ? data.success : !data.error;
+ const traceCtx = this._metricsTraceContext;
+ let span = SpanSerializer.createSpan(SpanType.TOOL_CALL, `tool.${toolName}`, {
+ "tool.name": toolName,
+ "tool.success": success,
+ }, traceCtx?.parentSpanId, traceCtx?.traceId);
+ span = SpanSerializer.endSpan(span, success ? SpanStatus.OK : SpanStatus.ERROR);
+ span.durationMs = responseTime;
+ if (!success && data.error) {
+ span.statusMessage =
+ data.error.message || String(data.error);
+ }
+ if (data.result) {
+ try {
+ span.attributes["tool.result"] = JSON.stringify(data.result).substring(0, 500);
+ }
+ catch {
+ // Non-blocking
+ }
+ }
+ this.metricsAggregator.recordSpan(span);
+ getMetricsAggregator().recordSpan(span);
+ }
+ catch {
+ // Non-blocking
+ }
+ }));
+ this.emitter.on("stream:error", ((...args) => {
+ const data = args[0];
+ try {
+ const metadata = data.metadata;
+ const durationMs = metadata?.durationMs || 0;
+ const chunkCount = metadata?.chunkCount || 0;
+ const errorName = metadata?.errorName || "UnknownError";
+ const errorMessage = data.content || "Stream error";
+ const provider = data.provider || "unknown";
+ const model = data.model || "unknown";
+ const traceCtx = this._metricsTraceContext;
+ let span = SpanSerializer.createGenerationSpan({
+ provider,
+ model,
+ name: `gen_ai.${provider}.stream.error`,
+ traceId: traceCtx?.traceId,
+ });
+ // Make this the root span
+ if (traceCtx) {
+ span.spanId = traceCtx.parentSpanId;
+ span.parentSpanId = undefined;
+ }
+ span = SpanSerializer.endSpan(span, SpanStatus.ERROR);
+ span.durationMs = durationMs;
+ span.statusMessage = `${errorName}: ${errorMessage}`;
+ span.attributes["stream.chunk_count"] = chunkCount;
+ this.metricsAggregator.recordSpan(span);
+ getMetricsAggregator().recordSpan(span);
+ }
+ catch {
+ // Non-blocking
+ }
+ }));
+ }
  /**
  * Generate AI response with comprehensive feature support.
  *
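When a result carries token usage but no analytics cost, both the generation and stream listeners above fall back to per-million pricing. A worked sketch of that arithmetic; the pricing numbers are invented for illustration and are not the package's built-in rates:

```typescript
// Hypothetical pricing: $0.30 per 1M input tokens, $1.20 per 1M output tokens.
const pricing = { inputPricePerMillion: 0.3, outputPricePerMillion: 1.2 };
const usage = { input: 12_000, output: 3_500 };

const inputCost = (usage.input / 1_000_000) * pricing.inputPricePerMillion;    // 0.0036
const outputCost = (usage.output / 1_000_000) * pricing.outputPricePerMillion; // 0.0042
const totalCost = inputCost + outputCost;                                      // 0.0078 USD

console.log({ inputCost, outputCost, totalCost });
```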
@@ -1727,320 +1987,329 @@ Current user's request: ${currentInput}`;
  */
  async generate(optionsOrPrompt) {
  return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, async (generateSpan) => {
- try {
- const originalPrompt = this._extractOriginalPrompt(optionsOrPrompt);
- // Convert string prompt to full options
- const options = typeof optionsOrPrompt === "string"
- ? { input: { text: optionsOrPrompt } }
- : optionsOrPrompt;
- // Set span attributes for observability
- generateSpan.setAttribute("neurolink.provider", options.provider || "default");
- generateSpan.setAttribute("neurolink.model", options.model || "default");
- generateSpan.setAttribute("neurolink.input_length", typeof optionsOrPrompt === "string"
- ? optionsOrPrompt.length
- : options.input?.text?.length || 0);
- generateSpan.setAttribute("neurolink.has_tools", !!(options.tools && Object.keys(options.tools).length > 0));
- // Validate prompt
- if (!options.input?.text || typeof options.input.text !== "string") {
- throw new Error("Input text is required and must be a non-empty string");
- }
- // Check budget limit before making API call
- if (options.maxBudgetUsd !== undefined &&
- options.maxBudgetUsd > 0 &&
- this._sessionCostUsd >= options.maxBudgetUsd) {
- throw new NeuroLinkError({
- code: "SESSION_BUDGET_EXCEEDED",
- message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
- category: ErrorCategory.VALIDATION,
- severity: ErrorSeverity.HIGH,
- retriable: false,
- context: {
- spent: this._sessionCostUsd,
- limit: options.maxBudgetUsd,
- },
- });
- }
- // Check if workflow is requested
- if (options.workflow || options.workflowConfig) {
- return await this.generateWithWorkflow(options);
- }
- // Check if PPT output mode is requested
- if (options.output?.mode === "ppt") {
- const pptResult = await this.generateWithPPT(options);
- generateSpan.setAttribute("neurolink.output_length", pptResult.content?.length ?? 0);
- if (pptResult.analytics) {
- generateSpan.setAttribute("neurolink.tokens.input", pptResult.analytics.tokenUsage?.input ?? 0);
- generateSpan.setAttribute("neurolink.tokens.output", pptResult.analytics.tokenUsage?.output ?? 0);
- generateSpan.setAttribute("neurolink.cost", pptResult.analytics.cost ?? 0);
+ // Set metrics trace context for parent-child span linking.
+ // The generation span will be the root (no parentSpanId).
+ // Tool spans will be children of the root span via rootSpanId.
+ const metricsTraceId = crypto.randomUUID().replace(/-/g, "");
+ const metricsRootSpanId = crypto
+ .randomUUID()
+ .replace(/-/g, "")
+ .substring(0, 16);
+ // Scope trace context to this request via AsyncLocalStorage
+ // so concurrent generate/stream calls don't race.
+ return metricsTraceContextStorage.run({ traceId: metricsTraceId, parentSpanId: metricsRootSpanId }, async () => {
+ try {
+ const originalPrompt = this._extractOriginalPrompt(optionsOrPrompt);
+ // Convert string prompt to full options
+ const options = typeof optionsOrPrompt === "string"
+ ? { input: { text: optionsOrPrompt } }
+ : optionsOrPrompt;
+ // Set span attributes for observability
+ generateSpan.setAttribute("neurolink.provider", options.provider || "default");
+ generateSpan.setAttribute("neurolink.model", options.model || "default");
+ generateSpan.setAttribute("neurolink.input_length", typeof optionsOrPrompt === "string"
+ ? optionsOrPrompt.length
+ : options.input?.text?.length || 0);
+ generateSpan.setAttribute("neurolink.has_tools", !!(options.tools && Object.keys(options.tools).length > 0));
+ // Validate prompt
+ if (!options.input?.text ||
+ typeof options.input.text !== "string") {
+ throw new Error("Input text is required and must be a non-empty string");
  }
- generateSpan.setStatus({ code: SpanStatusCode.OK });
- return pptResult;
- }
- // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
- return await this.setLangfuseContextFromOptions(options, async () => {
- if (this.conversationMemoryConfig?.conversationMemory?.mem0Enabled &&
- options.context?.userId) {
- try {
- const mem0 = await this.ensureMem0Ready();
- if (!mem0) {
- logger.debug("Mem0 not available, continuing without memory retrieval");
- }
- else {
- const memories = await mem0.search(options.input.text, {
- user_id: options.context.userId,
- limit: 5,
- });
- if (memories && memories.length > 0) {
- // Enhance the input with memory context
- const memoryContext = this.extractMemoryContext(memories);
- options.input.text = this.formatMemoryContext(memoryContext, options.input.text);
- }
- }
- }
- catch (error) {
- logger.warn("Mem0 memory retrieval failed:", error);
- }
+ // Check budget limit before making API call
+ if (options.maxBudgetUsd !== undefined &&
+ options.maxBudgetUsd > 0 &&
+ this._sessionCostUsd >= options.maxBudgetUsd) {
+ throw new NeuroLinkError({
+ code: "SESSION_BUDGET_EXCEEDED",
+ message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
+ category: ErrorCategory.VALIDATION,
+ severity: ErrorSeverity.HIGH,
+ retriable: false,
+ context: {
+ spent: this._sessionCostUsd,
+ limit: options.maxBudgetUsd,
+ },
+ });
  }
- // Memory retrieval
- if (this.conversationMemoryConfig?.conversationMemory?.memory
- ?.enabled &&
- options.context?.userId) {
- try {
- options.input.text = await this.retrieveMemory(options.input.text, options.context.userId);
- logger.debug("Memory retrieval successful");
- }
- catch (error) {
- logger.warn("Memory retrieval failed:", error);
+ // Check if workflow is requested
+ if (options.workflow || options.workflowConfig) {
+ return await this.generateWithWorkflow(options);
+ }
+ // Check if PPT output mode is requested
+ if (options.output?.mode === "ppt") {
+ const pptResult = await this.generateWithPPT(options);
+ generateSpan.setAttribute("neurolink.output_length", pptResult.content?.length ?? 0);
+ if (pptResult.analytics) {
+ generateSpan.setAttribute("neurolink.tokens.input", pptResult.analytics.tokenUsage?.input ?? 0);
+ generateSpan.setAttribute("neurolink.tokens.output", pptResult.analytics.tokenUsage?.output ?? 0);
+ generateSpan.setAttribute("neurolink.cost", pptResult.analytics.cost ?? 0);
  }
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
+ return pptResult;
  }
- const startTime = Date.now();
- // Apply orchestration if enabled and no specific provider/model requested
- if (this.enableOrchestration &&
- !options.provider &&
- !options.model) {
- try {
- const orchestratedOptions = await this.applyOrchestration(options);
- logger.debug("Orchestration applied", {
- originalProvider: options.provider || "auto",
- orchestratedProvider: orchestratedOptions.provider,
- orchestratedModel: orchestratedOptions.model,
- prompt: options.input.text.substring(0, 100),
- });
- // Use orchestrated options
- Object.assign(options, orchestratedOptions);
+ // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
+ return await this.setLangfuseContextFromOptions(options, async () => {
+ if (this.conversationMemoryConfig?.conversationMemory
+ ?.mem0Enabled &&
+ options.context?.userId) {
+ try {
+ const mem0 = await this.ensureMem0Ready();
+ if (!mem0) {
+ logger.debug("Mem0 not available, continuing without memory retrieval");
+ }
+ else {
+ const memories = await mem0.search(options.input.text, {
+ user_id: options.context.userId,
+ limit: 5,
+ });
+ if (memories && memories.length > 0) {
+ // Enhance the input with memory context
+ const memoryContext = this.extractMemoryContext(memories);
+ options.input.text = this.formatMemoryContext(memoryContext, options.input.text);
+ }
+ }
+ }
+ catch (error) {
+ logger.warn("Mem0 memory retrieval failed:", error);
+ }
  }
- catch (error) {
- logger.warn("Orchestration failed, continuing with original options", {
- error: error instanceof Error ? error.message : String(error),
- originalProvider: options.provider || "auto",
- });
- // Continue with original options if orchestration fails
+ const startTime = Date.now();
+ // Apply orchestration if enabled and no specific provider/model requested
+ if (this.enableOrchestration &&
+ !options.provider &&
+ !options.model) {
+ try {
+ const orchestratedOptions = await this.applyOrchestration(options);
+ logger.debug("Orchestration applied", {
+ originalProvider: options.provider || "auto",
+ orchestratedProvider: orchestratedOptions.provider,
+ orchestratedModel: orchestratedOptions.model,
+ prompt: options.input.text.substring(0, 100),
+ });
+ // Use orchestrated options
+ Object.assign(options, orchestratedOptions);
+ }
+ catch (error) {
+ logger.warn("Orchestration failed, continuing with original options", {
+ error: error instanceof Error
+ ? error.message
+ : String(error),
+ originalProvider: options.provider || "auto",
+ });
+ // Continue with original options if orchestration fails
+ }
  }
- }
- // Emit generation start event (NeuroLink format - keep existing)
- this.emitter.emit("generation:start", {
- provider: options.provider || "auto",
- timestamp: startTime,
- });
- // ADD: Bedrock-compatible response:start event
- this.emitter.emit("response:start");
- // ADD: Bedrock-compatible message event
- this.emitter.emit("message", `Starting ${options.provider || "auto"} text generation...`);
- // Process factory configuration
- const factoryResult = processFactoryOptions(options);
- // Validate factory configuration if present
- if (factoryResult.hasFactoryConfig && options.factoryConfig) {
- const validation = validateFactoryConfig(options.factoryConfig);
- if (!validation.isValid) {
- logger.warn("Invalid factory configuration detected", {
- errors: validation.errors,
- });
- // Continue with warning rather than throwing - graceful degradation
+ // Emit generation start event (NeuroLink format - keep existing)
+ this.emitter.emit("generation:start", {
+ provider: options.provider || "auto",
+ timestamp: startTime,
+ });
+ // ADD: Bedrock-compatible response:start event
+ this.emitter.emit("response:start");
+ // ADD: Bedrock-compatible message event
+ this.emitter.emit("message", `Starting ${options.provider || "auto"} text generation...`);
+ // Process factory configuration
+ const factoryResult = processFactoryOptions(options);
+ // Validate factory configuration if present
+ if (factoryResult.hasFactoryConfig && options.factoryConfig) {
+ const validation = validateFactoryConfig(options.factoryConfig);
+ if (!validation.isValid) {
+ logger.warn("Invalid factory configuration detected", {
+ errors: validation.errors,
+ });
+ // Continue with warning rather than throwing - graceful degradation
+ }
  }
- }
- // RAG Integration: If rag config is provided, prepare the RAG search tool
- if (options.rag?.files?.length) {
- try {
- const { prepareRAGTool } = await import("./rag/ragIntegration.js");
- const ragResult = await prepareRAGTool(options.rag, options.provider);
- // Inject the RAG tool into the tools record
- if (!options.tools) {
- options.tools = {};
+ // RAG Integration: If rag config is provided, prepare the RAG search tool
+ if (options.rag?.files?.length) {
+ try {
+ const { prepareRAGTool } = await import("./rag/ragIntegration.js");
+ const ragResult = await prepareRAGTool(options.rag, options.provider);
+ // Inject the RAG tool into the tools record
+ if (!options.tools) {
+ options.tools = {};
+ }
+ options.tools[ragResult.toolName] = ragResult.tool;
+ // Inject RAG-aware system prompt so the AI uses the RAG tool first
+ const ragSystemInstruction = [
+ `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
+ `${ragResult.filesLoaded} loaded document(s) containing ${ragResult.chunksIndexed} indexed chunks.`,
+ `ALWAYS use the "${ragResult.toolName}" tool FIRST to answer the user's question before using any other tools.`,
+ `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
+ `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
+ ].join(" ");
+ options.systemPrompt =
+ (options.systemPrompt || "") + ragSystemInstruction;
+ logger.info("[RAG] Tool injected into generate()", {
+ toolName: ragResult.toolName,
+ filesLoaded: ragResult.filesLoaded,
+ chunksIndexed: ragResult.chunksIndexed,
+ });
+ }
+ catch (error) {
+ logger.warn("[RAG] Failed to prepare RAG tool, continuing without RAG", {
+ error: error instanceof Error
+ ? error.message
+ : String(error),
+ });
  }
- options.tools[ragResult.toolName] =
- ragResult.tool;
- // Inject RAG-aware system prompt so the AI uses the RAG tool first
- const ragSystemInstruction = [
- `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
- `${ragResult.filesLoaded} loaded document(s) containing ${ragResult.chunksIndexed} indexed chunks.`,
- `ALWAYS use the "${ragResult.toolName}" tool FIRST to answer the user's question before using any other tools.`,
- `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
- `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
- ].join(" ");
- options.systemPrompt =
- (options.systemPrompt || "") + ragSystemInstruction;
- logger.info("[RAG] Tool injected into generate()", {
- toolName: ragResult.toolName,
- filesLoaded: ragResult.filesLoaded,
- chunksIndexed: ragResult.chunksIndexed,
- });
  }
- catch (error) {
- logger.warn("[RAG] Failed to prepare RAG tool, continuing without RAG", {
- error: error instanceof Error ? error.message : String(error),
+ // 🔧 CRITICAL FIX: Convert to TextGenerationOptions while preserving the input object for multimodal support
+ const baseOptions = {
+ prompt: options.input.text,
+ provider: options.provider,
+ model: options.model,
+ temperature: options.temperature,
+ maxTokens: options.maxTokens,
+ systemPrompt: options.systemPrompt,
+ schema: options.schema,
+ output: options.output,
+ tools: options.tools, // Includes RAG tools if rag config was provided
+ disableTools: options.disableTools,
+ toolFilter: options.toolFilter,
+ excludeTools: options.excludeTools,
+ maxSteps: options.maxSteps,
+ toolChoice: options.toolChoice,
+ prepareStep: options.prepareStep,
+ enableAnalytics: options.enableAnalytics,
+ enableEvaluation: options.enableEvaluation,
+ context: options.context,
+ evaluationDomain: options.evaluationDomain,
+ toolUsageContext: options.toolUsageContext,
+ input: options.input, // This includes text, images, and content arrays
+ region: options.region,
+ tts: options.tts,
+ fileRegistry: this.fileRegistry,
+ abortSignal: options.abortSignal,
+ skipToolPromptInjection: options.skipToolPromptInjection,
+ };
+ // Auto-map top-level sessionId/userId to context for convenience
+ // Tests and users may pass sessionId/userId as top-level options
+ const extraContext = options;
+ if (extraContext.sessionId || extraContext.userId) {
+ baseOptions.context = {
+ ...baseOptions.context,
+ ...(extraContext.sessionId &&
+ !baseOptions.context?.sessionId
+ ? { sessionId: extraContext.sessionId }
+ : {}),
+ ...(extraContext.userId && !baseOptions.context?.userId
+ ? { userId: extraContext.userId }
+ : {}),
+ };
+ }
+ // Apply factory enhancement using centralized utilities
+ const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
+ // Pass conversation memory config if available
+ if (this.conversationMemory) {
+ textOptions.conversationMemoryConfig =
+ this.conversationMemory.config;
+ // Include original prompt for context summarization
+ textOptions.originalPrompt = originalPrompt;
+ }
+ // Detect and execute domain-specific tools
+ const { toolResults, enhancedPrompt } = await this.detectAndExecuteTools(textOptions.prompt || options.input.text, factoryResult.domainType);
+ // Update prompt with tool results if available
+ if (enhancedPrompt !== textOptions.prompt) {
+ textOptions.prompt = enhancedPrompt;
+ logger.debug("Enhanced prompt with tool results", {
+ originalLength: options.input.text.length,
+ enhancedLength: enhancedPrompt.length,
+ toolResults: toolResults.length,
  });
  }
- }
- // 🔧 CRITICAL FIX: Convert to TextGenerationOptions while preserving the input object for multimodal support
- const baseOptions = {
- prompt: options.input.text,
- provider: options.provider,
- model: options.model,
- temperature: options.temperature,
- maxTokens: options.maxTokens,
- systemPrompt: options.systemPrompt,
- schema: options.schema,
- output: options.output,
- tools: options.tools, // Includes RAG tools if rag config was provided
- disableTools: options.disableTools,
- toolFilter: options.toolFilter,
- excludeTools: options.excludeTools,
- maxSteps: options.maxSteps,
- toolChoice: options.toolChoice,
- prepareStep: options.prepareStep,
- enableAnalytics: options.enableAnalytics,
- enableEvaluation: options.enableEvaluation,
- context: options.context,
- evaluationDomain: options.evaluationDomain,
- toolUsageContext: options.toolUsageContext,
- input: options.input, // This includes text, images, and content arrays
- region: options.region,
- tts: options.tts,
- fileRegistry: this.fileRegistry,
- abortSignal: options.abortSignal,
- skipToolPromptInjection: options.skipToolPromptInjection,
- };
- // Auto-map top-level sessionId/userId to context for convenience
- // Tests and users may pass sessionId/userId as top-level options
- const extraContext = options;
- if (extraContext.sessionId || extraContext.userId) {
- baseOptions.context = {
- ...baseOptions.context,
- ...(extraContext.sessionId && !baseOptions.context?.sessionId
- ? { sessionId: extraContext.sessionId }
- : {}),
- ...(extraContext.userId && !baseOptions.context?.userId
- ? { userId: extraContext.userId }
- : {}),
- };
- }
- // Apply factory enhancement using centralized utilities
- const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
- // Pass conversation memory config if available
- if (this.conversationMemory) {
- textOptions.conversationMemoryConfig =
- this.conversationMemory.config;
- // Include original prompt for context summarization
- textOptions.originalPrompt = originalPrompt;
- }
- // Detect and execute domain-specific tools
- const { toolResults, enhancedPrompt } = await this.detectAndExecuteTools(textOptions.prompt || options.input.text, factoryResult.domainType);
- // Update prompt with tool results if available
- if (enhancedPrompt !== textOptions.prompt) {
- textOptions.prompt = enhancedPrompt;
- logger.debug("Enhanced prompt with tool results", {
- originalLength: options.input.text.length,
- enhancedLength: enhancedPrompt.length,
- toolResults: toolResults.length,
+ // Use redesigned generation logic
+ const textResult = await this.generateTextInternal(textOptions);
+ // Emit generation completion event (NeuroLink format - enhanced with content)
+ this.emitter.emit("generation:end", {
+ provider: textResult.provider,
+ responseTime: Date.now() - startTime,
+ toolsUsed: textResult.toolsUsed,
+ timestamp: Date.now(),
+ result: textResult, // Enhanced: include full result
+ prompt: options.input?.text ||
+ options.prompt,
+ temperature: textOptions.temperature,
+ maxTokens: textOptions.maxTokens,
  });
- }
- // Use redesigned generation logic
- const textResult = await this.generateTextInternal(textOptions);
- // Emit generation completion event (NeuroLink format - enhanced with content)
- this.emitter.emit("generation:end", {
- provider: textResult.provider,
- responseTime: Date.now() - startTime,
- toolsUsed: textResult.toolsUsed,
- timestamp: Date.now(),
- result: textResult, // Enhanced: include full result
+ // ADD: Bedrock-compatible response:end event with content
+ this.emitter.emit("response:end", textResult.content || "");
+ // ADD: Bedrock-compatible message event
+ this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
+ // Convert back to GenerateResult
+ const generateResult = {
+ content: textResult.content,
+ finishReason: textResult.finishReason,
+ provider: textResult.provider,
+ model: textResult.model,
+ usage: textResult.usage
+ ? {
+ input: textResult.usage.input || 0,
+ output: textResult.usage.output || 0,
+ total: textResult.usage.total || 0,
+ }
+ : undefined,
+ responseTime: textResult.responseTime,
+ toolsUsed: textResult.toolsUsed,
+ toolExecutions: transformToolExecutions(textResult.toolExecutions),
+ enhancedWithTools: textResult.enhancedWithTools,
+ availableTools: transformAvailableTools(textResult.availableTools),
+ analytics: textResult.analytics,
+ // CRITICAL FIX: Include imageOutput for image generation models
+ imageOutput: textResult.imageOutput,
+ evaluation: textResult.evaluation
+ ? {
+ ...textResult.evaluation,
+ isOffTopic: textResult.evaluation
+ .isOffTopic ?? false,
+ alertSeverity: textResult.evaluation
+ .alertSeverity ?? "none",
+ reasoning: textResult.evaluation
+ .reasoning ?? "No evaluation provided",
+ evaluationModel: textResult.evaluation
+ .evaluationModel ?? "unknown",
+ evaluationTime: textResult.evaluation
+ .evaluationTime ?? Date.now(),
+ // Include evaluationDomain from original options
+ evaluationDomain: textResult.evaluation
+ .evaluationDomain ??
+ textOptions.evaluationDomain ??
+ factoryResult.domainType,
+ }
+ : undefined,
+ audio: textResult.audio,
+ video: textResult.video,
+ ppt: textResult.ppt,
+ };
+ // Accumulate session cost for budget tracking
+ if (generateResult.analytics?.cost &&
+ generateResult.analytics.cost > 0) {
+ this._sessionCostUsd += generateResult.analytics.cost;
+ }
+ this.scheduleGenerateMem0Storage(options, originalPrompt, generateResult);
+ // Set completion span attributes
+ generateSpan.setAttribute("neurolink.output_length", generateResult.content?.length || 0);
+ generateSpan.setAttribute("neurolink.tokens.input", generateResult.usage?.input || 0);
+ generateSpan.setAttribute("neurolink.tokens.output", generateResult.usage?.output || 0);
2295
+ generateSpan.setAttribute("neurolink.finish_reason", generateResult.finishReason || "unknown");
2296
+ generateSpan.setAttribute("neurolink.result_provider", generateResult.provider || "unknown");
2297
+ generateSpan.setAttribute("neurolink.result_model", generateResult.model || "unknown");
2298
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
2299
+ return generateResult;
1966
2300
  });
1967
- // ADD: Bedrock-compatible response:end event with content
1968
- this.emitter.emit("response:end", textResult.content || "");
1969
- // ADD: Bedrock-compatible message event
1970
- this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
1971
- // Convert back to GenerateResult
1972
- const generateResult = {
1973
- content: textResult.content,
1974
- finishReason: textResult.finishReason,
1975
- provider: textResult.provider,
1976
- model: textResult.model,
1977
- usage: textResult.usage
1978
- ? {
1979
- input: textResult.usage.input || 0,
1980
- output: textResult.usage.output || 0,
1981
- total: textResult.usage.total || 0,
1982
- }
1983
- : undefined,
1984
- responseTime: textResult.responseTime,
1985
- toolsUsed: textResult.toolsUsed,
1986
- toolExecutions: transformToolExecutions(textResult.toolExecutions),
1987
- enhancedWithTools: textResult.enhancedWithTools,
1988
- availableTools: transformAvailableTools(textResult.availableTools),
1989
- analytics: textResult.analytics,
1990
- // CRITICAL FIX: Include imageOutput for image generation models
1991
- imageOutput: textResult.imageOutput,
1992
- evaluation: textResult.evaluation
1993
- ? {
1994
- ...textResult.evaluation,
1995
- isOffTopic: textResult.evaluation
1996
- .isOffTopic ?? false,
1997
- alertSeverity: textResult.evaluation
1998
- .alertSeverity ??
1999
- "none",
2000
- reasoning: textResult.evaluation
2001
- .reasoning ?? "No evaluation provided",
2002
- evaluationModel: textResult.evaluation
2003
- .evaluationModel ?? "unknown",
2004
- evaluationTime: textResult.evaluation
2005
- .evaluationTime ?? Date.now(),
2006
- // Include evaluationDomain from original options
2007
- evaluationDomain: textResult.evaluation
2008
- .evaluationDomain ??
2009
- textOptions.evaluationDomain ??
2010
- factoryResult.domainType,
2011
- }
2012
- : undefined,
2013
- audio: textResult.audio,
2014
- video: textResult.video,
2015
- ppt: textResult.ppt,
2016
- };
2017
- // Accumulate session cost for budget tracking
2018
- if (generateResult.analytics?.cost &&
2019
- generateResult.analytics.cost > 0) {
2020
- this._sessionCostUsd += generateResult.analytics.cost;
2021
- }
2022
- this.scheduleGenerateMem0Storage(options, originalPrompt, generateResult);
2023
- // Set completion span attributes
2024
- generateSpan.setAttribute("neurolink.output_length", generateResult.content?.length || 0);
2025
- generateSpan.setAttribute("neurolink.tokens.input", generateResult.usage?.input || 0);
2026
- generateSpan.setAttribute("neurolink.tokens.output", generateResult.usage?.output || 0);
2027
- generateSpan.setAttribute("neurolink.finish_reason", generateResult.finishReason || "unknown");
2028
- generateSpan.setAttribute("neurolink.result_provider", generateResult.provider || "unknown");
2029
- generateSpan.setAttribute("neurolink.result_model", generateResult.model || "unknown");
2030
- generateSpan.setStatus({ code: SpanStatusCode.OK });
2031
- return generateResult;
2032
- });
2033
- }
2034
- catch (error) {
2035
- generateSpan.setStatus({
2036
- code: SpanStatusCode.ERROR,
2037
- message: error instanceof Error ? error.message : String(error),
2038
- });
2039
- throw error;
2040
- }
2041
- finally {
2042
- generateSpan.end();
2043
- }
2301
+ }
2302
+ catch (error) {
2303
+ generateSpan.setStatus({
2304
+ code: SpanStatusCode.ERROR,
2305
+ message: error instanceof Error ? error.message : String(error),
2306
+ });
2307
+ throw error;
2308
+ }
2309
+ finally {
2310
+ generateSpan.end();
2311
+ }
2312
+ }); // end metricsTraceContextStorage.run
2044
2313
  });
2045
2314
  }
2046
2315
  /**
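The hunk above closes generate() inside metricsTraceContextStorage.run(...), and the stream() hunk further down shows the matching setup: fresh trace and parent-span ids from crypto.randomUUID(), scoped per request through AsyncLocalStorage so concurrent generate()/stream() calls cannot overwrite each other's trace context. A minimal sketch of that scoping pattern follows; the helper names here (TraceContext, runWithTraceContext, currentTraceContext) are illustrative, not the package's exports.

```typescript
import { AsyncLocalStorage } from "node:async_hooks";
import { randomUUID } from "node:crypto";

// Illustrative shape; the real package keeps this internal.
interface TraceContext {
  traceId: string; // 32 hex chars (UUID with dashes stripped)
  parentSpanId: string; // 16 hex chars
}

const traceContextStorage = new AsyncLocalStorage<TraceContext>();

// Each request gets its own ids; AsyncLocalStorage scopes the value to the
// async call tree, so concurrent requests never race on shared state.
function runWithTraceContext<T>(fn: () => Promise<T>): Promise<T> {
  const ctx: TraceContext = {
    traceId: randomUUID().replace(/-/g, ""),
    parentSpanId: randomUUID().replace(/-/g, "").substring(0, 16),
  };
  return traceContextStorage.run(ctx, fn);
}

// Child spans read the ambient context instead of an instance field.
function currentTraceContext(): TraceContext | undefined {
  return traceContextStorage.getStore();
}
```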
@@ -2409,7 +2678,16 @@ Current user's request: ${currentInput}`;
2409
2678
  cacheSavingsPercent: mcpResult.usage.cacheSavingsPercent,
2410
2679
  }),
2411
2680
  });
2412
- await storeConversationTurn(this.conversationMemory, options, mcpResult, new Date(generateInternalStartTime), requestId);
2681
+ {
2682
+ const memStoreStart = Date.now();
2683
+ try {
2684
+ await storeConversationTurn(this.conversationMemory, options, mcpResult, new Date(generateInternalStartTime), requestId);
2685
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "mcp" }, Date.now() - memStoreStart, SpanStatus.OK);
2686
+ }
2687
+ catch (memErr) {
2688
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "mcp" }, Date.now() - memStoreStart, SpanStatus.ERROR, memErr instanceof Error ? memErr.message : String(memErr));
2689
+ }
2690
+ }
2413
2691
  this.emitter.emit("response:end", mcpResult.content || "");
2414
2692
  internalSpan.setAttribute("neurolink.path", "mcp");
2415
2693
  internalSpan.setAttribute("neurolink.tokens.input", mcpResult.usage?.input || 0);
@@ -2448,7 +2726,16 @@ Current user's request: ${currentInput}`;
2448
2726
  cacheSavingsPercent: directResult.usage.cacheSavingsPercent,
2449
2727
  }),
2450
2728
  });
2451
- await storeConversationTurn(this.conversationMemory, options, directResult, new Date(generateInternalStartTime), requestId);
2729
+ {
2730
+ const memStoreStart = Date.now();
2731
+ try {
2732
+ await storeConversationTurn(this.conversationMemory, options, directResult, new Date(generateInternalStartTime), requestId);
2733
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "direct" }, Date.now() - memStoreStart, SpanStatus.OK);
2734
+ }
2735
+ catch (memErr) {
2736
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "direct" }, Date.now() - memStoreStart, SpanStatus.ERROR, memErr instanceof Error ? memErr.message : String(memErr));
2737
+ }
2738
+ }
2452
2739
  this.emitter.emit("response:end", directResult.content || "");
2453
2740
  this.emitter.emit("message", `Text generation completed successfully`);
2454
2741
  internalSpan.setAttribute("neurolink.path", "direct");
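The two hunks above wrap storeConversationTurn in a timed try/catch that records a memory.store span with OK or ERROR status, while keeping memory persistence best-effort (the failure is recorded, not rethrown). A sketch of that repeated pattern as a reusable helper, assuming the recordMemorySpan argument order seen in the diff; the withMemoryStoreSpan wrapper itself is hypothetical.

```typescript
type SpanStatusValue = "OK" | "ERROR";

type RecordMemorySpan = (
  name: string,
  attributes: Record<string, string>,
  durationMs: number,
  status: SpanStatusValue,
  errorMessage?: string,
) => void;

// Hypothetical helper mirroring the pattern inlined around the
// "mcp", "direct", "stream" and "fallback-stream" store calls.
async function withMemoryStoreSpan(
  path: "mcp" | "direct" | "stream" | "fallback-stream",
  recordMemorySpan: RecordMemorySpan,
  store: () => Promise<void>,
): Promise<void> {
  const attributes = { "memory.operation": "store", "memory.path": path };
  const start = Date.now();
  try {
    await store();
    recordMemorySpan("memory.store", attributes, Date.now() - start, "OK");
  } catch (err) {
    // Memory persistence is best-effort: record the failure, do not rethrow.
    recordMemorySpan(
      "memory.store",
      attributes,
      Date.now() - start,
      "ERROR",
      err instanceof Error ? err.message : String(err),
    );
  }
}
```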
@@ -2975,6 +3262,8 @@ Current user's request: ${currentInput}`;
2975
3262
  const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
2976
3263
  this, // Pass SDK instance
2977
3264
  options.region);
3265
+ // Propagate trace context for parent-child span hierarchy
3266
+ provider._traceContext = this._metricsTraceContext;
2978
3267
  // ADD: Emit connection events for all providers (Bedrock-compatible)
2979
3268
  this.emitter.emit("connected");
2980
3269
  this.emitter.emit("message", `${providerName} provider initialized successfully`);
@@ -3039,6 +3328,7 @@ Current user's request: ${currentInput}`;
3039
3328
  audio: result.audio,
3040
3329
  video: result.video,
3041
3330
  ppt: result.ppt,
3331
+ imageOutput: result.imageOutput,
3042
3332
  // Include analytics and evaluation from BaseProvider
3043
3333
  analytics: result.analytics,
3044
3334
  evaluation: result.evaluation,
@@ -3189,6 +3479,8 @@ Current user's request: ${currentInput}`;
3189
3479
  const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
3190
3480
  this, // Pass SDK instance
3191
3481
  options.region);
3482
+ // Propagate trace context for parent-child span hierarchy
3483
+ provider._traceContext = this._metricsTraceContext;
3192
3484
  // ADD: Emit connection events for successful provider creation (Bedrock-compatible)
3193
3485
  this.emitter.emit("connected");
3194
3486
  this.emitter.emit("message", `${providerName} provider initialized successfully`);
@@ -3422,206 +3714,276 @@ Current user's request: ${currentInput}`;
3422
3714
  * @throws {Error} When conversation memory operations fail (if enabled)
3423
3715
  */
3424
3716
  async stream(options) {
3425
- // Manual span lifecycle: the span must stay open until the stream is fully consumed,
3426
- // NOT when the StreamResult object is returned. withSpan would end the span too early
3427
- // because streaming results resolve lazily via the async generator.
3428
- const streamSpan = tracers.sdk.startSpan("neurolink.stream", {
3429
- kind: SpanKind.INTERNAL,
3430
- attributes: {
3431
- [ATTR.NL_PROVIDER]: options.provider || "default",
3432
- [ATTR.GEN_AI_MODEL]: options.model || "default",
3433
- [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
3434
- [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
3435
- [ATTR.NL_STREAM_MODE]: true,
3436
- },
3437
- });
3438
- const spanStartTime = Date.now();
3439
- try {
3440
- const startTime = Date.now();
3441
- const hrTimeStart = process.hrtime.bigint();
3442
- const streamId = `neurolink-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3443
- const originalPrompt = options.input.text; // Store the original prompt for memory storage
3444
- // Inject file registry for lazy on-demand file processing
3445
- options.fileRegistry = this.fileRegistry;
3446
- await this.validateStreamInput(options);
3447
- // Check budget limit before making API call
3448
- if (options.maxBudgetUsd !== undefined &&
3449
- options.maxBudgetUsd > 0 &&
3450
- this._sessionCostUsd >= options.maxBudgetUsd) {
3451
- throw new NeuroLinkError({
3452
- code: "SESSION_BUDGET_EXCEEDED",
3453
- message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
3454
- category: ErrorCategory.VALIDATION,
3455
- severity: ErrorSeverity.HIGH,
3456
- retriable: false,
3457
- context: {
3458
- spent: this._sessionCostUsd,
3459
- limit: options.maxBudgetUsd,
3460
- },
3461
- });
3462
- }
3463
- this.emitStreamStartEvents(options, startTime);
3464
- // Check if workflow is requested
3465
- if (options.workflow || options.workflowConfig) {
3466
- const result = await this.streamWithWorkflow(options, startTime);
3467
- streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
3468
- streamSpan.setStatus({ code: SpanStatusCode.OK });
3469
- streamSpan.end();
3470
- return result;
3471
- }
3472
- // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
3473
- return await this.setLangfuseContextFromOptions(options, async () => {
3474
- try {
3475
- // Prepare options: init memory, MCP, Mem0, orchestration, Ollama auto-disable, tool detection
3476
- const { enhancedOptions, factoryResult } = await this.prepareStreamOptions(options, streamId, startTime, hrTimeStart);
3477
- const { stream: mcpStream, provider: providerName } = await this.createMCPStream(enhancedOptions);
3478
- // Update span with resolved provider name
3479
- streamSpan.setAttribute(ATTR.NL_PROVIDER, providerName || "unknown");
3480
- let accumulatedContent = "";
3481
- let chunkCount = 0;
3482
- // Set up event capture listeners
3483
- const { eventSequence, cleanup: cleanupListeners } = this.setupStreamEventListeners();
3484
- const metadata = {
3485
- fallbackAttempted: false,
3486
- guardrailsBlocked: false,
3487
- error: undefined,
3488
- };
3489
- const self = this;
3490
- const streamStartTime = Date.now();
3491
- const sessionId = enhancedOptions.context
3492
- ?.sessionId;
3493
- const processedStream = (async function* () {
3494
- let streamError = undefined;
3717
+ // Set metrics trace context for parent-child span linking
3718
+ const metricsTraceId = crypto.randomUUID().replace(/-/g, "");
3719
+ const metricsParentSpanId = crypto
3720
+ .randomUUID()
3721
+ .replace(/-/g, "")
3722
+ .substring(0, 16);
3723
+ // Scope trace context to this request via AsyncLocalStorage
3724
+ // so concurrent generate/stream calls don't race.
3725
+ return metricsTraceContextStorage.run({ traceId: metricsTraceId, parentSpanId: metricsParentSpanId }, async () => {
3726
+ // Manual span lifecycle: the span must stay open until the stream is fully consumed,
3727
+ // NOT when the StreamResult object is returned. withSpan would end the span too early
3728
+ // because streaming results resolve lazily via the async generator.
3729
+ const streamSpan = tracers.sdk.startSpan("neurolink.stream", {
3730
+ kind: SpanKind.INTERNAL,
3731
+ attributes: {
3732
+ [ATTR.NL_PROVIDER]: options.provider || "default",
3733
+ [ATTR.GEN_AI_MODEL]: options.model || "default",
3734
+ [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
3735
+ [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
3736
+ [ATTR.NL_STREAM_MODE]: true,
3737
+ },
3738
+ });
3739
+ const spanStartTime = Date.now();
3740
+ try {
3741
+ const startTime = Date.now();
3742
+ const hrTimeStart = process.hrtime.bigint();
3743
+ const streamId = `neurolink-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3744
+ const originalPrompt = options.input.text; // Store the original prompt for memory storage
3745
+ // Inject file registry for lazy on-demand file processing
3746
+ options.fileRegistry = this.fileRegistry;
3747
+ await this.validateStreamInput(options);
3748
+ // Check budget limit before making API call
3749
+ if (options.maxBudgetUsd !== undefined &&
3750
+ options.maxBudgetUsd > 0 &&
3751
+ this._sessionCostUsd >= options.maxBudgetUsd) {
3752
+ throw new NeuroLinkError({
3753
+ code: "SESSION_BUDGET_EXCEEDED",
3754
+ message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
3755
+ category: ErrorCategory.VALIDATION,
3756
+ severity: ErrorSeverity.HIGH,
3757
+ retriable: false,
3758
+ context: {
3759
+ spent: this._sessionCostUsd,
3760
+ limit: options.maxBudgetUsd,
3761
+ },
3762
+ });
3763
+ }
3764
+ this.emitStreamStartEvents(options, startTime);
3765
+ // Check if workflow is requested
3766
+ if (options.workflow || options.workflowConfig) {
3767
+ const result = await this.streamWithWorkflow(options, startTime);
3768
+ // Wrap the workflow stream so the span stays open until fully consumed
3769
+ const originalWorkflowStream = result.stream;
3770
+ result.stream = (async function* () {
3495
3771
  try {
3496
- for await (const chunk of mcpStream) {
3497
- chunkCount++;
3498
- if (chunk &&
3499
- "content" in chunk &&
3500
- typeof chunk.content === "string") {
3501
- accumulatedContent += chunk.content;
3502
- self.emitter.emit("response:chunk", chunk.content);
3503
- // Emit stream:chunk event (Observability Solution 8)
3504
- self.emitter.emit("stream:chunk", {
3505
- type: "stream:chunk",
3506
- content: chunk.content,
3507
- metadata: {
3508
- chunkIndex: chunkCount,
3509
- totalLength: accumulatedContent.length,
3510
- },
3511
- timestamp: Date.now(),
3512
- });
3513
- }
3772
+ for await (const chunk of originalWorkflowStream) {
3514
3773
  yield chunk;
3515
3774
  }
3516
- if (chunkCount === 0 && !metadata.fallbackAttempted) {
3517
- yield* self.handleStreamFallback(metadata, originalPrompt, enhancedOptions, providerName, accumulatedContent, (content) => {
3518
- accumulatedContent += content;
3519
- });
3520
- }
3521
- // Emit stream:complete event (Observability Solution 8)
3522
- self.emitter.emit("stream:complete", {
3523
- type: "stream:complete",
3524
- content: accumulatedContent,
3525
- metadata: {
3526
- chunkCount,
3527
- totalLength: accumulatedContent.length,
3528
- durationMs: Date.now() - streamStartTime,
3529
- sessionId,
3530
- },
3531
- timestamp: Date.now(),
3532
- });
3775
+ streamSpan.setStatus({ code: SpanStatusCode.OK });
3533
3776
  }
3534
3777
  catch (error) {
3535
- streamError = error;
3536
- // Emit stream:error event (Observability Solution 8)
3537
- self.emitter.emit("stream:error", {
3538
- type: "stream:error",
3539
- content: error instanceof Error ? error.message : String(error),
3540
- metadata: {
3541
- chunkCount,
3542
- totalLength: accumulatedContent.length,
3543
- durationMs: Date.now() - streamStartTime,
3544
- errorName: error instanceof Error ? error.name : "UnknownError",
3545
- sessionId,
3546
- },
3547
- timestamp: Date.now(),
3778
+ streamSpan.setStatus({
3779
+ code: SpanStatusCode.ERROR,
3780
+ message: error instanceof Error ? error.message : String(error),
3548
3781
  });
3549
3782
  throw error;
3550
3783
  }
3551
3784
  finally {
3552
- cleanupListeners();
3553
- // Finalize span now that the stream is fully consumed
3554
3785
  streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
3555
- streamSpan.setAttribute(ATTR.NL_OUTPUT_LENGTH, accumulatedContent.length);
3556
- streamSpan.setAttribute(ATTR.GEN_AI_FINISH_REASON, metadata.error || streamError ? "error" : "stop");
3557
- if (metadata.error || streamError) {
3558
- streamSpan.setStatus({
3559
- code: SpanStatusCode.ERROR,
3560
- message: metadata.error ||
3561
- (streamError instanceof Error
3562
- ? streamError.message
3563
- : String(streamError)),
3786
+ streamSpan.end();
3787
+ }
3788
+ })();
3789
+ return result;
3790
+ }
3791
+ // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
3792
+ return await this.setLangfuseContextFromOptions(options, async () => {
3793
+ try {
3794
+ // Prepare options: init memory, MCP, Mem0, orchestration, Ollama auto-disable, tool detection
3795
+ const { enhancedOptions, factoryResult } = await this.prepareStreamOptions(options, streamId, startTime, hrTimeStart);
3796
+ const { stream: mcpStream, provider: providerName, usage: streamUsage, model: streamModel, analytics: streamAnalytics, } = await this.createMCPStream(enhancedOptions);
3797
+ // Update span with resolved provider name
3798
+ streamSpan.setAttribute(ATTR.NL_PROVIDER, providerName || "unknown");
3799
+ let accumulatedContent = "";
3800
+ let chunkCount = 0;
3801
+ // Set up event capture listeners
3802
+ const { eventSequence, cleanup: cleanupListeners } = this.setupStreamEventListeners();
3803
+ const metadata = {
3804
+ fallbackAttempted: false,
3805
+ guardrailsBlocked: false,
3806
+ error: undefined,
3807
+ fallbackProvider: undefined,
3808
+ fallbackModel: undefined,
3809
+ };
3810
+ const self = this;
3811
+ const streamStartTime = Date.now();
3812
+ const sessionId = enhancedOptions.context?.sessionId;
3813
+ const processedStream = (async function* () {
3814
+ let streamError;
3815
+ try {
3816
+ for await (const chunk of mcpStream) {
3817
+ chunkCount++;
3818
+ if (chunk &&
3819
+ "content" in chunk &&
3820
+ typeof chunk.content === "string") {
3821
+ accumulatedContent += chunk.content;
3822
+ self.emitter.emit("response:chunk", chunk.content);
3823
+ // Emit stream:chunk event (Observability Solution 8)
3824
+ self.emitter.emit("stream:chunk", {
3825
+ type: "stream:chunk",
3826
+ content: chunk.content,
3827
+ metadata: {
3828
+ chunkIndex: chunkCount,
3829
+ totalLength: accumulatedContent.length,
3830
+ },
3831
+ timestamp: Date.now(),
3832
+ });
3833
+ }
3834
+ yield chunk;
3835
+ }
3836
+ if (chunkCount === 0 && !metadata.fallbackAttempted) {
3837
+ yield* self.handleStreamFallback(metadata, originalPrompt, enhancedOptions, providerName, accumulatedContent, (content) => {
3838
+ accumulatedContent += content;
3839
+ });
3840
+ }
3841
+ // Emit stream:complete event (Observability Solution 8)
3842
+ // When fallback took over, attribute the completion to the
3843
+ // fallback provider so downstream telemetry reflects reality.
3844
+ const effectiveProvider = metadata.fallbackProvider ?? providerName;
3845
+ const effectiveModel = metadata.fallbackModel ??
3846
+ streamModel ??
3847
+ enhancedOptions.model;
3848
+ // Resolve analytics promise to get final token usage
3849
+ let resolvedUsage = streamUsage;
3850
+ if (!resolvedUsage && streamAnalytics) {
3851
+ try {
3852
+ const resolved = await Promise.resolve(streamAnalytics);
3853
+ if (resolved?.tokenUsage) {
3854
+ resolvedUsage = resolved.tokenUsage;
3855
+ }
3856
+ }
3857
+ catch {
3858
+ /* non-blocking */
3859
+ }
3860
+ }
3861
+ self.emitter.emit("stream:complete", {
3862
+ type: "stream:complete",
3863
+ content: accumulatedContent,
3864
+ provider: effectiveProvider,
3865
+ model: effectiveModel,
3866
+ prompt: enhancedOptions.input?.text ||
3867
+ enhancedOptions.prompt,
3868
+ metadata: {
3869
+ chunkCount,
3870
+ totalLength: accumulatedContent.length,
3871
+ durationMs: Date.now() - streamStartTime,
3872
+ sessionId,
3873
+ usage: resolvedUsage,
3874
+ ...(metadata.fallbackAttempted && {
3875
+ primaryProvider: providerName,
3876
+ primaryModel: enhancedOptions.model,
3877
+ fallback: true,
3878
+ }),
3879
+ },
3880
+ timestamp: Date.now(),
3564
3881
  });
3565
3882
  }
3566
- else {
3567
- streamSpan.setStatus({ code: SpanStatusCode.OK });
3568
- }
3569
- streamSpan.end();
3570
- if (accumulatedContent.trim()) {
3571
- logger.info(`[NeuroLink.stream] stream() - COMPLETE SUCCESS`, {
3883
+ catch (error) {
3884
+ streamError = error;
3885
+ // Emit stream:error event (Observability Solution 8)
3886
+ self.emitter.emit("stream:error", {
3887
+ type: "stream:error",
3888
+ content: error instanceof Error ? error.message : String(error),
3572
3889
  provider: providerName,
3573
3890
  model: enhancedOptions.model,
3574
- responseTimeMs: Date.now() - startTime,
3575
- contentLength: accumulatedContent.length,
3576
- fallback: metadata.fallbackAttempted,
3891
+ metadata: {
3892
+ chunkCount,
3893
+ totalLength: accumulatedContent.length,
3894
+ durationMs: Date.now() - streamStartTime,
3895
+ errorName: error instanceof Error ? error.name : "UnknownError",
3896
+ sessionId,
3897
+ },
3898
+ timestamp: Date.now(),
3577
3899
  });
3900
+ throw error;
3578
3901
  }
3579
- await self.storeStreamConversationMemory({
3580
- enhancedOptions,
3581
- providerName,
3582
- originalPrompt,
3583
- accumulatedContent,
3584
- startTime,
3585
- eventSequence,
3586
- });
3902
+ finally {
3903
+ cleanupListeners();
3904
+ // Finalize span now that the stream is fully consumed
3905
+ streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
3906
+ streamSpan.setAttribute(ATTR.NL_OUTPUT_LENGTH, accumulatedContent.length);
3907
+ // When fallback took over, the primary provider's span must
3908
+ // reflect that it failed — never mark it as successful.
3909
+ const primaryFailed = !!(metadata.error || streamError);
3910
+ streamSpan.setAttribute(ATTR.GEN_AI_FINISH_REASON, primaryFailed ? "error" : "stop");
3911
+ if (metadata.fallbackAttempted) {
3912
+ streamSpan.setAttribute("neurolink.fallback_triggered", true);
3913
+ if (metadata.fallbackProvider) {
3914
+ streamSpan.setAttribute("neurolink.fallback_provider", metadata.fallbackProvider);
3915
+ }
3916
+ }
3917
+ if (primaryFailed) {
3918
+ streamSpan.setStatus({
3919
+ code: SpanStatusCode.ERROR,
3920
+ message: metadata.error ||
3921
+ (streamError instanceof Error
3922
+ ? streamError.message
3923
+ : String(streamError)),
3924
+ });
3925
+ }
3926
+ else {
3927
+ streamSpan.setStatus({ code: SpanStatusCode.OK });
3928
+ }
3929
+ streamSpan.end();
3930
+ if (accumulatedContent.trim()) {
3931
+ logger.info(`[NeuroLink.stream] stream() - COMPLETE SUCCESS`, {
3932
+ provider: providerName,
3933
+ model: enhancedOptions.model,
3934
+ responseTimeMs: Date.now() - startTime,
3935
+ contentLength: accumulatedContent.length,
3936
+ fallback: metadata.fallbackAttempted,
3937
+ });
3938
+ }
3939
+ await self.storeStreamConversationMemory({
3940
+ enhancedOptions,
3941
+ providerName,
3942
+ originalPrompt,
3943
+ accumulatedContent,
3944
+ startTime,
3945
+ eventSequence,
3946
+ });
3947
+ }
3948
+ })();
3949
+ const streamResult = await this.processStreamResult(processedStream, enhancedOptions, factoryResult);
3950
+ const responseTime = Date.now() - startTime;
3951
+ // Accumulate session cost for budget tracking
3952
+ if (streamResult.analytics?.cost &&
3953
+ streamResult.analytics.cost > 0) {
3954
+ this._sessionCostUsd += streamResult.analytics.cost;
3587
3955
  }
3588
- })();
3589
- const streamResult = await this.processStreamResult(processedStream, enhancedOptions, factoryResult);
3590
- const responseTime = Date.now() - startTime;
3591
- // Accumulate session cost for budget tracking
3592
- if (streamResult.analytics?.cost && streamResult.analytics.cost > 0) {
3593
- this._sessionCostUsd += streamResult.analytics.cost;
3956
+ this.emitStreamEndEvents(streamResult);
3957
+ return this.createStreamResponse(streamResult, processedStream, {
3958
+ providerName,
3959
+ options,
3960
+ startTime,
3961
+ responseTime,
3962
+ streamId,
3963
+ fallback: metadata.fallbackAttempted,
3964
+ guardrailsBlocked: metadata.guardrailsBlocked,
3965
+ error: metadata.error,
3966
+ events: eventSequence,
3967
+ });
3594
3968
  }
3595
- this.emitStreamEndEvents(streamResult);
3596
- return this.createStreamResponse(streamResult, processedStream, {
3597
- providerName,
3598
- options,
3599
- startTime,
3600
- responseTime,
3601
- streamId,
3602
- fallback: metadata.fallbackAttempted,
3603
- guardrailsBlocked: metadata.guardrailsBlocked,
3604
- error: metadata.error,
3605
- events: eventSequence,
3606
- });
3607
- }
3608
- catch (error) {
3609
- return this.handleStreamError(error, options, startTime, streamId, undefined, undefined);
3969
+ catch (error) {
3970
+ return this.handleStreamError(error, options, startTime, streamId, undefined, undefined);
3971
+ }
3972
+ });
3973
+ }
3974
+ catch (error) {
3975
+ // End span on error before re-throwing
3976
+ streamSpan.setStatus({
3977
+ code: SpanStatusCode.ERROR,
3978
+ message: error instanceof Error ? error.message : String(error),
3979
+ });
3980
+ if (error instanceof Error) {
3981
+ streamSpan.recordException(error);
3610
3982
  }
3611
- });
3612
- }
3613
- catch (error) {
3614
- // End span on error before re-throwing
3615
- streamSpan.setStatus({
3616
- code: SpanStatusCode.ERROR,
3617
- message: error instanceof Error ? error.message : String(error),
3618
- });
3619
- if (error instanceof Error) {
3620
- streamSpan.recordException(error);
3983
+ streamSpan.end();
3984
+ throw error;
3621
3985
  }
3622
- streamSpan.end();
3623
- throw error;
3624
- }
3986
+ }); // end metricsTraceContextStorage.run
3625
3987
  }
3626
3988
  /**
3627
3989
  * Prepare stream options: initialize memory, MCP, Mem0 retrieval, orchestration,
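The stream() hunk above keeps streamSpan open past the return of the StreamResult and only finalizes it in the async generator's finally block, because chunks resolve lazily; the new code extends the same treatment to workflow streams. A compact sketch of that lifecycle with a stand-in Span type; the span shape here is a placeholder, not the package's tracer API.

```typescript
// Stand-in span shape; the real code uses an OpenTelemetry-style tracer.
interface Span {
  setStatus(status: { code: "OK" | "ERROR"; message?: string }): void;
  end(): void;
}

// Wrap a lazily-consumed stream so the span ends only when the consumer
// finishes iterating (or the iteration fails), not when stream() returns.
function spanWrappedStream<T>(
  span: Span,
  source: AsyncIterable<T>,
): AsyncGenerator<T> {
  return (async function* () {
    try {
      for await (const chunk of source) {
        yield chunk;
      }
      span.setStatus({ code: "OK" });
    } catch (error) {
      span.setStatus({
        code: "ERROR",
        message: error instanceof Error ? error.message : String(error),
      });
      throw error;
    } finally {
      span.end(); // runs after full consumption, early break, or error
    }
  })();
}
```

A consumer that breaks out of its for-await loop early still triggers the generator's return() path, so the finally block runs and the span is not leaked.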
@@ -3828,6 +4190,25 @@ Current user's request: ${currentInput}`;
3828
4190
  metadata.fallbackAttempted = true;
3829
4191
  const errorMsg = "Stream completed with 0 chunks (possible guardrails block)";
3830
4192
  metadata.error = errorMsg;
4193
+ // Record a failed-provider span for the primary provider that returned 0 chunks
4194
+ try {
4195
+ const traceCtx = this._metricsTraceContext;
4196
+ let failedSpan = SpanSerializer.createGenerationSpan({
4197
+ provider: providerName,
4198
+ model: enhancedOptions.model || "unknown",
4199
+ name: `gen_ai.${providerName}.stream.failed`,
4200
+ traceId: traceCtx?.traceId,
4201
+ parentSpanId: traceCtx?.parentSpanId,
4202
+ });
4203
+ failedSpan = SpanSerializer.endSpan(failedSpan, SpanStatus.ERROR);
4204
+ failedSpan.statusMessage = errorMsg;
4205
+ failedSpan.durationMs = 0;
4206
+ this.metricsAggregator.recordSpan(failedSpan);
4207
+ getMetricsAggregator().recordSpan(failedSpan);
4208
+ }
4209
+ catch {
4210
+ /* non-blocking */
4211
+ }
3831
4212
  const fallbackRoute = ModelRouter.getFallbackRoute(originalPrompt || enhancedOptions.input.text || "", {
3832
4213
  provider: providerName,
3833
4214
  model: enhancedOptions.model || "gpt-4o",
@@ -3871,6 +4252,8 @@ Current user's request: ${currentInput}`;
3871
4252
  throw new Error(`Fallback provider ${fallbackRoute.provider} also returned 0 chunks`);
3872
4253
  }
3873
4254
  // Fallback succeeded - likely guardrails blocked primary
4255
+ metadata.fallbackProvider = fallbackRoute.provider;
4256
+ metadata.fallbackModel = fallbackRoute.model;
3874
4257
  metadata.guardrailsBlocked = true;
3875
4258
  }
3876
4259
  catch (fallbackError) {
@@ -3913,6 +4296,7 @@ Current user's request: ${currentInput}`;
3913
4296
  model: enhancedOptions.model,
3914
4297
  };
3915
4298
  }
4299
+ const memStoreStart = Date.now();
3916
4300
  try {
3917
4301
  await this.conversationMemory.storeConversationTurn({
3918
4302
  sessionId,
@@ -3926,6 +4310,7 @@ Current user's request: ${currentInput}`;
3926
4310
  requestId: enhancedOptions.context
3927
4311
  ?.requestId,
3928
4312
  });
4313
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "stream" }, Date.now() - memStoreStart, SpanStatus.OK);
3929
4314
  logger.debug("[NeuroLink.stream] Stored conversation turn with events", {
3930
4315
  sessionId,
3931
4316
  eventCount: eventSequence.length,
@@ -3933,6 +4318,7 @@ Current user's request: ${currentInput}`;
3933
4318
  });
3934
4319
  }
3935
4320
  catch (error) {
4321
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "stream" }, Date.now() - memStoreStart, SpanStatus.ERROR, error instanceof Error ? error.message : String(error));
3936
4322
  logger.warn("Failed to store stream conversation turn", {
3937
4323
  error: error instanceof Error ? error.message : String(error),
3938
4324
  });
@@ -4002,6 +4388,8 @@ Current user's request: ${currentInput}`;
4002
4388
  const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
4003
4389
  this, // Pass SDK instance
4004
4390
  options.region);
4391
+ // Propagate trace context for parent-child span hierarchy
4392
+ provider._traceContext = this._metricsTraceContext;
4005
4393
  // Enable tool execution for the provider using BaseProvider method
4006
4394
  provider.setupToolExecutor({
4007
4395
  customTools: this.getCustomTools(),
@@ -4051,7 +4439,13 @@ Current user's request: ${currentInput}`;
4051
4439
  provider: providerName,
4052
4440
  systemPromptPassedLength: enhancedSystemPrompt.length,
4053
4441
  });
4054
- return { stream: streamResult.stream, provider: providerName };
4442
+ return {
4443
+ stream: streamResult.stream,
4444
+ provider: providerName,
4445
+ usage: streamResult.usage,
4446
+ model: streamResult.model || options.model,
4447
+ analytics: streamResult.analytics,
4448
+ };
4055
4449
  }
4056
4450
  /**
4057
4451
  * Process stream result
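This hunk widens createMCPStream's return value so the caller can attribute usage and model to the stream:complete event; when usage is not available up front, the stream() hunk further up awaits the analytics promise after the last chunk. A hedged sketch of that resolution step; the TokenUsage and StreamAnalytics shapes are assumptions based on the fields used in the diff.

```typescript
// Assumed shapes, based on the fields referenced in the diff.
interface TokenUsage {
  input?: number;
  output?: number;
  total?: number;
}

interface StreamAnalytics {
  tokenUsage?: TokenUsage;
}

// Prefer usage reported synchronously; otherwise await the lazy analytics
// value once streaming has finished. Failures are swallowed so telemetry
// never blocks or breaks the stream result.
async function resolveStreamUsage(
  usage: TokenUsage | undefined,
  analytics: Promise<StreamAnalytics | undefined> | StreamAnalytics | undefined,
): Promise<TokenUsage | undefined> {
  if (usage) {
    return usage;
  }
  if (!analytics) {
    return undefined;
  }
  try {
    const resolved = await Promise.resolve(analytics);
    return resolved?.tokenUsage;
  } catch {
    return undefined; // non-blocking, mirroring the diff's empty catch
  }
}
```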
@@ -4110,6 +4504,27 @@ Current user's request: ${currentInput}`;
4110
4504
  logger.error("Stream generation failed, attempting fallback", {
4111
4505
  error: error instanceof Error ? error.message : String(error),
4112
4506
  });
4507
+ // Record a failed-provider span for the primary provider that threw
4508
+ try {
4509
+ const failedProvider = options.provider || "unknown";
4510
+ const traceCtx = this._metricsTraceContext;
4511
+ let failedSpan = SpanSerializer.createGenerationSpan({
4512
+ provider: failedProvider,
4513
+ model: options.model || "unknown",
4514
+ name: `gen_ai.${failedProvider}.stream.failed`,
4515
+ traceId: traceCtx?.traceId,
4516
+ parentSpanId: traceCtx?.parentSpanId,
4517
+ });
4518
+ failedSpan = SpanSerializer.endSpan(failedSpan, SpanStatus.ERROR);
4519
+ failedSpan.statusMessage =
4520
+ error instanceof Error ? error.message : String(error);
4521
+ failedSpan.durationMs = Date.now() - startTime;
4522
+ this.metricsAggregator.recordSpan(failedSpan);
4523
+ getMetricsAggregator().recordSpan(failedSpan);
4524
+ }
4525
+ catch {
4526
+ /* non-blocking */
4527
+ }
4113
4528
  const originalPrompt = options.input.text;
4114
4529
  const responseTime = Date.now() - startTime;
4115
4530
  const providerName = await getBestProvider(options.provider);
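Both this hunk and the earlier 0-chunk fallback hunk record an explicit ERROR span for the primary provider before handing off to the fallback, linked to the request's trace context so the failed attempt stays visible rather than being masked by the fallback's success. A sketch of that recording step; the span and recorder shapes below are inferred from the calls in the diff, not from package documentation.

```typescript
// Stand-in shapes inferred from the SpanSerializer/aggregator usage above.
interface GenerationSpan {
  name: string;
  provider: string;
  model: string;
  traceId?: string;
  parentSpanId?: string;
  status?: "OK" | "ERROR";
  statusMessage?: string;
  durationMs?: number;
}

interface SpanRecorder {
  recordSpan(span: GenerationSpan): void;
}

// Record one ERROR span for the failed primary provider into every recorder
// (the diff writes to both the instance aggregator and the global one).
function recordFailedProviderSpan(
  recorders: SpanRecorder[],
  params: {
    provider: string;
    model: string;
    traceId?: string;
    parentSpanId?: string;
    error: unknown;
    durationMs: number;
  },
): void {
  try {
    const span: GenerationSpan = {
      name: `gen_ai.${params.provider}.stream.failed`,
      provider: params.provider,
      model: params.model,
      traceId: params.traceId,
      parentSpanId: params.parentSpanId,
      status: "ERROR",
      statusMessage:
        params.error instanceof Error
          ? params.error.message
          : String(params.error),
      durationMs: params.durationMs,
    };
    for (const recorder of recorders) {
      recorder.recordSpan(span);
    }
  } catch {
    /* non-blocking: telemetry must never break the fallback path */
  }
}
```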
@@ -4159,6 +4574,7 @@ Current user's request: ${currentInput}`;
4159
4574
  model: options.model,
4160
4575
  };
4161
4576
  }
4577
+ const memStoreStart = Date.now();
4162
4578
  try {
4163
4579
  await self.conversationMemory.storeConversationTurn({
4164
4580
  sessionId: sessionId || options.context?.sessionId,
@@ -4172,8 +4588,10 @@ Current user's request: ${currentInput}`;
4172
4588
  options.context
4173
4589
  ?.requestId,
4174
4590
  });
4591
+ self.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "fallback-stream" }, Date.now() - memStoreStart, SpanStatus.OK);
4175
4592
  }
4176
4593
  catch (error) {
4594
+ self.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "fallback-stream" }, Date.now() - memStoreStart, SpanStatus.ERROR, error instanceof Error ? error.message : String(error));
4177
4595
  logger.warn("Failed to store fallback stream conversation turn", {
4178
4596
  error: error instanceof Error ? error.message : String(error),
4179
4597
  });