@juspay/neurolink 9.24.0 → 9.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. package/CHANGELOG.md +12 -0
  2. package/dist/adapters/tts/googleTTSHandler.js +26 -1
  3. package/dist/adapters/video/vertexVideoHandler.js +23 -17
  4. package/dist/cli/commands/config.d.ts +3 -3
  5. package/dist/cli/commands/observability.d.ts +53 -0
  6. package/dist/cli/commands/observability.js +453 -0
  7. package/dist/cli/commands/telemetry.d.ts +63 -0
  8. package/dist/cli/commands/telemetry.js +689 -0
  9. package/dist/cli/factories/commandFactory.js +29 -15
  10. package/dist/cli/parser.js +6 -9
  11. package/dist/cli/utils/formatters.d.ts +13 -0
  12. package/dist/cli/utils/formatters.js +23 -0
  13. package/dist/constants/contextWindows.js +6 -0
  14. package/dist/constants/enums.d.ts +6 -0
  15. package/dist/constants/enums.js +8 -2
  16. package/dist/context/budgetChecker.js +75 -48
  17. package/dist/context/contextCompactor.js +135 -127
  18. package/dist/core/baseProvider.d.ts +5 -0
  19. package/dist/core/baseProvider.js +117 -110
  20. package/dist/core/conversationMemoryInitializer.js +7 -4
  21. package/dist/core/conversationMemoryManager.d.ts +2 -0
  22. package/dist/core/conversationMemoryManager.js +6 -2
  23. package/dist/core/modules/GenerationHandler.d.ts +2 -2
  24. package/dist/core/modules/GenerationHandler.js +12 -12
  25. package/dist/evaluation/ragasEvaluator.js +39 -19
  26. package/dist/evaluation/scoring.js +46 -20
  27. package/dist/features/ppt/presentationOrchestrator.js +23 -0
  28. package/dist/features/ppt/slideGenerator.js +13 -0
  29. package/dist/features/ppt/slideRenderers.d.ts +1 -1
  30. package/dist/features/ppt/slideRenderers.js +6 -4
  31. package/dist/features/ppt/slideTypeInference.d.ts +1 -1
  32. package/dist/features/ppt/slideTypeInference.js +75 -73
  33. package/dist/files/fileTools.d.ts +6 -6
  34. package/dist/index.d.ts +46 -12
  35. package/dist/index.js +79 -17
  36. package/dist/lib/adapters/tts/googleTTSHandler.js +26 -1
  37. package/dist/lib/adapters/video/vertexVideoHandler.js +23 -17
  38. package/dist/lib/constants/contextWindows.js +6 -0
  39. package/dist/lib/constants/enums.d.ts +6 -0
  40. package/dist/lib/constants/enums.js +8 -2
  41. package/dist/lib/context/budgetChecker.js +75 -48
  42. package/dist/lib/context/contextCompactor.js +135 -127
  43. package/dist/lib/core/baseProvider.d.ts +5 -0
  44. package/dist/lib/core/baseProvider.js +117 -110
  45. package/dist/lib/core/conversationMemoryInitializer.js +7 -4
  46. package/dist/lib/core/conversationMemoryManager.d.ts +2 -0
  47. package/dist/lib/core/conversationMemoryManager.js +6 -2
  48. package/dist/lib/core/modules/GenerationHandler.d.ts +2 -2
  49. package/dist/lib/core/modules/GenerationHandler.js +12 -12
  50. package/dist/lib/evaluation/ragasEvaluator.js +39 -19
  51. package/dist/lib/evaluation/scoring.js +46 -20
  52. package/dist/lib/features/ppt/presentationOrchestrator.js +23 -0
  53. package/dist/lib/features/ppt/slideGenerator.js +13 -0
  54. package/dist/lib/features/ppt/slideRenderers.d.ts +1 -1
  55. package/dist/lib/features/ppt/slideRenderers.js +6 -4
  56. package/dist/lib/features/ppt/slideTypeInference.d.ts +1 -1
  57. package/dist/lib/features/ppt/slideTypeInference.js +75 -73
  58. package/dist/lib/files/fileTools.d.ts +6 -6
  59. package/dist/lib/index.d.ts +46 -12
  60. package/dist/lib/index.js +79 -17
  61. package/dist/lib/mcp/httpRateLimiter.js +39 -12
  62. package/dist/lib/mcp/httpRetryHandler.js +22 -1
  63. package/dist/lib/mcp/mcpClientFactory.js +13 -15
  64. package/dist/lib/memory/memoryRetrievalTools.js +22 -0
  65. package/dist/lib/neurolink.d.ts +64 -72
  66. package/dist/lib/neurolink.js +1007 -564
  67. package/dist/lib/observability/exporterRegistry.d.ts +152 -0
  68. package/dist/lib/observability/exporterRegistry.js +414 -0
  69. package/dist/lib/observability/exporters/arizeExporter.d.ts +32 -0
  70. package/dist/lib/observability/exporters/arizeExporter.js +139 -0
  71. package/dist/lib/observability/exporters/baseExporter.d.ts +117 -0
  72. package/dist/lib/observability/exporters/baseExporter.js +191 -0
  73. package/dist/lib/observability/exporters/braintrustExporter.d.ts +30 -0
  74. package/dist/lib/observability/exporters/braintrustExporter.js +155 -0
  75. package/dist/lib/observability/exporters/datadogExporter.d.ts +37 -0
  76. package/dist/lib/observability/exporters/datadogExporter.js +197 -0
  77. package/dist/lib/observability/exporters/index.d.ts +13 -0
  78. package/dist/lib/observability/exporters/index.js +14 -0
  79. package/dist/lib/observability/exporters/laminarExporter.d.ts +48 -0
  80. package/dist/lib/observability/exporters/laminarExporter.js +303 -0
  81. package/dist/lib/observability/exporters/langfuseExporter.d.ts +47 -0
  82. package/dist/lib/observability/exporters/langfuseExporter.js +204 -0
  83. package/dist/lib/observability/exporters/langsmithExporter.d.ts +26 -0
  84. package/dist/lib/observability/exporters/langsmithExporter.js +124 -0
  85. package/dist/lib/observability/exporters/otelExporter.d.ts +39 -0
  86. package/dist/lib/observability/exporters/otelExporter.js +165 -0
  87. package/dist/lib/observability/exporters/posthogExporter.d.ts +48 -0
  88. package/dist/lib/observability/exporters/posthogExporter.js +288 -0
  89. package/dist/lib/observability/exporters/sentryExporter.d.ts +32 -0
  90. package/dist/lib/observability/exporters/sentryExporter.js +166 -0
  91. package/dist/lib/observability/index.d.ts +25 -0
  92. package/dist/lib/observability/index.js +32 -0
  93. package/dist/lib/observability/metricsAggregator.d.ts +260 -0
  94. package/dist/lib/observability/metricsAggregator.js +557 -0
  95. package/dist/lib/observability/otelBridge.d.ts +49 -0
  96. package/dist/lib/observability/otelBridge.js +132 -0
  97. package/dist/lib/observability/retryPolicy.d.ts +192 -0
  98. package/dist/lib/observability/retryPolicy.js +384 -0
  99. package/dist/lib/observability/sampling/index.d.ts +4 -0
  100. package/dist/lib/observability/sampling/index.js +5 -0
  101. package/dist/lib/observability/sampling/samplers.d.ts +116 -0
  102. package/dist/lib/observability/sampling/samplers.js +217 -0
  103. package/dist/lib/observability/spanProcessor.d.ts +129 -0
  104. package/dist/lib/observability/spanProcessor.js +304 -0
  105. package/dist/lib/observability/tokenTracker.d.ts +156 -0
  106. package/dist/lib/observability/tokenTracker.js +414 -0
  107. package/dist/lib/observability/types/exporterTypes.d.ts +250 -0
  108. package/dist/lib/observability/types/exporterTypes.js +6 -0
  109. package/dist/lib/observability/types/index.d.ts +6 -0
  110. package/dist/lib/observability/types/index.js +5 -0
  111. package/dist/lib/observability/types/spanTypes.d.ts +244 -0
  112. package/dist/lib/observability/types/spanTypes.js +93 -0
  113. package/dist/lib/observability/utils/index.d.ts +4 -0
  114. package/dist/lib/observability/utils/index.js +5 -0
  115. package/dist/lib/observability/utils/safeMetadata.d.ts +10 -0
  116. package/dist/lib/observability/utils/safeMetadata.js +26 -0
  117. package/dist/lib/observability/utils/spanSerializer.d.ts +115 -0
  118. package/dist/lib/observability/utils/spanSerializer.js +291 -0
  119. package/dist/lib/providers/amazonSagemaker.d.ts +5 -4
  120. package/dist/lib/providers/amazonSagemaker.js +3 -4
  121. package/dist/lib/providers/googleVertex.d.ts +7 -0
  122. package/dist/lib/providers/googleVertex.js +76 -2
  123. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +0 -5
  124. package/dist/lib/rag/pipeline/RAGPipeline.js +122 -87
  125. package/dist/lib/rag/ragIntegration.js +30 -0
  126. package/dist/lib/rag/retrieval/hybridSearch.js +22 -0
  127. package/dist/lib/server/abstract/baseServerAdapter.js +51 -19
  128. package/dist/lib/server/middleware/common.js +44 -12
  129. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +2 -2
  130. package/dist/lib/services/server/ai/observability/instrumentation.js +10 -5
  131. package/dist/lib/types/conversationMemoryInterface.d.ts +2 -0
  132. package/dist/lib/types/modelTypes.d.ts +18 -18
  133. package/dist/lib/types/providers.d.ts +5 -0
  134. package/dist/lib/utils/pricing.js +25 -1
  135. package/dist/lib/utils/ttsProcessor.js +74 -59
  136. package/dist/lib/workflow/config.d.ts +36 -36
  137. package/dist/lib/workflow/core/ensembleExecutor.js +10 -0
  138. package/dist/lib/workflow/core/judgeScorer.js +20 -2
  139. package/dist/lib/workflow/core/workflowRunner.js +34 -1
  140. package/dist/mcp/httpRateLimiter.js +39 -12
  141. package/dist/mcp/httpRetryHandler.js +22 -1
  142. package/dist/mcp/mcpClientFactory.js +13 -15
  143. package/dist/memory/memoryRetrievalTools.js +22 -0
  144. package/dist/neurolink.d.ts +64 -72
  145. package/dist/neurolink.js +1007 -564
  146. package/dist/observability/FEATURE-STATUS.md +269 -0
  147. package/dist/observability/exporterRegistry.d.ts +152 -0
  148. package/dist/observability/exporterRegistry.js +413 -0
  149. package/dist/observability/exporters/arizeExporter.d.ts +32 -0
  150. package/dist/observability/exporters/arizeExporter.js +138 -0
  151. package/dist/observability/exporters/baseExporter.d.ts +117 -0
  152. package/dist/observability/exporters/baseExporter.js +190 -0
  153. package/dist/observability/exporters/braintrustExporter.d.ts +30 -0
  154. package/dist/observability/exporters/braintrustExporter.js +154 -0
  155. package/dist/observability/exporters/datadogExporter.d.ts +37 -0
  156. package/dist/observability/exporters/datadogExporter.js +196 -0
  157. package/dist/observability/exporters/index.d.ts +13 -0
  158. package/dist/observability/exporters/index.js +13 -0
  159. package/dist/observability/exporters/laminarExporter.d.ts +48 -0
  160. package/dist/observability/exporters/laminarExporter.js +302 -0
  161. package/dist/observability/exporters/langfuseExporter.d.ts +47 -0
  162. package/dist/observability/exporters/langfuseExporter.js +203 -0
  163. package/dist/observability/exporters/langsmithExporter.d.ts +26 -0
  164. package/dist/observability/exporters/langsmithExporter.js +123 -0
  165. package/dist/observability/exporters/otelExporter.d.ts +39 -0
  166. package/dist/observability/exporters/otelExporter.js +164 -0
  167. package/dist/observability/exporters/posthogExporter.d.ts +48 -0
  168. package/dist/observability/exporters/posthogExporter.js +287 -0
  169. package/dist/observability/exporters/sentryExporter.d.ts +32 -0
  170. package/dist/observability/exporters/sentryExporter.js +165 -0
  171. package/dist/observability/index.d.ts +25 -0
  172. package/dist/observability/index.js +31 -0
  173. package/dist/observability/metricsAggregator.d.ts +260 -0
  174. package/dist/observability/metricsAggregator.js +556 -0
  175. package/dist/observability/otelBridge.d.ts +49 -0
  176. package/dist/observability/otelBridge.js +131 -0
  177. package/dist/observability/retryPolicy.d.ts +192 -0
  178. package/dist/observability/retryPolicy.js +383 -0
  179. package/dist/observability/sampling/index.d.ts +4 -0
  180. package/dist/observability/sampling/index.js +4 -0
  181. package/dist/observability/sampling/samplers.d.ts +116 -0
  182. package/dist/observability/sampling/samplers.js +216 -0
  183. package/dist/observability/spanProcessor.d.ts +129 -0
  184. package/dist/observability/spanProcessor.js +303 -0
  185. package/dist/observability/tokenTracker.d.ts +156 -0
  186. package/dist/observability/tokenTracker.js +413 -0
  187. package/dist/observability/types/exporterTypes.d.ts +250 -0
  188. package/dist/observability/types/exporterTypes.js +5 -0
  189. package/dist/observability/types/index.d.ts +6 -0
  190. package/dist/observability/types/index.js +4 -0
  191. package/dist/observability/types/spanTypes.d.ts +244 -0
  192. package/dist/observability/types/spanTypes.js +92 -0
  193. package/dist/observability/utils/index.d.ts +4 -0
  194. package/dist/observability/utils/index.js +4 -0
  195. package/dist/observability/utils/safeMetadata.d.ts +10 -0
  196. package/dist/observability/utils/safeMetadata.js +25 -0
  197. package/dist/observability/utils/spanSerializer.d.ts +115 -0
  198. package/dist/observability/utils/spanSerializer.js +290 -0
  199. package/dist/providers/amazonSagemaker.d.ts +5 -4
  200. package/dist/providers/amazonSagemaker.js +3 -4
  201. package/dist/providers/googleVertex.d.ts +7 -0
  202. package/dist/providers/googleVertex.js +76 -2
  203. package/dist/rag/pipeline/RAGPipeline.d.ts +0 -5
  204. package/dist/rag/pipeline/RAGPipeline.js +122 -87
  205. package/dist/rag/ragIntegration.js +30 -0
  206. package/dist/rag/retrieval/hybridSearch.js +22 -0
  207. package/dist/server/abstract/baseServerAdapter.js +51 -19
  208. package/dist/server/middleware/common.js +44 -12
  209. package/dist/services/server/ai/observability/instrumentation.d.ts +2 -2
  210. package/dist/services/server/ai/observability/instrumentation.js +10 -5
  211. package/dist/types/conversationMemoryInterface.d.ts +2 -0
  212. package/dist/types/providers.d.ts +5 -0
  213. package/dist/utils/pricing.js +25 -1
  214. package/dist/utils/ttsProcessor.js +74 -59
  215. package/dist/workflow/config.d.ts +52 -52
  216. package/dist/workflow/core/ensembleExecutor.js +10 -0
  217. package/dist/workflow/core/judgeScorer.js +20 -2
  218. package/dist/workflow/core/workflowRunner.js +34 -1
  219. package/package.json +1 -1
package/dist/neurolink.js CHANGED
@@ -13,15 +13,17 @@ try {
13
13
  catch {
14
14
  // Environment variables should be set externally in production
15
15
  }
16
+ import { SpanKind, SpanStatusCode } from "@opentelemetry/api";
17
+ import { AsyncLocalStorage } from "async_hooks";
16
18
  import { EventEmitter } from "events";
17
19
  import pLimit from "p-limit";
18
20
  import { ErrorCategory, ErrorSeverity } from "./constants/enums.js";
19
21
  import { CIRCUIT_BREAKER, CIRCUIT_BREAKER_RESET_MS, MEMORY_THRESHOLDS, NANOSECOND_TO_MS_DIVISOR, PERFORMANCE_THRESHOLDS, PROVIDER_TIMEOUTS, RETRY_ATTEMPTS, RETRY_DELAYS, TOOL_TIMEOUTS, } from "./constants/index.js";
20
22
  import { checkContextBudget } from "./context/budgetChecker.js";
21
23
  import { ContextCompactor, } from "./context/contextCompactor.js";
22
- import { isContextOverflowError, getContextOverflowProvider, parseProviderOverflowDetails, } from "./context/errorDetection.js";
23
- import { ContextBudgetExceededError } from "./context/errors.js";
24
24
  import { emergencyContentTruncation } from "./context/emergencyTruncation.js";
25
+ import { getContextOverflowProvider, isContextOverflowError, parseProviderOverflowDetails, } from "./context/errorDetection.js";
26
+ import { ContextBudgetExceededError } from "./context/errors.js";
25
27
  import { repairToolPairs } from "./context/toolPairRepair.js";
26
28
  import { SYSTEM_LIMITS } from "./core/constants.js";
27
29
  import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
@@ -34,11 +36,17 @@ import { ExternalServerManager } from "./mcp/externalServerManager.js";
34
36
  // Import direct tools server for automatic registration
35
37
  import { directToolsServer } from "./mcp/servers/agent/directToolsServer.js";
36
38
  import { MCPToolRegistry } from "./mcp/toolRegistry.js";
39
+ import { initializeHippocampus, } from "./memory/hippocampusInitializer.js";
37
40
  import { initializeMem0 } from "./memory/mem0Initializer.js";
38
41
  import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
39
- import { initializeHippocampus, } from "./memory/hippocampusInitializer.js";
42
+ import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
43
+ import { SpanStatus, SpanType } from "./observability/types/spanTypes.js";
44
+ import { SpanSerializer } from "./observability/utils/spanSerializer.js";
40
45
  import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
46
+ import { ATTR } from "./telemetry/attributes.js";
47
+ import { tracers } from "./telemetry/tracers.js";
41
48
  import { ConversationMemoryError } from "./types/conversation.js";
49
+ import { AuthenticationError, AuthorizationError, InvalidModelError, } from "./types/errors.js";
42
50
  import { getConversationMessages, storeConversationTurn, } from "./utils/conversationMemory.js";
43
51
  // Enhanced error handling imports
44
52
  import { CircuitBreaker, ERROR_CODES, ErrorFactory, isAbortError, isRetriableError, logStructuredError, NeuroLinkError, withRetry, withTimeout, } from "./utils/errorHandling.js";
@@ -49,17 +57,13 @@ import { createCustomToolServerInfo, detectCategory, } from "./utils/mcpDefaults
49
57
  // Import orchestration components
50
58
  import { ModelRouter } from "./utils/modelRouter.js";
51
59
  import { getBestProvider } from "./utils/providerUtils.js";
60
+ import { NON_RETRYABLE_HTTP_STATUS_CODES } from "./utils/retryability.js";
52
61
  import { isZodSchema } from "./utils/schemaConversion.js";
53
62
  import { BinaryTaskClassifier } from "./utils/taskClassifier.js";
54
63
  // Tool detection and execution imports
55
64
  // Transformation utilities
56
65
  import { extractToolNames, optimizeToolForCollection, transformAvailableTools, transformParamsForLogging, transformToolExecutions, transformToolExecutionsForMCP, transformToolsForMCP, transformToolsToDescriptions, transformToolsToExpectedFormat, } from "./utils/transformationUtils.js";
57
- import { InvalidModelError, AuthenticationError, AuthorizationError, } from "./types/errors.js";
58
66
  import { isNonNullObject } from "./utils/typeUtils.js";
59
- import { NON_RETRYABLE_HTTP_STATUS_CODES } from "./utils/retryability.js";
60
- import { SpanKind, SpanStatusCode } from "@opentelemetry/api";
61
- import { tracers } from "./telemetry/tracers.js";
62
- import { ATTR } from "./telemetry/attributes.js";
63
67
  import { getWorkflow } from "./workflow/core/workflowRegistry.js";
64
68
  import { runWorkflow } from "./workflow/core/workflowRunner.js";
65
69
  /**
@@ -109,77 +113,11 @@ function isNonRetryableProviderError(error) {
109
113
  return false;
110
114
  }
111
115
  /**
112
- * NeuroLink - Universal AI Development Platform
113
- *
114
- * Main SDK class providing unified access to 14+ AI providers with enterprise features:
115
- * - Multi-provider support (OpenAI, Anthropic, Google AI Studio, Google Vertex, AWS Bedrock, etc.)
116
- * - MCP (Model Context Protocol) tool integration with 58+ external servers
117
- * - Human-in-the-Loop (HITL) security workflows for regulated industries
118
- * - Redis-based conversation memory and persistence
119
- * - Enterprise middleware system for monitoring and control
120
- * - Automatic provider fallback and retry logic
121
- * - Streaming with real-time token delivery
122
- * - Multimodal support (text, images, PDFs, CSV)
123
- *
124
- * @category Core
125
- *
126
- * @example Basic usage
127
- * ```typescript
128
- * import { NeuroLink } from '@juspay/neurolink';
129
- *
130
- * const neurolink = new NeuroLink();
131
- *
132
- * const result = await neurolink.generate({
133
- * input: { text: 'Explain quantum computing' },
134
- * provider: 'vertex',
135
- * model: 'gemini-3-flash'
136
- * });
137
- *
138
- * console.log(result.content);
139
- * ```
140
- *
141
- * @example With HITL security
142
- * ```typescript
143
- * const neurolink = new NeuroLink({
144
- * hitl: {
145
- * enabled: true,
146
- * requireApproval: ['writeFile', 'executeCode'],
147
- * confidenceThreshold: 0.85
148
- * }
149
- * });
150
- * ```
151
- *
152
- * @example With Redis memory
153
- * ```typescript
154
- * const neurolink = new NeuroLink({
155
- * conversationMemory: {
156
- * enabled: true,
157
- * redis: {
158
- * url: 'redis://localhost:6379'
159
- * }
160
- * }
161
- * });
162
- * ```
163
- *
164
- * @example With MCP tools
165
- * ```typescript
166
- * const neurolink = new NeuroLink();
167
- *
168
- * // Discover available tools
169
- * const tools = await neurolink.getAvailableTools();
170
- *
171
- * // Use tools in generation
172
- * const result = await neurolink.generate({
173
- * input: { text: 'Read the README.md file' },
174
- * tools: ['readFile']
175
- * });
176
- * ```
177
- *
178
- * @see {@link GenerateOptions} for generation options
179
- * @see {@link StreamOptions} for streaming options
180
- * @see {@link NeurolinkConstructorConfig} for configuration options
181
- * @since 1.0.0
116
+ * Module-level AsyncLocalStorage for per-request metrics trace context.
117
+ * Eliminates the race condition where overlapping generate/stream calls on the
118
+ * same NeuroLink instance would clobber each other's trace context.
182
119
  */
120
+ const metricsTraceContextStorage = new AsyncLocalStorage();
183
121
  export class NeuroLink {
184
122
  mcpInitialized = false;
185
123
  mcpInitPromise = null;
@@ -422,6 +360,15 @@ export class NeuroLink {
422
360
  * @throws {Error} When HITL configuration is invalid (if enabled)
423
361
  */
424
362
  observabilityConfig;
363
+ metricsAggregator = new MetricsAggregator();
364
+ /**
365
+ * Per-request metrics trace context backed by AsyncLocalStorage.
366
+ * Safe for concurrent requests on the same SDK instance.
367
+ * Context is set via metricsTraceContextStorage.run() in generate/stream.
368
+ */
369
+ get _metricsTraceContext() {
370
+ return metricsTraceContextStorage.getStore() ?? null;
371
+ }
425
372
  constructor(config) {
426
373
  this.toolRegistry = config?.toolRegistry || new MCPToolRegistry();
427
374
  this.fileRegistry = new FileReferenceRegistry();
@@ -444,6 +391,7 @@ export class NeuroLink {
444
391
  this.registerFileTools();
445
392
  this.registerMemoryRetrievalTools();
446
393
  this.initializeLangfuse(constructorId, constructorStartTime, constructorHrTimeStart);
394
+ this.initializeMetricsListeners();
447
395
  this.logConstructorComplete(constructorId, constructorStartTime, constructorHrTimeStart);
448
396
  }
449
397
  /**
@@ -666,9 +614,6 @@ export class NeuroLink {
666
614
  logger.debug("[NeuroLink] Skipping memory retrieval tools — requires Redis conversation memory");
667
615
  return;
668
616
  }
669
- // Defer registration until conversation memory is actually initialized
670
- // We register a placeholder that will use the lazy-initialized memory manager
671
- const self = this;
672
617
  const tools = {
673
618
  retrieve_context: {
674
619
  description: "Retrieve messages from conversation memory. Use this to access full tool " +
@@ -676,7 +621,7 @@ export class NeuroLink {
676
621
  "or search through conversation history.",
677
622
  execute: async (params) => {
678
623
  // Lazy access: conversationMemory is initialized on first generate() call
679
- const memoryManager = self.conversationMemory;
624
+ const memoryManager = this.conversationMemory;
680
625
  if (!memoryManager || !("getSessionRaw" in memoryManager)) {
681
626
  return {
682
627
  success: false,
@@ -1576,6 +1521,88 @@ Current user's request: ${currentInput}`;
1576
1521
  // Check if OpenTelemetry was initialized (by this or external app)
1577
1522
  return isOpenTelemetryInitialized();
1578
1523
  }
1524
+ /**
1525
+ * Get comprehensive telemetry status including Langfuse, OTel, and exporter health
1526
+ */
1527
+ getTelemetryStatus() {
1528
+ const langfuseConfig = this.observabilityConfig?.langfuse;
1529
+ const otelConfig = this.observabilityConfig?.openTelemetry;
1530
+ return {
1531
+ enabled: this.isTelemetryEnabled(),
1532
+ langfuse: langfuseConfig
1533
+ ? {
1534
+ enabled: langfuseConfig.enabled ?? false,
1535
+ baseUrl: langfuseConfig.baseUrl,
1536
+ environment: langfuseConfig.environment,
1537
+ }
1538
+ : undefined,
1539
+ openTelemetry: otelConfig
1540
+ ? {
1541
+ enabled: otelConfig.enabled ?? false,
1542
+ endpoint: otelConfig.endpoint,
1543
+ serviceName: otelConfig.serviceName,
1544
+ }
1545
+ : isOpenTelemetryInitialized() ||
1546
+ process.env.OTEL_EXPORTER_OTLP_ENDPOINT
1547
+ ? {
1548
+ enabled: isOpenTelemetryInitialized(),
1549
+ endpoint: process.env.OTEL_EXPORTER_OTLP_ENDPOINT,
1550
+ serviceName: process.env.OTEL_SERVICE_NAME,
1551
+ }
1552
+ : undefined,
1553
+ exporters: [],
1554
+ };
1555
+ }
1556
+ /**
1557
+ * Get aggregated observability metrics (latency, tokens, cost, success rate)
1558
+ */
1559
+ getMetrics() {
1560
+ return this.metricsAggregator.getMetrics();
1561
+ }
1562
+ /**
1563
+ * Get all recorded spans
1564
+ */
1565
+ getSpans() {
1566
+ return this.metricsAggregator.getSpans();
1567
+ }
1568
+ /**
1569
+ * Get traces (spans grouped by traceId with parent-child hierarchy)
1570
+ */
1571
+ getTraces() {
1572
+ return this.metricsAggregator.getTraces();
1573
+ }
1574
+ /**
1575
+ * Reset all collected metrics and spans
1576
+ */
1577
+ resetMetrics() {
1578
+ this.metricsAggregator.reset();
1579
+ }
1580
+ /**
1581
+ * Record a span for metrics tracking
1582
+ */
1583
+ recordMetricsSpan(span) {
1584
+ this.metricsAggregator.recordSpan(span);
1585
+ }
1586
+ /**
1587
+ * Record a memory operation span to both instance and global metrics aggregators.
1588
+ * This ensures memory spans are visible via sdk.getSpans() and getMetricsAggregator().getSpans().
1589
+ */
1590
+ recordMemorySpan(operationName, attributes, durationMs, status, statusMessage) {
1591
+ const traceCtx = this._metricsTraceContext;
1592
+ const span = SpanSerializer.createSpan(SpanType.MEMORY, operationName, attributes, traceCtx?.parentSpanId, traceCtx?.traceId);
1593
+ span.durationMs = durationMs;
1594
+ const endedSpan = SpanSerializer.endSpan(span, status);
1595
+ if (statusMessage) {
1596
+ endedSpan.statusMessage = statusMessage;
1597
+ }
1598
+ this.metricsAggregator.recordSpan(endedSpan);
1599
+ try {
1600
+ getMetricsAggregator().recordSpan(endedSpan);
1601
+ }
1602
+ catch {
1603
+ /* ignore */
1604
+ }
1605
+ }
1579
1606
  /**
1580
1607
  * Public method to initialize Langfuse observability
1581
1608
  * This method can be called externally to ensure Langfuse is properly initialized
@@ -1618,6 +1645,16 @@ Current user's request: ${currentInput}`;
1618
1645
  logger.warn("[NeuroLink] MCP servers shutdown failed:", error);
1619
1646
  }
1620
1647
  }
1648
+ // Close conversation memory manager (release Redis connections, etc.)
1649
+ if (this.conversationMemory?.close) {
1650
+ try {
1651
+ await this.conversationMemory.close();
1652
+ logger.debug("[NeuroLink] Conversation memory shutdown completed");
1653
+ }
1654
+ catch (error) {
1655
+ logger.warn("[NeuroLink] Conversation memory shutdown failed:", error);
1656
+ }
1657
+ }
1621
1658
  logger.debug("[NeuroLink] Graceful shutdown completed");
1622
1659
  }
1623
1660
  catch (error) {
@@ -1625,6 +1662,233 @@ Current user's request: ${currentInput}`;
1625
1662
  throw error;
1626
1663
  }
1627
1664
  }
1665
+ /**
1666
+ * Initialize event listeners that feed span data to MetricsAggregator.
1667
+ * Listens to generation:end, stream:complete, and tool:end events.
1668
+ */
1669
+ initializeMetricsListeners() {
1670
+ this.emitter.on("generation:end", ((...args) => {
1671
+ const data = args[0];
1672
+ try {
1673
+ const result = data.result;
1674
+ const usage = result?.usage;
1675
+ const analytics = result?.analytics;
1676
+ const provider = data.provider ||
1677
+ result?.provider ||
1678
+ "unknown";
1679
+ const model = result?.model || "unknown";
1680
+ const responseTime = data.responseTime || 0;
1681
+ const traceCtx = this._metricsTraceContext;
1682
+ let span = SpanSerializer.createGenerationSpan({
1683
+ provider,
1684
+ model,
1685
+ name: `gen_ai.${provider}.chat`,
1686
+ traceId: traceCtx?.traceId,
1687
+ input: data.prompt,
1688
+ temperature: data.temperature,
1689
+ maxTokens: data.maxTokens,
1690
+ });
1691
+ // Make this the root span by using the pre-generated rootSpanId
1692
+ if (traceCtx) {
1693
+ span.spanId = traceCtx.parentSpanId;
1694
+ span.parentSpanId = undefined;
1695
+ }
1696
+ // Mark failed generations with ERROR status so metrics count them correctly
1697
+ const spanStatus = data.success === false || data.error
1698
+ ? SpanStatus.ERROR
1699
+ : SpanStatus.OK;
1700
+ span = SpanSerializer.endSpan(span, spanStatus, data.error ? String(data.error) : undefined);
1701
+ span.durationMs = responseTime;
1702
+ if (usage) {
1703
+ span = SpanSerializer.enrichWithTokenUsage(span, {
1704
+ promptTokens: usage.input || 0,
1705
+ completionTokens: usage.output || 0,
1706
+ totalTokens: usage.total || (usage.input || 0) + (usage.output || 0),
1707
+ });
1708
+ }
1709
+ if (analytics?.cost && analytics.cost > 0) {
1710
+ span = SpanSerializer.enrichWithCost(span, {
1711
+ totalCost: analytics.cost,
1712
+ });
1713
+ }
1714
+ else if (usage && model !== "unknown") {
1715
+ // Fallback: compute cost from token usage + built-in pricing
1716
+ const tokenTracker = this.metricsAggregator.getTokenTracker();
1717
+ const pricing = tokenTracker.getModelPricing(model);
1718
+ if (pricing) {
1719
+ const inputCost = ((usage.input || 0) / 1_000_000) * pricing.inputPricePerMillion;
1720
+ const outputCost = ((usage.output || 0) / 1_000_000) * pricing.outputPricePerMillion;
1721
+ const totalCost = inputCost + outputCost;
1722
+ if (totalCost > 0) {
1723
+ span = SpanSerializer.enrichWithCost(span, {
1724
+ inputCost,
1725
+ outputCost,
1726
+ totalCost,
1727
+ });
1728
+ }
1729
+ }
1730
+ }
1731
+ // Record output (truncated for safety)
1732
+ const content = result?.content || result?.text;
1733
+ if (content) {
1734
+ span = SpanSerializer.updateAttributes(span, {
1735
+ output: content.length > 5000
1736
+ ? content.substring(0, 5000) + "...[truncated]"
1737
+ : content,
1738
+ });
1739
+ }
1740
+ this.metricsAggregator.recordSpan(span);
1741
+ getMetricsAggregator().recordSpan(span);
1742
+ }
1743
+ catch {
1744
+ // Non-blocking
1745
+ }
1746
+ }));
1747
+ this.emitter.on("stream:complete", ((...args) => {
1748
+ const data = args[0];
1749
+ try {
1750
+ const metadata = data.metadata;
1751
+ const durationMs = metadata?.durationMs || 0;
1752
+ const chunkCount = metadata?.chunkCount || 0;
1753
+ const totalLength = metadata?.totalLength || 0;
1754
+ const provider = data.provider || "unknown";
1755
+ const model = data.model || "unknown";
1756
+ const traceCtx = this._metricsTraceContext;
1757
+ let span = SpanSerializer.createGenerationSpan({
1758
+ provider,
1759
+ model,
1760
+ name: `gen_ai.${provider}.stream`,
1761
+ traceId: traceCtx?.traceId,
1762
+ });
1763
+ // Make this the root span by using the pre-generated rootSpanId
1764
+ if (traceCtx) {
1765
+ span.spanId = traceCtx.parentSpanId;
1766
+ span.parentSpanId = undefined;
1767
+ }
1768
+ span = SpanSerializer.endSpan(span, SpanStatus.OK);
1769
+ span.durationMs = durationMs;
1770
+ span.attributes["stream.chunk_count"] = chunkCount;
1771
+ span.attributes["stream.content_length"] = totalLength;
1772
+ // Record stream input prompt
1773
+ if (data.prompt) {
1774
+ const promptStr = String(data.prompt);
1775
+ span = SpanSerializer.updateAttributes(span, {
1776
+ input: promptStr.length > 5000
1777
+ ? promptStr.substring(0, 5000) + "...[truncated]"
1778
+ : promptStr,
1779
+ });
1780
+ }
1781
+ // Record streamed output (truncated for safety)
1782
+ const streamContent = data.content;
1783
+ if (streamContent) {
1784
+ span = SpanSerializer.updateAttributes(span, {
1785
+ output: streamContent.length > 5000
1786
+ ? streamContent.substring(0, 5000) + "...[truncated]"
1787
+ : streamContent,
1788
+ });
1789
+ }
1790
+ // Enrich stream span with token usage if available
1791
+ const usage = metadata?.usage;
1792
+ if (usage) {
1793
+ span = SpanSerializer.enrichWithTokenUsage(span, {
1794
+ promptTokens: usage.input || 0,
1795
+ completionTokens: usage.output || 0,
1796
+ totalTokens: usage.total || (usage.input || 0) + (usage.output || 0),
1797
+ });
1798
+ // Compute cost from token usage
1799
+ if (model !== "unknown") {
1800
+ const tokenTracker = this.metricsAggregator.getTokenTracker();
1801
+ const pricing = tokenTracker.getModelPricing(model);
1802
+ if (pricing) {
1803
+ const inputCost = ((usage.input || 0) / 1_000_000) * pricing.inputPricePerMillion;
1804
+ const outputCost = ((usage.output || 0) / 1_000_000) *
1805
+ pricing.outputPricePerMillion;
1806
+ const totalCost = inputCost + outputCost;
1807
+ if (totalCost > 0) {
1808
+ span = SpanSerializer.enrichWithCost(span, {
1809
+ inputCost,
1810
+ outputCost,
1811
+ totalCost,
1812
+ });
1813
+ }
1814
+ }
1815
+ }
1816
+ }
1817
+ this.metricsAggregator.recordSpan(span);
1818
+ getMetricsAggregator().recordSpan(span);
1819
+ }
1820
+ catch {
1821
+ // Non-blocking
1822
+ }
1823
+ }));
1824
+ this.emitter.on("tool:end", ((...args) => {
1825
+ const data = args[0];
1826
+ try {
1827
+ // Handle both event formats: {toolName} (from emitToolEnd) and {tool} (from executeToolInternal)
1828
+ const toolName = data.toolName || data.tool || "unknown";
1829
+ const responseTime = data.responseTime || data.duration || 0;
1830
+ // success is explicit in one format; infer from error presence in the other
1831
+ const success = data.success !== undefined ? data.success : !data.error;
1832
+ const traceCtx = this._metricsTraceContext;
1833
+ let span = SpanSerializer.createSpan(SpanType.TOOL_CALL, `tool.${toolName}`, {
1834
+ "tool.name": toolName,
1835
+ "tool.success": success,
1836
+ }, traceCtx?.parentSpanId, traceCtx?.traceId);
1837
+ span = SpanSerializer.endSpan(span, success ? SpanStatus.OK : SpanStatus.ERROR);
1838
+ span.durationMs = responseTime;
1839
+ if (!success && data.error) {
1840
+ span.statusMessage =
1841
+ data.error.message || String(data.error);
1842
+ }
1843
+ if (data.result) {
1844
+ try {
1845
+ span.attributes["tool.result"] = JSON.stringify(data.result).substring(0, 500);
1846
+ }
1847
+ catch {
1848
+ // Non-blocking
1849
+ }
1850
+ }
1851
+ this.metricsAggregator.recordSpan(span);
1852
+ getMetricsAggregator().recordSpan(span);
1853
+ }
1854
+ catch {
1855
+ // Non-blocking
1856
+ }
1857
+ }));
1858
+ this.emitter.on("stream:error", ((...args) => {
1859
+ const data = args[0];
1860
+ try {
1861
+ const metadata = data.metadata;
1862
+ const durationMs = metadata?.durationMs || 0;
1863
+ const chunkCount = metadata?.chunkCount || 0;
1864
+ const errorName = metadata?.errorName || "UnknownError";
1865
+ const errorMessage = data.content || "Stream error";
1866
+ const provider = data.provider || "unknown";
1867
+ const model = data.model || "unknown";
1868
+ const traceCtx = this._metricsTraceContext;
1869
+ let span = SpanSerializer.createGenerationSpan({
1870
+ provider,
1871
+ model,
1872
+ name: `gen_ai.${provider}.stream.error`,
1873
+ traceId: traceCtx?.traceId,
1874
+ });
1875
+ // Make this the root span
1876
+ if (traceCtx) {
1877
+ span.spanId = traceCtx.parentSpanId;
1878
+ span.parentSpanId = undefined;
1879
+ }
1880
+ span = SpanSerializer.endSpan(span, SpanStatus.ERROR);
1881
+ span.durationMs = durationMs;
1882
+ span.statusMessage = `${errorName}: ${errorMessage}`;
1883
+ span.attributes["stream.chunk_count"] = chunkCount;
1884
+ this.metricsAggregator.recordSpan(span);
1885
+ getMetricsAggregator().recordSpan(span);
1886
+ }
1887
+ catch {
1888
+ // Non-blocking
1889
+ }
1890
+ }));
1891
+ }
1628
1892
  /**
1629
1893
  * Generate AI response with comprehensive feature support.
1630
1894
  *
@@ -1727,320 +1991,350 @@ Current user's request: ${currentInput}`;
1727
1991
  */
1728
1992
  async generate(optionsOrPrompt) {
1729
1993
  return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, async (generateSpan) => {
1730
- try {
1731
- const originalPrompt = this._extractOriginalPrompt(optionsOrPrompt);
1732
- // Convert string prompt to full options
1733
- const options = typeof optionsOrPrompt === "string"
1734
- ? { input: { text: optionsOrPrompt } }
1735
- : optionsOrPrompt;
1736
- // Set span attributes for observability
1737
- generateSpan.setAttribute("neurolink.provider", options.provider || "default");
1738
- generateSpan.setAttribute("neurolink.model", options.model || "default");
1739
- generateSpan.setAttribute("neurolink.input_length", typeof optionsOrPrompt === "string"
1740
- ? optionsOrPrompt.length
1741
- : options.input?.text?.length || 0);
1742
- generateSpan.setAttribute("neurolink.has_tools", !!(options.tools && Object.keys(options.tools).length > 0));
1743
- // Validate prompt
1744
- if (!options.input?.text || typeof options.input.text !== "string") {
1745
- throw new Error("Input text is required and must be a non-empty string");
1746
- }
1747
- // Check budget limit before making API call
1748
- if (options.maxBudgetUsd !== undefined &&
1749
- options.maxBudgetUsd > 0 &&
1750
- this._sessionCostUsd >= options.maxBudgetUsd) {
1751
- throw new NeuroLinkError({
1752
- code: "SESSION_BUDGET_EXCEEDED",
1753
- message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
1754
- category: ErrorCategory.VALIDATION,
1755
- severity: ErrorSeverity.HIGH,
1756
- retriable: false,
1757
- context: {
1758
- spent: this._sessionCostUsd,
1759
- limit: options.maxBudgetUsd,
1760
- },
1761
- });
1762
- }
1763
- // Check if workflow is requested
1764
- if (options.workflow || options.workflowConfig) {
1765
- return await this.generateWithWorkflow(options);
1766
- }
1767
- // Check if PPT output mode is requested
1768
- if (options.output?.mode === "ppt") {
1769
- const pptResult = await this.generateWithPPT(options);
1770
- generateSpan.setAttribute("neurolink.output_length", pptResult.content?.length ?? 0);
1771
- if (pptResult.analytics) {
1772
- generateSpan.setAttribute("neurolink.tokens.input", pptResult.analytics.tokenUsage?.input ?? 0);
1773
- generateSpan.setAttribute("neurolink.tokens.output", pptResult.analytics.tokenUsage?.output ?? 0);
1774
- generateSpan.setAttribute("neurolink.cost", pptResult.analytics.cost ?? 0);
1994
+ // Set metrics trace context for parent-child span linking.
1995
+ // The generation span will be the root (no parentSpanId).
1996
+ // Tool spans will be children of the root span via rootSpanId.
1997
+ const metricsTraceId = crypto.randomUUID().replace(/-/g, "");
1998
+ const metricsRootSpanId = crypto
1999
+ .randomUUID()
2000
+ .replace(/-/g, "")
2001
+ .substring(0, 16);
2002
+ // Scope trace context to this request via AsyncLocalStorage
2003
+ // so concurrent generate/stream calls don't race.
2004
+ return metricsTraceContextStorage.run({ traceId: metricsTraceId, parentSpanId: metricsRootSpanId }, async () => {
2005
+ try {
2006
+ const originalPrompt = this._extractOriginalPrompt(optionsOrPrompt);
2007
+ // Convert string prompt to full options
2008
+ const options = typeof optionsOrPrompt === "string"
2009
+ ? { input: { text: optionsOrPrompt } }
2010
+ : optionsOrPrompt;
2011
+ // Set span attributes for observability
2012
+ generateSpan.setAttribute("neurolink.provider", options.provider || "default");
2013
+ generateSpan.setAttribute("neurolink.model", options.model || "default");
2014
+ generateSpan.setAttribute("neurolink.input_length", typeof optionsOrPrompt === "string"
2015
+ ? optionsOrPrompt.length
2016
+ : options.input?.text?.length || 0);
2017
+ generateSpan.setAttribute("neurolink.has_tools", !!(options.tools && Object.keys(options.tools).length > 0));
2018
+ // Validate prompt
2019
+ if (!options.input?.text ||
2020
+ typeof options.input.text !== "string") {
2021
+ throw new Error("Input text is required and must be a non-empty string");
1775
2022
  }
1776
- generateSpan.setStatus({ code: SpanStatusCode.OK });
1777
- return pptResult;
1778
- }
1779
- // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
1780
- return await this.setLangfuseContextFromOptions(options, async () => {
1781
- if (this.conversationMemoryConfig?.conversationMemory?.mem0Enabled &&
1782
- options.context?.userId) {
1783
- try {
1784
- const mem0 = await this.ensureMem0Ready();
1785
- if (!mem0) {
1786
- logger.debug("Mem0 not available, continuing without memory retrieval");
1787
- }
1788
- else {
1789
- const memories = await mem0.search(options.input.text, {
1790
- user_id: options.context.userId,
1791
- limit: 5,
1792
- });
1793
- if (memories && memories.length > 0) {
1794
- // Enhance the input with memory context
1795
- const memoryContext = this.extractMemoryContext(memories);
1796
- options.input.text = this.formatMemoryContext(memoryContext, options.input.text);
1797
- }
1798
- }
1799
- }
1800
- catch (error) {
1801
- logger.warn("Mem0 memory retrieval failed:", error);
1802
- }
2023
+ // Check budget limit before making API call
2024
+ if (options.maxBudgetUsd !== undefined &&
2025
+ options.maxBudgetUsd > 0 &&
2026
+ this._sessionCostUsd >= options.maxBudgetUsd) {
2027
+ throw new NeuroLinkError({
2028
+ code: "SESSION_BUDGET_EXCEEDED",
2029
+ message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
2030
+ category: ErrorCategory.VALIDATION,
2031
+ severity: ErrorSeverity.HIGH,
2032
+ retriable: false,
2033
+ context: {
2034
+ spent: this._sessionCostUsd,
2035
+ limit: options.maxBudgetUsd,
2036
+ },
2037
+ });
1803
2038
  }
1804
- // Memory retrieval
1805
- if (this.conversationMemoryConfig?.conversationMemory?.memory
1806
- ?.enabled &&
1807
- options.context?.userId) {
1808
- try {
1809
- options.input.text = await this.retrieveMemory(options.input.text, options.context.userId);
1810
- logger.debug("Memory retrieval successful");
1811
- }
1812
- catch (error) {
1813
- logger.warn("Memory retrieval failed:", error);
2039
+ // Check if workflow is requested
2040
+ if (options.workflow || options.workflowConfig) {
2041
+ return await this.generateWithWorkflow(options);
2042
+ }
2043
+ // Check if PPT output mode is requested
2044
+ if (options.output?.mode === "ppt") {
2045
+ const pptResult = await this.generateWithPPT(options);
2046
+ generateSpan.setAttribute("neurolink.output_length", pptResult.content?.length ?? 0);
2047
+ if (pptResult.analytics) {
2048
+ generateSpan.setAttribute("neurolink.tokens.input", pptResult.analytics.tokenUsage?.input ?? 0);
2049
+ generateSpan.setAttribute("neurolink.tokens.output", pptResult.analytics.tokenUsage?.output ?? 0);
2050
+ generateSpan.setAttribute("neurolink.cost", pptResult.analytics.cost ?? 0);
1814
2051
  }
2052
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
2053
+ return pptResult;
1815
2054
  }
1816
- const startTime = Date.now();
1817
- // Apply orchestration if enabled and no specific provider/model requested
1818
- if (this.enableOrchestration &&
1819
- !options.provider &&
1820
- !options.model) {
1821
- try {
1822
- const orchestratedOptions = await this.applyOrchestration(options);
1823
- logger.debug("Orchestration applied", {
1824
- originalProvider: options.provider || "auto",
1825
- orchestratedProvider: orchestratedOptions.provider,
1826
- orchestratedModel: orchestratedOptions.model,
1827
- prompt: options.input.text.substring(0, 100),
1828
- });
1829
- // Use orchestrated options
1830
- Object.assign(options, orchestratedOptions);
2055
+ // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
2056
+ return await this.setLangfuseContextFromOptions(options, async () => {
2057
+ if (this.conversationMemoryConfig?.conversationMemory
2058
+ ?.mem0Enabled &&
2059
+ options.context?.userId) {
2060
+ try {
2061
+ const mem0 = await this.ensureMem0Ready();
2062
+ if (!mem0) {
2063
+ logger.debug("Mem0 not available, continuing without memory retrieval");
2064
+ }
2065
+ else {
2066
+ const memories = await mem0.search(options.input.text, {
2067
+ user_id: options.context.userId,
2068
+ limit: 5,
2069
+ });
2070
+ if (memories && memories.length > 0) {
2071
+ // Enhance the input with memory context
2072
+ const memoryContext = this.extractMemoryContext(memories);
2073
+ options.input.text = this.formatMemoryContext(memoryContext, options.input.text);
2074
+ }
2075
+ }
2076
+ }
2077
+ catch (error) {
2078
+ logger.warn("Mem0 memory retrieval failed:", error);
2079
+ }
1831
2080
  }
1832
- catch (error) {
1833
- logger.warn("Orchestration failed, continuing with original options", {
1834
- error: error instanceof Error ? error.message : String(error),
1835
- originalProvider: options.provider || "auto",
1836
- });
1837
- // Continue with original options if orchestration fails
2081
+ const startTime = Date.now();
2082
+ // Apply orchestration if enabled and no specific provider/model requested
2083
+ if (this.enableOrchestration &&
2084
+ !options.provider &&
2085
+ !options.model) {
2086
+ try {
2087
+ const orchestratedOptions = await this.applyOrchestration(options);
2088
+ logger.debug("Orchestration applied", {
2089
+ originalProvider: options.provider || "auto",
2090
+ orchestratedProvider: orchestratedOptions.provider,
2091
+ orchestratedModel: orchestratedOptions.model,
2092
+ prompt: options.input.text.substring(0, 100),
2093
+ });
2094
+ // Use orchestrated options
2095
+ Object.assign(options, orchestratedOptions);
2096
+ }
2097
+ catch (error) {
2098
+ logger.warn("Orchestration failed, continuing with original options", {
2099
+ error: error instanceof Error
2100
+ ? error.message
2101
+ : String(error),
2102
+ originalProvider: options.provider || "auto",
2103
+ });
2104
+ // Continue with original options if orchestration fails
2105
+ }
1838
2106
  }
1839
- }
1840
- // Emit generation start event (NeuroLink format - keep existing)
1841
- this.emitter.emit("generation:start", {
1842
- provider: options.provider || "auto",
1843
- timestamp: startTime,
1844
- });
1845
- // ADD: Bedrock-compatible response:start event
1846
- this.emitter.emit("response:start");
1847
- // ADD: Bedrock-compatible message event
1848
- this.emitter.emit("message", `Starting ${options.provider || "auto"} text generation...`);
1849
- // Process factory configuration
1850
- const factoryResult = processFactoryOptions(options);
1851
- // Validate factory configuration if present
1852
- if (factoryResult.hasFactoryConfig && options.factoryConfig) {
1853
- const validation = validateFactoryConfig(options.factoryConfig);
1854
- if (!validation.isValid) {
1855
- logger.warn("Invalid factory configuration detected", {
1856
- errors: validation.errors,
1857
- });
1858
- // Continue with warning rather than throwing - graceful degradation
2107
+ // Emit generation start event (NeuroLink format - keep existing)
2108
+ this.emitter.emit("generation:start", {
2109
+ provider: options.provider || "auto",
2110
+ timestamp: startTime,
2111
+ });
2112
+ // ADD: Bedrock-compatible response:start event
2113
+ this.emitter.emit("response:start");
2114
+ // ADD: Bedrock-compatible message event
2115
+ this.emitter.emit("message", `Starting ${options.provider || "auto"} text generation...`);
2116
+ // Process factory configuration
2117
+ const factoryResult = processFactoryOptions(options);
2118
+ // Validate factory configuration if present
2119
+ if (factoryResult.hasFactoryConfig && options.factoryConfig) {
2120
+ const validation = validateFactoryConfig(options.factoryConfig);
2121
+ if (!validation.isValid) {
2122
+ logger.warn("Invalid factory configuration detected", {
2123
+ errors: validation.errors,
2124
+ });
2125
+ // Continue with warning rather than throwing - graceful degradation
2126
+ }
1859
2127
  }
1860
- }
1861
- // RAG Integration: If rag config is provided, prepare the RAG search tool
1862
- if (options.rag?.files?.length) {
1863
- try {
1864
- const { prepareRAGTool } = await import("./rag/ragIntegration.js");
1865
- const ragResult = await prepareRAGTool(options.rag, options.provider);
1866
- // Inject the RAG tool into the tools record
1867
- if (!options.tools) {
1868
- options.tools = {};
2128
+ // RAG Integration: If rag config is provided, prepare the RAG search tool
2129
+ if (options.rag?.files?.length) {
2130
+ try {
2131
+ const { prepareRAGTool } = await import("./rag/ragIntegration.js");
2132
+ const ragResult = await prepareRAGTool(options.rag, options.provider);
2133
+ // Inject the RAG tool into the tools record
2134
+ if (!options.tools) {
2135
+ options.tools = {};
2136
+ }
2137
+ options.tools[ragResult.toolName] = ragResult.tool;
2138
+ // Inject RAG-aware system prompt so the AI uses the RAG tool first
2139
+ const ragSystemInstruction = [
2140
+ `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
2141
+ `${ragResult.filesLoaded} loaded document(s) containing ${ragResult.chunksIndexed} indexed chunks.`,
2142
+ `ALWAYS use the "${ragResult.toolName}" tool FIRST to answer the user's question before using any other tools.`,
2143
+ `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
2144
+ `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
2145
+ ].join(" ");
2146
+ options.systemPrompt =
2147
+ (options.systemPrompt || "") + ragSystemInstruction;
2148
+ logger.info("[RAG] Tool injected into generate()", {
2149
+ toolName: ragResult.toolName,
2150
+ filesLoaded: ragResult.filesLoaded,
2151
+ chunksIndexed: ragResult.chunksIndexed,
2152
+ });
2153
+ }
2154
+ catch (error) {
2155
+ logger.warn("[RAG] Failed to prepare RAG tool, continuing without RAG", {
2156
+ error: error instanceof Error
2157
+ ? error.message
2158
+ : String(error),
2159
+ });
1869
2160
  }
1870
- options.tools[ragResult.toolName] =
1871
- ragResult.tool;
1872
- // Inject RAG-aware system prompt so the AI uses the RAG tool first
1873
- const ragSystemInstruction = [
1874
- `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
1875
- `${ragResult.filesLoaded} loaded document(s) containing ${ragResult.chunksIndexed} indexed chunks.`,
1876
- `ALWAYS use the "${ragResult.toolName}" tool FIRST to answer the user's question before using any other tools.`,
1877
- `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
1878
- `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
1879
- ].join(" ");
1880
- options.systemPrompt =
1881
- (options.systemPrompt || "") + ragSystemInstruction;
1882
- logger.info("[RAG] Tool injected into generate()", {
1883
- toolName: ragResult.toolName,
1884
- filesLoaded: ragResult.filesLoaded,
1885
- chunksIndexed: ragResult.chunksIndexed,
1886
- });
1887
2161
  }
1888
- catch (error) {
1889
- logger.warn("[RAG] Failed to prepare RAG tool, continuing without RAG", {
1890
- error: error instanceof Error ? error.message : String(error),
2162
+ // 🔧 CRITICAL FIX: Convert to TextGenerationOptions while preserving the input object for multimodal support
2163
+ const baseOptions = {
2164
+ prompt: options.input.text,
2165
+ provider: options.provider,
2166
+ model: options.model,
2167
+ temperature: options.temperature,
2168
+ maxTokens: options.maxTokens,
2169
+ systemPrompt: options.systemPrompt,
2170
+ schema: options.schema,
2171
+ output: options.output,
2172
+ tools: options.tools, // Includes RAG tools if rag config was provided
2173
+ disableTools: options.disableTools,
2174
+ toolFilter: options.toolFilter,
2175
+ excludeTools: options.excludeTools,
2176
+ maxSteps: options.maxSteps,
2177
+ toolChoice: options.toolChoice,
2178
+ prepareStep: options.prepareStep,
2179
+ enableAnalytics: options.enableAnalytics,
2180
+ enableEvaluation: options.enableEvaluation,
2181
+ context: options.context,
2182
+ evaluationDomain: options.evaluationDomain,
2183
+ toolUsageContext: options.toolUsageContext,
2184
+ input: options.input, // This includes text, images, and content arrays
2185
+ region: options.region,
2186
+ tts: options.tts,
2187
+ fileRegistry: this.fileRegistry,
2188
+ abortSignal: options.abortSignal,
2189
+ skipToolPromptInjection: options.skipToolPromptInjection,
2190
+ };
2191
+ // Auto-map top-level sessionId/userId to context for convenience
2192
+ // Tests and users may pass sessionId/userId as top-level options
2193
+ const extraContext = options;
2194
+ if (extraContext.sessionId || extraContext.userId) {
2195
+ baseOptions.context = {
2196
+ ...baseOptions.context,
2197
+ ...(extraContext.sessionId &&
2198
+ !baseOptions.context?.sessionId
2199
+ ? { sessionId: extraContext.sessionId }
2200
+ : {}),
2201
+ ...(extraContext.userId && !baseOptions.context?.userId
2202
+ ? { userId: extraContext.userId }
2203
+ : {}),
2204
+ };
2205
+ }
2206
+ // Apply factory enhancement using centralized utilities
2207
+ const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
2208
+ // Pass conversation memory config if available
2209
+ if (this.conversationMemory) {
2210
+ textOptions.conversationMemoryConfig =
2211
+ this.conversationMemory.config;
2212
+ // Include original prompt for context summarization
2213
+ textOptions.originalPrompt = originalPrompt;
2214
+ }
2215
+ // Detect and execute domain-specific tools
2216
+ const { toolResults, enhancedPrompt } = await this.detectAndExecuteTools(textOptions.prompt || options.input.text, factoryResult.domainType);
2217
+ // Update prompt with tool results if available
2218
+ if (enhancedPrompt !== textOptions.prompt) {
2219
+ textOptions.prompt = enhancedPrompt;
2220
+ logger.debug("Enhanced prompt with tool results", {
2221
+ originalLength: options.input.text.length,
2222
+ enhancedLength: enhancedPrompt.length,
2223
+ toolResults: toolResults.length,
1891
2224
  });
1892
2225
  }
1893
- }
1894
- // 🔧 CRITICAL FIX: Convert to TextGenerationOptions while preserving the input object for multimodal support
1895
- const baseOptions = {
1896
- prompt: options.input.text,
1897
- provider: options.provider,
1898
- model: options.model,
1899
- temperature: options.temperature,
1900
- maxTokens: options.maxTokens,
1901
- systemPrompt: options.systemPrompt,
1902
- schema: options.schema,
1903
- output: options.output,
1904
- tools: options.tools, // Includes RAG tools if rag config was provided
1905
- disableTools: options.disableTools,
1906
- toolFilter: options.toolFilter,
1907
- excludeTools: options.excludeTools,
1908
- maxSteps: options.maxSteps,
1909
- toolChoice: options.toolChoice,
1910
- prepareStep: options.prepareStep,
1911
- enableAnalytics: options.enableAnalytics,
1912
- enableEvaluation: options.enableEvaluation,
1913
- context: options.context,
1914
- evaluationDomain: options.evaluationDomain,
1915
- toolUsageContext: options.toolUsageContext,
1916
- input: options.input, // This includes text, images, and content arrays
1917
- region: options.region,
1918
- tts: options.tts,
1919
- fileRegistry: this.fileRegistry,
1920
- abortSignal: options.abortSignal,
1921
- skipToolPromptInjection: options.skipToolPromptInjection,
1922
- };
1923
- // Auto-map top-level sessionId/userId to context for convenience
1924
- // Tests and users may pass sessionId/userId as top-level options
1925
- const extraContext = options;
1926
- if (extraContext.sessionId || extraContext.userId) {
1927
- baseOptions.context = {
1928
- ...baseOptions.context,
1929
- ...(extraContext.sessionId && !baseOptions.context?.sessionId
1930
- ? { sessionId: extraContext.sessionId }
1931
- : {}),
1932
- ...(extraContext.userId && !baseOptions.context?.userId
1933
- ? { userId: extraContext.userId }
1934
- : {}),
2226
+ // Use redesigned generation logic
2227
+ const textResult = await this.generateTextInternal(textOptions);
2228
+ // Emit generation completion event (NeuroLink format - enhanced with content)
2229
+ this.emitter.emit("generation:end", {
2230
+ provider: textResult.provider,
2231
+ responseTime: Date.now() - startTime,
2232
+ toolsUsed: textResult.toolsUsed,
2233
+ timestamp: Date.now(),
2234
+ result: textResult, // Enhanced: include full result
2235
+ prompt: options.input?.text ||
2236
+ options.prompt,
2237
+ temperature: textOptions.temperature,
2238
+ maxTokens: textOptions.maxTokens,
2239
+ });
2240
+ // ADD: Bedrock-compatible response:end event with content
2241
+ this.emitter.emit("response:end", textResult.content || "");
2242
+ // ADD: Bedrock-compatible message event
2243
+ this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
2244
+ // Convert back to GenerateResult
2245
+ const generateResult = {
2246
+ content: textResult.content,
2247
+ finishReason: textResult.finishReason,
2248
+ provider: textResult.provider,
2249
+ model: textResult.model,
2250
+ usage: textResult.usage
2251
+ ? {
2252
+ input: textResult.usage.input || 0,
2253
+ output: textResult.usage.output || 0,
2254
+ total: textResult.usage.total || 0,
2255
+ }
2256
+ : undefined,
2257
+ responseTime: textResult.responseTime,
2258
+ toolsUsed: textResult.toolsUsed,
2259
+ toolExecutions: transformToolExecutions(textResult.toolExecutions),
2260
+ enhancedWithTools: textResult.enhancedWithTools,
2261
+ availableTools: transformAvailableTools(textResult.availableTools),
2262
+ analytics: textResult.analytics,
2263
+ // CRITICAL FIX: Include imageOutput for image generation models
2264
+ imageOutput: textResult.imageOutput,
2265
+ evaluation: textResult.evaluation
2266
+ ? {
2267
+ ...textResult.evaluation,
2268
+ isOffTopic: textResult.evaluation
2269
+ .isOffTopic ?? false,
2270
+ alertSeverity: textResult.evaluation
2271
+ .alertSeverity ?? "none",
2272
+ reasoning: textResult.evaluation
2273
+ .reasoning ?? "No evaluation provided",
2274
+ evaluationModel: textResult.evaluation
2275
+ .evaluationModel ?? "unknown",
2276
+ evaluationTime: textResult.evaluation
2277
+ .evaluationTime ?? Date.now(),
2278
+ // Include evaluationDomain from original options
2279
+ evaluationDomain: textResult.evaluation
2280
+ .evaluationDomain ??
2281
+ textOptions.evaluationDomain ??
2282
+ factoryResult.domainType,
2283
+ }
2284
+ : undefined,
2285
+ audio: textResult.audio,
2286
+ video: textResult.video,
2287
+ ppt: textResult.ppt,
1935
2288
  };
1936
- }
1937
- // Apply factory enhancement using centralized utilities
1938
- const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
1939
- // Pass conversation memory config if available
1940
- if (this.conversationMemory) {
1941
- textOptions.conversationMemoryConfig =
1942
- this.conversationMemory.config;
1943
- // Include original prompt for context summarization
1944
- textOptions.originalPrompt = originalPrompt;
1945
- }
1946
- // Detect and execute domain-specific tools
1947
- const { toolResults, enhancedPrompt } = await this.detectAndExecuteTools(textOptions.prompt || options.input.text, factoryResult.domainType);
1948
- // Update prompt with tool results if available
1949
- if (enhancedPrompt !== textOptions.prompt) {
1950
- textOptions.prompt = enhancedPrompt;
1951
- logger.debug("Enhanced prompt with tool results", {
1952
- originalLength: options.input.text.length,
1953
- enhancedLength: enhancedPrompt.length,
1954
- toolResults: toolResults.length,
2289
+ // Accumulate session cost for budget tracking
2290
+ if (generateResult.analytics?.cost &&
2291
+ generateResult.analytics.cost > 0) {
2292
+ this._sessionCostUsd += generateResult.analytics.cost;
2293
+ }
2294
+ this.scheduleGenerateMem0Storage(options, originalPrompt, generateResult);
2295
+ // Set completion span attributes
2296
+ generateSpan.setAttribute("neurolink.output_length", generateResult.content?.length || 0);
2297
+ generateSpan.setAttribute("neurolink.tokens.input", generateResult.usage?.input || 0);
2298
+ generateSpan.setAttribute("neurolink.tokens.output", generateResult.usage?.output || 0);
2299
+ generateSpan.setAttribute("neurolink.finish_reason", generateResult.finishReason || "unknown");
2300
+ generateSpan.setAttribute("neurolink.result_provider", generateResult.provider || "unknown");
2301
+ generateSpan.setAttribute("neurolink.result_model", generateResult.model || "unknown");
2302
+ generateSpan.setStatus({ code: SpanStatusCode.OK });
2303
+ return generateResult;
2304
+ });
2305
+ }
2306
+ catch (error) {
2307
+ generateSpan.setStatus({
2308
+ code: SpanStatusCode.ERROR,
2309
+ message: error instanceof Error ? error.message : String(error),
2310
+ });
2311
+ // Emit generation:end on error so metrics listeners still record the failure.
2312
+ // Note: variables declared inside try blocks are not accessible in error
2313
+ // handlers, so we extract what we can from the original input.
2314
+ const errProvider = typeof optionsOrPrompt === "object"
2315
+ ? optionsOrPrompt.provider || "unknown"
2316
+ : "unknown";
2317
+ const errModel = typeof optionsOrPrompt === "object"
2318
+ ? optionsOrPrompt.model || "unknown"
2319
+ : "unknown";
2320
+ try {
2321
+ this.emitter.emit("generation:end", {
2322
+ provider: errProvider,
2323
+ model: errModel,
2324
+ responseTime: 0,
2325
+ error: error instanceof Error ? error.message : String(error),
2326
+ success: false,
1955
2327
  });
1956
2328
  }
1957
- // Use redesigned generation logic
1958
- const textResult = await this.generateTextInternal(textOptions);
1959
- // Emit generation completion event (NeuroLink format - enhanced with content)
1960
- this.emitter.emit("generation:end", {
1961
- provider: textResult.provider,
1962
- responseTime: Date.now() - startTime,
1963
- toolsUsed: textResult.toolsUsed,
1964
- timestamp: Date.now(),
1965
- result: textResult, // Enhanced: include full result
1966
- });
1967
- // ADD: Bedrock-compatible response:end event with content
1968
- this.emitter.emit("response:end", textResult.content || "");
1969
- // ADD: Bedrock-compatible message event
1970
- this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
1971
- // Convert back to GenerateResult
1972
- const generateResult = {
1973
- content: textResult.content,
1974
- finishReason: textResult.finishReason,
1975
- provider: textResult.provider,
1976
- model: textResult.model,
1977
- usage: textResult.usage
1978
- ? {
1979
- input: textResult.usage.input || 0,
1980
- output: textResult.usage.output || 0,
1981
- total: textResult.usage.total || 0,
1982
- }
1983
- : undefined,
1984
- responseTime: textResult.responseTime,
1985
- toolsUsed: textResult.toolsUsed,
1986
- toolExecutions: transformToolExecutions(textResult.toolExecutions),
1987
- enhancedWithTools: textResult.enhancedWithTools,
1988
- availableTools: transformAvailableTools(textResult.availableTools),
1989
- analytics: textResult.analytics,
1990
- // CRITICAL FIX: Include imageOutput for image generation models
1991
- imageOutput: textResult.imageOutput,
1992
- evaluation: textResult.evaluation
1993
- ? {
1994
- ...textResult.evaluation,
1995
- isOffTopic: textResult.evaluation
1996
- .isOffTopic ?? false,
1997
- alertSeverity: textResult.evaluation
1998
- .alertSeverity ??
1999
- "none",
2000
- reasoning: textResult.evaluation
2001
- .reasoning ?? "No evaluation provided",
2002
- evaluationModel: textResult.evaluation
2003
- .evaluationModel ?? "unknown",
2004
- evaluationTime: textResult.evaluation
2005
- .evaluationTime ?? Date.now(),
2006
- // Include evaluationDomain from original options
2007
- evaluationDomain: textResult.evaluation
2008
- .evaluationDomain ??
2009
- textOptions.evaluationDomain ??
2010
- factoryResult.domainType,
2011
- }
2012
- : undefined,
2013
- audio: textResult.audio,
2014
- video: textResult.video,
2015
- ppt: textResult.ppt,
2016
- };
2017
- // Accumulate session cost for budget tracking
2018
- if (generateResult.analytics?.cost &&
2019
- generateResult.analytics.cost > 0) {
2020
- this._sessionCostUsd += generateResult.analytics.cost;
2329
+ catch (emitError) {
2330
+ void emitError; // non-blocking — error event emission is best-effort
2021
2331
  }
2022
- this.scheduleGenerateMem0Storage(options, originalPrompt, generateResult);
2023
- // Set completion span attributes
2024
- generateSpan.setAttribute("neurolink.output_length", generateResult.content?.length || 0);
2025
- generateSpan.setAttribute("neurolink.tokens.input", generateResult.usage?.input || 0);
2026
- generateSpan.setAttribute("neurolink.tokens.output", generateResult.usage?.output || 0);
2027
- generateSpan.setAttribute("neurolink.finish_reason", generateResult.finishReason || "unknown");
2028
- generateSpan.setAttribute("neurolink.result_provider", generateResult.provider || "unknown");
2029
- generateSpan.setAttribute("neurolink.result_model", generateResult.model || "unknown");
2030
- generateSpan.setStatus({ code: SpanStatusCode.OK });
2031
- return generateResult;
2032
- });
2033
- }
2034
- catch (error) {
2035
- generateSpan.setStatus({
2036
- code: SpanStatusCode.ERROR,
2037
- message: error instanceof Error ? error.message : String(error),
2038
- });
2039
- throw error;
2040
- }
2041
- finally {
2042
- generateSpan.end();
2043
- }
2332
+ throw error;
2333
+ }
2334
+ finally {
2335
+ generateSpan.end();
2336
+ }
2337
+ }); // end metricsTraceContextStorage.run
2044
2338
  });
2045
2339
  }
2046
2340
  /**
@@ -2409,7 +2703,16 @@ Current user's request: ${currentInput}`;
2409
2703
  cacheSavingsPercent: mcpResult.usage.cacheSavingsPercent,
2410
2704
  }),
2411
2705
  });
2412
- await storeConversationTurn(this.conversationMemory, options, mcpResult, new Date(generateInternalStartTime), requestId);
2706
+ {
2707
+ const memStoreStart = Date.now();
2708
+ try {
2709
+ await storeConversationTurn(this.conversationMemory, options, mcpResult, new Date(generateInternalStartTime), requestId);
2710
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "mcp" }, Date.now() - memStoreStart, SpanStatus.OK);
2711
+ }
2712
+ catch (memErr) {
2713
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "mcp" }, Date.now() - memStoreStart, SpanStatus.ERROR, memErr instanceof Error ? memErr.message : String(memErr));
2714
+ }
2715
+ }
2413
2716
  this.emitter.emit("response:end", mcpResult.content || "");
2414
2717
  internalSpan.setAttribute("neurolink.path", "mcp");
2415
2718
  internalSpan.setAttribute("neurolink.tokens.input", mcpResult.usage?.input || 0);
@@ -2448,7 +2751,16 @@ Current user's request: ${currentInput}`;
2448
2751
  cacheSavingsPercent: directResult.usage.cacheSavingsPercent,
2449
2752
  }),
2450
2753
  });
2451
- await storeConversationTurn(this.conversationMemory, options, directResult, new Date(generateInternalStartTime), requestId);
2754
+ {
2755
+ const memStoreStart = Date.now();
2756
+ try {
2757
+ await storeConversationTurn(this.conversationMemory, options, directResult, new Date(generateInternalStartTime), requestId);
2758
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "direct" }, Date.now() - memStoreStart, SpanStatus.OK);
2759
+ }
2760
+ catch (memErr) {
2761
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "direct" }, Date.now() - memStoreStart, SpanStatus.ERROR, memErr instanceof Error ? memErr.message : String(memErr));
2762
+ }
2763
+ }
2452
2764
  this.emitter.emit("response:end", directResult.content || "");
2453
2765
  this.emitter.emit("message", `Text generation completed successfully`);
2454
2766
  internalSpan.setAttribute("neurolink.path", "direct");
@@ -2975,6 +3287,8 @@ Current user's request: ${currentInput}`;
2975
3287
  const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
2976
3288
  this, // Pass SDK instance
2977
3289
  options.region);
3290
+ // Propagate trace context for parent-child span hierarchy
3291
+ provider._traceContext = this._metricsTraceContext;
2978
3292
  // ADD: Emit connection events for all providers (Bedrock-compatible)
2979
3293
  this.emitter.emit("connected");
2980
3294
  this.emitter.emit("message", `${providerName} provider initialized successfully`);
@@ -3039,6 +3353,7 @@ Current user's request: ${currentInput}`;
3039
3353
  audio: result.audio,
3040
3354
  video: result.video,
3041
3355
  ppt: result.ppt,
3356
+ imageOutput: result.imageOutput,
3042
3357
  // Include analytics and evaluation from BaseProvider
3043
3358
  analytics: result.analytics,
3044
3359
  evaluation: result.evaluation,
@@ -3189,6 +3504,8 @@ Current user's request: ${currentInput}`;
3189
3504
  const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
3190
3505
  this, // Pass SDK instance
3191
3506
  options.region);
3507
+ // Propagate trace context for parent-child span hierarchy
3508
+ provider._traceContext = this._metricsTraceContext;
3192
3509
  // ADD: Emit connection events for successful provider creation (Bedrock-compatible)
3193
3510
  this.emitter.emit("connected");
3194
3511
  this.emitter.emit("message", `${providerName} provider initialized successfully`);
@@ -3422,206 +3739,276 @@ Current user's request: ${currentInput}`;
3422
3739
  * @throws {Error} When conversation memory operations fail (if enabled)
3423
3740
  */
3424
3741
  async stream(options) {
3425
- // Manual span lifecycle: the span must stay open until the stream is fully consumed,
3426
- // NOT when the StreamResult object is returned. withSpan would end the span too early
3427
- // because streaming results resolve lazily via the async generator.
3428
- const streamSpan = tracers.sdk.startSpan("neurolink.stream", {
3429
- kind: SpanKind.INTERNAL,
3430
- attributes: {
3431
- [ATTR.NL_PROVIDER]: options.provider || "default",
3432
- [ATTR.GEN_AI_MODEL]: options.model || "default",
3433
- [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
3434
- [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
3435
- [ATTR.NL_STREAM_MODE]: true,
3436
- },
3437
- });
3438
- const spanStartTime = Date.now();
3439
- try {
3440
- const startTime = Date.now();
3441
- const hrTimeStart = process.hrtime.bigint();
3442
- const streamId = `neurolink-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3443
- const originalPrompt = options.input.text; // Store the original prompt for memory storage
3444
- // Inject file registry for lazy on-demand file processing
3445
- options.fileRegistry = this.fileRegistry;
3446
- await this.validateStreamInput(options);
3447
- // Check budget limit before making API call
3448
- if (options.maxBudgetUsd !== undefined &&
3449
- options.maxBudgetUsd > 0 &&
3450
- this._sessionCostUsd >= options.maxBudgetUsd) {
3451
- throw new NeuroLinkError({
3452
- code: "SESSION_BUDGET_EXCEEDED",
3453
- message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
3454
- category: ErrorCategory.VALIDATION,
3455
- severity: ErrorSeverity.HIGH,
3456
- retriable: false,
3457
- context: {
3458
- spent: this._sessionCostUsd,
3459
- limit: options.maxBudgetUsd,
3460
- },
3461
- });
3462
- }
3463
- this.emitStreamStartEvents(options, startTime);
3464
- // Check if workflow is requested
3465
- if (options.workflow || options.workflowConfig) {
3466
- const result = await this.streamWithWorkflow(options, startTime);
3467
- streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
3468
- streamSpan.setStatus({ code: SpanStatusCode.OK });
3469
- streamSpan.end();
3470
- return result;
3471
- }
3472
- // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
3473
- return await this.setLangfuseContextFromOptions(options, async () => {
3474
- try {
3475
- // Prepare options: init memory, MCP, Mem0, orchestration, Ollama auto-disable, tool detection
3476
- const { enhancedOptions, factoryResult } = await this.prepareStreamOptions(options, streamId, startTime, hrTimeStart);
3477
- const { stream: mcpStream, provider: providerName } = await this.createMCPStream(enhancedOptions);
3478
- // Update span with resolved provider name
3479
- streamSpan.setAttribute(ATTR.NL_PROVIDER, providerName || "unknown");
3480
- let accumulatedContent = "";
3481
- let chunkCount = 0;
3482
- // Set up event capture listeners
3483
- const { eventSequence, cleanup: cleanupListeners } = this.setupStreamEventListeners();
3484
- const metadata = {
3485
- fallbackAttempted: false,
3486
- guardrailsBlocked: false,
3487
- error: undefined,
3488
- };
3489
- const self = this;
3490
- const streamStartTime = Date.now();
3491
- const sessionId = enhancedOptions.context
3492
- ?.sessionId;
3493
- const processedStream = (async function* () {
3494
- let streamError = undefined;
3742
+ // Set metrics trace context for parent-child span linking
3743
+ const metricsTraceId = crypto.randomUUID().replace(/-/g, "");
3744
+ const metricsParentSpanId = crypto
3745
+ .randomUUID()
3746
+ .replace(/-/g, "")
3747
+ .substring(0, 16);
3748
+ // Scope trace context to this request via AsyncLocalStorage
3749
+ // so concurrent generate/stream calls don't race.
3750
+ return metricsTraceContextStorage.run({ traceId: metricsTraceId, parentSpanId: metricsParentSpanId }, async () => {
3751
+ // Manual span lifecycle: the span must stay open until the stream is fully consumed,
3752
+ // NOT when the StreamResult object is returned. withSpan would end the span too early
3753
+ // because streaming results resolve lazily via the async generator.
3754
+ const streamSpan = tracers.sdk.startSpan("neurolink.stream", {
3755
+ kind: SpanKind.INTERNAL,
3756
+ attributes: {
3757
+ [ATTR.NL_PROVIDER]: options.provider || "default",
3758
+ [ATTR.GEN_AI_MODEL]: options.model || "default",
3759
+ [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
3760
+ [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
3761
+ [ATTR.NL_STREAM_MODE]: true,
3762
+ },
3763
+ });
3764
+ const spanStartTime = Date.now();
3765
+ try {
3766
+ const startTime = Date.now();
3767
+ const hrTimeStart = process.hrtime.bigint();
3768
+ const streamId = `neurolink-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
3769
+ const originalPrompt = options.input.text; // Store the original prompt for memory storage
3770
+ // Inject file registry for lazy on-demand file processing
3771
+ options.fileRegistry = this.fileRegistry;
3772
+ await this.validateStreamInput(options);
3773
+ // Check budget limit before making API call
3774
+ if (options.maxBudgetUsd !== undefined &&
3775
+ options.maxBudgetUsd > 0 &&
3776
+ this._sessionCostUsd >= options.maxBudgetUsd) {
3777
+ throw new NeuroLinkError({
3778
+ code: "SESSION_BUDGET_EXCEEDED",
3779
+ message: `Session budget exceeded: spent $${this._sessionCostUsd.toFixed(4)} of $${options.maxBudgetUsd.toFixed(4)} limit`,
3780
+ category: ErrorCategory.VALIDATION,
3781
+ severity: ErrorSeverity.HIGH,
3782
+ retriable: false,
3783
+ context: {
3784
+ spent: this._sessionCostUsd,
3785
+ limit: options.maxBudgetUsd,
3786
+ },
3787
+ });
3788
+ }
3789
+ this.emitStreamStartEvents(options, startTime);
3790
+ // Check if workflow is requested
3791
+ if (options.workflow || options.workflowConfig) {
3792
+ const result = await this.streamWithWorkflow(options, startTime);
3793
+ // Wrap the workflow stream so the span stays open until fully consumed
3794
+ const originalWorkflowStream = result.stream;
3795
+ result.stream = (async function* () {
3495
3796
  try {
3496
- for await (const chunk of mcpStream) {
3497
- chunkCount++;
3498
- if (chunk &&
3499
- "content" in chunk &&
3500
- typeof chunk.content === "string") {
3501
- accumulatedContent += chunk.content;
3502
- self.emitter.emit("response:chunk", chunk.content);
3503
- // Emit stream:chunk event (Observability Solution 8)
3504
- self.emitter.emit("stream:chunk", {
3505
- type: "stream:chunk",
3506
- content: chunk.content,
3507
- metadata: {
3508
- chunkIndex: chunkCount,
3509
- totalLength: accumulatedContent.length,
3510
- },
3511
- timestamp: Date.now(),
3512
- });
3513
- }
3797
+ for await (const chunk of originalWorkflowStream) {
3514
3798
  yield chunk;
3515
3799
  }
3516
- if (chunkCount === 0 && !metadata.fallbackAttempted) {
3517
- yield* self.handleStreamFallback(metadata, originalPrompt, enhancedOptions, providerName, accumulatedContent, (content) => {
3518
- accumulatedContent += content;
3519
- });
3520
- }
3521
- // Emit stream:complete event (Observability Solution 8)
3522
- self.emitter.emit("stream:complete", {
3523
- type: "stream:complete",
3524
- content: accumulatedContent,
3525
- metadata: {
3526
- chunkCount,
3527
- totalLength: accumulatedContent.length,
3528
- durationMs: Date.now() - streamStartTime,
3529
- sessionId,
3530
- },
3531
- timestamp: Date.now(),
3532
- });
3800
+ streamSpan.setStatus({ code: SpanStatusCode.OK });
3533
3801
  }
3534
3802
  catch (error) {
3535
- streamError = error;
3536
- // Emit stream:error event (Observability Solution 8)
3537
- self.emitter.emit("stream:error", {
3538
- type: "stream:error",
3539
- content: error instanceof Error ? error.message : String(error),
3540
- metadata: {
3541
- chunkCount,
3542
- totalLength: accumulatedContent.length,
3543
- durationMs: Date.now() - streamStartTime,
3544
- errorName: error instanceof Error ? error.name : "UnknownError",
3545
- sessionId,
3546
- },
3547
- timestamp: Date.now(),
3803
+ streamSpan.setStatus({
3804
+ code: SpanStatusCode.ERROR,
3805
+ message: error instanceof Error ? error.message : String(error),
3548
3806
  });
3549
3807
  throw error;
3550
3808
  }
3551
3809
  finally {
3552
- cleanupListeners();
3553
- // Finalize span now that the stream is fully consumed
3554
3810
  streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
3555
- streamSpan.setAttribute(ATTR.NL_OUTPUT_LENGTH, accumulatedContent.length);
3556
- streamSpan.setAttribute(ATTR.GEN_AI_FINISH_REASON, metadata.error || streamError ? "error" : "stop");
3557
- if (metadata.error || streamError) {
3558
- streamSpan.setStatus({
3559
- code: SpanStatusCode.ERROR,
3560
- message: metadata.error ||
3561
- (streamError instanceof Error
3562
- ? streamError.message
3563
- : String(streamError)),
3811
+ streamSpan.end();
3812
+ }
3813
+ })();
3814
+ return result;
3815
+ }
3816
+ // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
3817
+ return await this.setLangfuseContextFromOptions(options, async () => {
3818
+ try {
3819
+ // Prepare options: init memory, MCP, Mem0, orchestration, Ollama auto-disable, tool detection
3820
+ const { enhancedOptions, factoryResult } = await this.prepareStreamOptions(options, streamId, startTime, hrTimeStart);
3821
+ const { stream: mcpStream, provider: providerName, usage: streamUsage, model: streamModel, analytics: streamAnalytics, } = await this.createMCPStream(enhancedOptions);
3822
+ // Update span with resolved provider name
3823
+ streamSpan.setAttribute(ATTR.NL_PROVIDER, providerName || "unknown");
3824
+ let accumulatedContent = "";
3825
+ let chunkCount = 0;
3826
+ // Set up event capture listeners
3827
+ const { eventSequence, cleanup: cleanupListeners } = this.setupStreamEventListeners();
3828
+ const metadata = {
3829
+ fallbackAttempted: false,
3830
+ guardrailsBlocked: false,
3831
+ error: undefined,
3832
+ fallbackProvider: undefined,
3833
+ fallbackModel: undefined,
3834
+ };
3835
+ const self = this;
3836
+ const streamStartTime = Date.now();
3837
+ const sessionId = enhancedOptions.context?.sessionId;
3838
+ const processedStream = (async function* () {
3839
+ let streamError;
3840
+ try {
3841
+ for await (const chunk of mcpStream) {
3842
+ chunkCount++;
3843
+ if (chunk &&
3844
+ "content" in chunk &&
3845
+ typeof chunk.content === "string") {
3846
+ accumulatedContent += chunk.content;
3847
+ self.emitter.emit("response:chunk", chunk.content);
3848
+ // Emit stream:chunk event (Observability Solution 8)
3849
+ self.emitter.emit("stream:chunk", {
3850
+ type: "stream:chunk",
3851
+ content: chunk.content,
3852
+ metadata: {
3853
+ chunkIndex: chunkCount,
3854
+ totalLength: accumulatedContent.length,
3855
+ },
3856
+ timestamp: Date.now(),
3857
+ });
3858
+ }
3859
+ yield chunk;
3860
+ }
3861
+ if (chunkCount === 0 && !metadata.fallbackAttempted) {
3862
+ yield* self.handleStreamFallback(metadata, originalPrompt, enhancedOptions, providerName, accumulatedContent, (content) => {
3863
+ accumulatedContent += content;
3864
+ });
3865
+ }
3866
+ // Emit stream:complete event (Observability Solution 8)
3867
+ // When fallback took over, attribute the completion to the
3868
+ // fallback provider so downstream telemetry reflects reality.
3869
+ const effectiveProvider = metadata.fallbackProvider ?? providerName;
3870
+ const effectiveModel = metadata.fallbackModel ??
3871
+ streamModel ??
3872
+ enhancedOptions.model;
3873
+ // Resolve analytics promise to get final token usage
3874
+ let resolvedUsage = streamUsage;
3875
+ if (!resolvedUsage && streamAnalytics) {
3876
+ try {
3877
+ const resolved = await Promise.resolve(streamAnalytics);
3878
+ if (resolved?.tokenUsage) {
3879
+ resolvedUsage = resolved.tokenUsage;
3880
+ }
3881
+ }
3882
+ catch {
3883
+ /* non-blocking */
3884
+ }
3885
+ }
3886
+ self.emitter.emit("stream:complete", {
3887
+ type: "stream:complete",
3888
+ content: accumulatedContent,
3889
+ provider: effectiveProvider,
3890
+ model: effectiveModel,
3891
+ prompt: enhancedOptions.input?.text ||
3892
+ enhancedOptions.prompt,
3893
+ metadata: {
3894
+ chunkCount,
3895
+ totalLength: accumulatedContent.length,
3896
+ durationMs: Date.now() - streamStartTime,
3897
+ sessionId,
3898
+ usage: resolvedUsage,
3899
+ ...(metadata.fallbackAttempted && {
3900
+ primaryProvider: providerName,
3901
+ primaryModel: enhancedOptions.model,
3902
+ fallback: true,
3903
+ }),
3904
+ },
3905
+ timestamp: Date.now(),
3564
3906
  });
3565
3907
  }
3566
- else {
3567
- streamSpan.setStatus({ code: SpanStatusCode.OK });
3568
- }
3569
- streamSpan.end();
3570
- if (accumulatedContent.trim()) {
3571
- logger.info(`[NeuroLink.stream] stream() - COMPLETE SUCCESS`, {
3908
+ catch (error) {
3909
+ streamError = error;
3910
+ // Emit stream:error event (Observability Solution 8)
3911
+ self.emitter.emit("stream:error", {
3912
+ type: "stream:error",
3913
+ content: error instanceof Error ? error.message : String(error),
3572
3914
  provider: providerName,
3573
3915
  model: enhancedOptions.model,
3574
- responseTimeMs: Date.now() - startTime,
3575
- contentLength: accumulatedContent.length,
3576
- fallback: metadata.fallbackAttempted,
3916
+ metadata: {
3917
+ chunkCount,
3918
+ totalLength: accumulatedContent.length,
3919
+ durationMs: Date.now() - streamStartTime,
3920
+ errorName: error instanceof Error ? error.name : "UnknownError",
3921
+ sessionId,
3922
+ },
3923
+ timestamp: Date.now(),
3577
3924
  });
3925
+ throw error;
3578
3926
  }
3579
- await self.storeStreamConversationMemory({
3580
- enhancedOptions,
3581
- providerName,
3582
- originalPrompt,
3583
- accumulatedContent,
3584
- startTime,
3585
- eventSequence,
3586
- });
3927
+ finally {
3928
+ cleanupListeners();
3929
+ // Finalize span now that the stream is fully consumed
3930
+ streamSpan.setAttribute("neurolink.response_time_ms", Date.now() - spanStartTime);
3931
+ streamSpan.setAttribute(ATTR.NL_OUTPUT_LENGTH, accumulatedContent.length);
3932
+ // When fallback took over, the primary provider's span must
3933
+ // reflect that it failed — never mark it as successful.
3934
+ const primaryFailed = !!(metadata.error || streamError);
3935
+ streamSpan.setAttribute(ATTR.GEN_AI_FINISH_REASON, primaryFailed ? "error" : "stop");
3936
+ if (metadata.fallbackAttempted) {
3937
+ streamSpan.setAttribute("neurolink.fallback_triggered", true);
3938
+ if (metadata.fallbackProvider) {
3939
+ streamSpan.setAttribute("neurolink.fallback_provider", metadata.fallbackProvider);
3940
+ }
3941
+ }
3942
+ if (primaryFailed) {
3943
+ streamSpan.setStatus({
3944
+ code: SpanStatusCode.ERROR,
3945
+ message: metadata.error ||
3946
+ (streamError instanceof Error
3947
+ ? streamError.message
3948
+ : String(streamError)),
3949
+ });
3950
+ }
3951
+ else {
3952
+ streamSpan.setStatus({ code: SpanStatusCode.OK });
3953
+ }
3954
+ streamSpan.end();
3955
+ if (accumulatedContent.trim()) {
3956
+ logger.info(`[NeuroLink.stream] stream() - COMPLETE SUCCESS`, {
3957
+ provider: providerName,
3958
+ model: enhancedOptions.model,
3959
+ responseTimeMs: Date.now() - startTime,
3960
+ contentLength: accumulatedContent.length,
3961
+ fallback: metadata.fallbackAttempted,
3962
+ });
3963
+ }
3964
+ await self.storeStreamConversationMemory({
3965
+ enhancedOptions,
3966
+ providerName,
3967
+ originalPrompt,
3968
+ accumulatedContent,
3969
+ startTime,
3970
+ eventSequence,
3971
+ });
3972
+ }
3973
+ })();
3974
+ const streamResult = await this.processStreamResult(processedStream, enhancedOptions, factoryResult);
3975
+ const responseTime = Date.now() - startTime;
3976
+ // Accumulate session cost for budget tracking
3977
+ if (streamResult.analytics?.cost &&
3978
+ streamResult.analytics.cost > 0) {
3979
+ this._sessionCostUsd += streamResult.analytics.cost;
3587
3980
  }
3588
- })();
3589
- const streamResult = await this.processStreamResult(processedStream, enhancedOptions, factoryResult);
3590
- const responseTime = Date.now() - startTime;
3591
- // Accumulate session cost for budget tracking
3592
- if (streamResult.analytics?.cost && streamResult.analytics.cost > 0) {
3593
- this._sessionCostUsd += streamResult.analytics.cost;
3981
+ this.emitStreamEndEvents(streamResult);
3982
+ return this.createStreamResponse(streamResult, processedStream, {
3983
+ providerName,
3984
+ options,
3985
+ startTime,
3986
+ responseTime,
3987
+ streamId,
3988
+ fallback: metadata.fallbackAttempted,
3989
+ guardrailsBlocked: metadata.guardrailsBlocked,
3990
+ error: metadata.error,
3991
+ events: eventSequence,
3992
+ });
3594
3993
  }
3595
- this.emitStreamEndEvents(streamResult);
3596
- return this.createStreamResponse(streamResult, processedStream, {
3597
- providerName,
3598
- options,
3599
- startTime,
3600
- responseTime,
3601
- streamId,
3602
- fallback: metadata.fallbackAttempted,
3603
- guardrailsBlocked: metadata.guardrailsBlocked,
3604
- error: metadata.error,
3605
- events: eventSequence,
3606
- });
3607
- }
3608
- catch (error) {
3609
- return this.handleStreamError(error, options, startTime, streamId, undefined, undefined);
3994
+ catch (error) {
3995
+ return this.handleStreamError(error, options, startTime, streamId, undefined, undefined);
3996
+ }
3997
+ });
3998
+ }
3999
+ catch (error) {
4000
+ // End span on error before re-throwing
4001
+ streamSpan.setStatus({
4002
+ code: SpanStatusCode.ERROR,
4003
+ message: error instanceof Error ? error.message : String(error),
4004
+ });
4005
+ if (error instanceof Error) {
4006
+ streamSpan.recordException(error);
3610
4007
  }
3611
- });
3612
- }
3613
- catch (error) {
3614
- // End span on error before re-throwing
3615
- streamSpan.setStatus({
3616
- code: SpanStatusCode.ERROR,
3617
- message: error instanceof Error ? error.message : String(error),
3618
- });
3619
- if (error instanceof Error) {
3620
- streamSpan.recordException(error);
4008
+ streamSpan.end();
4009
+ throw error;
3621
4010
  }
3622
- streamSpan.end();
3623
- throw error;
3624
- }
4011
+ }); // end metricsTraceContextStorage.run
3625
4012
  }
3626
4013
  /**
3627
4014
  * Prepare stream options: initialize memory, MCP, Mem0 retrieval, orchestration,
@@ -3828,6 +4215,25 @@ Current user's request: ${currentInput}`;
3828
4215
  metadata.fallbackAttempted = true;
3829
4216
  const errorMsg = "Stream completed with 0 chunks (possible guardrails block)";
3830
4217
  metadata.error = errorMsg;
4218
+ // Record a failed-provider span for the primary provider that returned 0 chunks
4219
+ try {
4220
+ const traceCtx = this._metricsTraceContext;
4221
+ let failedSpan = SpanSerializer.createGenerationSpan({
4222
+ provider: providerName,
4223
+ model: enhancedOptions.model || "unknown",
4224
+ name: `gen_ai.${providerName}.stream.failed`,
4225
+ traceId: traceCtx?.traceId,
4226
+ parentSpanId: traceCtx?.parentSpanId,
4227
+ });
4228
+ failedSpan = SpanSerializer.endSpan(failedSpan, SpanStatus.ERROR);
4229
+ failedSpan.statusMessage = errorMsg;
4230
+ failedSpan.durationMs = 0;
4231
+ this.metricsAggregator.recordSpan(failedSpan);
4232
+ getMetricsAggregator().recordSpan(failedSpan);
4233
+ }
4234
+ catch {
4235
+ /* non-blocking */
4236
+ }
3831
4237
  const fallbackRoute = ModelRouter.getFallbackRoute(originalPrompt || enhancedOptions.input.text || "", {
3832
4238
  provider: providerName,
3833
4239
  model: enhancedOptions.model || "gpt-4o",
@@ -3871,6 +4277,8 @@ Current user's request: ${currentInput}`;
3871
4277
  throw new Error(`Fallback provider ${fallbackRoute.provider} also returned 0 chunks`);
3872
4278
  }
3873
4279
  // Fallback succeeded - likely guardrails blocked primary
4280
+ metadata.fallbackProvider = fallbackRoute.provider;
4281
+ metadata.fallbackModel = fallbackRoute.model;
3874
4282
  metadata.guardrailsBlocked = true;
3875
4283
  }
3876
4284
  catch (fallbackError) {
@@ -3913,6 +4321,7 @@ Current user's request: ${currentInput}`;
3913
4321
  model: enhancedOptions.model,
3914
4322
  };
3915
4323
  }
4324
+ const memStoreStart = Date.now();
3916
4325
  try {
3917
4326
  await this.conversationMemory.storeConversationTurn({
3918
4327
  sessionId,
@@ -3926,6 +4335,7 @@ Current user's request: ${currentInput}`;
3926
4335
  requestId: enhancedOptions.context
3927
4336
  ?.requestId,
3928
4337
  });
4338
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "stream" }, Date.now() - memStoreStart, SpanStatus.OK);
3929
4339
  logger.debug("[NeuroLink.stream] Stored conversation turn with events", {
3930
4340
  sessionId,
3931
4341
  eventCount: eventSequence.length,
@@ -3933,6 +4343,7 @@ Current user's request: ${currentInput}`;
3933
4343
  });
3934
4344
  }
3935
4345
  catch (error) {
4346
+ this.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "stream" }, Date.now() - memStoreStart, SpanStatus.ERROR, error instanceof Error ? error.message : String(error));
3936
4347
  logger.warn("Failed to store stream conversation turn", {
3937
4348
  error: error instanceof Error ? error.message : String(error),
3938
4349
  });
@@ -4002,6 +4413,8 @@ Current user's request: ${currentInput}`;
4002
4413
  const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
4003
4414
  this, // Pass SDK instance
4004
4415
  options.region);
4416
+ // Propagate trace context for parent-child span hierarchy
4417
+ provider._traceContext = this._metricsTraceContext;
4005
4418
  // Enable tool execution for the provider using BaseProvider method
4006
4419
  provider.setupToolExecutor({
4007
4420
  customTools: this.getCustomTools(),
@@ -4051,7 +4464,13 @@ Current user's request: ${currentInput}`;
4051
4464
  provider: providerName,
4052
4465
  systemPromptPassedLength: enhancedSystemPrompt.length,
4053
4466
  });
4054
- return { stream: streamResult.stream, provider: providerName };
4467
+ return {
4468
+ stream: streamResult.stream,
4469
+ provider: providerName,
4470
+ usage: streamResult.usage,
4471
+ model: streamResult.model || options.model,
4472
+ analytics: streamResult.analytics,
4473
+ };
4055
4474
  }
4056
4475
  /**
4057
4476
  * Process stream result
@@ -4110,6 +4529,27 @@ Current user's request: ${currentInput}`;
4110
4529
  logger.error("Stream generation failed, attempting fallback", {
4111
4530
  error: error instanceof Error ? error.message : String(error),
4112
4531
  });
4532
+ // Record a failed-provider span for the primary provider that threw
4533
+ try {
4534
+ const failedProvider = options.provider || "unknown";
4535
+ const traceCtx = this._metricsTraceContext;
4536
+ let failedSpan = SpanSerializer.createGenerationSpan({
4537
+ provider: failedProvider,
4538
+ model: options.model || "unknown",
4539
+ name: `gen_ai.${failedProvider}.stream.failed`,
4540
+ traceId: traceCtx?.traceId,
4541
+ parentSpanId: traceCtx?.parentSpanId,
4542
+ });
4543
+ failedSpan = SpanSerializer.endSpan(failedSpan, SpanStatus.ERROR);
4544
+ failedSpan.statusMessage =
4545
+ error instanceof Error ? error.message : String(error);
4546
+ failedSpan.durationMs = Date.now() - startTime;
4547
+ this.metricsAggregator.recordSpan(failedSpan);
4548
+ getMetricsAggregator().recordSpan(failedSpan);
4549
+ }
4550
+ catch {
4551
+ /* non-blocking */
4552
+ }
4113
4553
  const originalPrompt = options.input.text;
4114
4554
  const responseTime = Date.now() - startTime;
4115
4555
  const providerName = await getBestProvider(options.provider);
@@ -4159,6 +4599,7 @@ Current user's request: ${currentInput}`;
4159
4599
  model: options.model,
4160
4600
  };
4161
4601
  }
4602
+ const memStoreStart = Date.now();
4162
4603
  try {
4163
4604
  await self.conversationMemory.storeConversationTurn({
4164
4605
  sessionId: sessionId || options.context?.sessionId,
@@ -4172,8 +4613,10 @@ Current user's request: ${currentInput}`;
4172
4613
  options.context
4173
4614
  ?.requestId,
4174
4615
  });
4616
+ self.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "fallback-stream" }, Date.now() - memStoreStart, SpanStatus.OK);
4175
4617
  }
4176
4618
  catch (error) {
4619
+ self.recordMemorySpan("memory.store", { "memory.operation": "store", "memory.path": "fallback-stream" }, Date.now() - memStoreStart, SpanStatus.ERROR, error instanceof Error ? error.message : String(error));
4177
4620
  logger.warn("Failed to store fallback stream conversation turn", {
4178
4621
  error: error instanceof Error ? error.message : String(error),
4179
4622
  });