@juspay/neurolink 9.5.3 → 9.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (423)
  1. package/CHANGELOG.md +16 -0
  2. package/README.md +29 -25
  3. package/dist/adapters/video/vertexVideoHandler.js +3 -3
  4. package/dist/agent/directTools.d.ts +5 -5
  5. package/dist/cli/commands/config.d.ts +9 -9
  6. package/dist/cli/commands/serve.d.ts +37 -0
  7. package/dist/cli/commands/serve.js +302 -229
  8. package/dist/cli/commands/setup-anthropic.d.ts +2 -2
  9. package/dist/cli/commands/setup-azure.d.ts +2 -2
  10. package/dist/cli/commands/setup-bedrock.d.ts +2 -2
  11. package/dist/cli/commands/setup-gcp.d.ts +2 -2
  12. package/dist/cli/commands/setup-google-ai.d.ts +2 -2
  13. package/dist/cli/commands/setup-huggingface.d.ts +2 -2
  14. package/dist/cli/commands/setup-mistral.d.ts +2 -2
  15. package/dist/cli/commands/setup-openai.d.ts +2 -2
  16. package/dist/cli/commands/setup.d.ts +2 -2
  17. package/dist/cli/factories/commandFactory.js +16 -2
  18. package/dist/cli/loop/optionsSchema.d.ts +2 -2
  19. package/dist/cli/loop/optionsSchema.js +4 -0
  20. package/dist/cli/loop/session.d.ts +4 -0
  21. package/dist/cli/loop/session.js +49 -4
  22. package/dist/cli/utils/interactiveSetup.d.ts +4 -4
  23. package/dist/config/conversationMemory.d.ts +2 -0
  24. package/dist/config/conversationMemory.js +5 -5
  25. package/dist/constants/contextWindows.d.ts +46 -0
  26. package/dist/constants/contextWindows.js +156 -0
  27. package/dist/context/budgetChecker.d.ts +18 -0
  28. package/dist/context/budgetChecker.js +71 -0
  29. package/dist/context/contextCompactor.d.ts +22 -0
  30. package/dist/context/contextCompactor.js +106 -0
  31. package/dist/context/effectiveHistory.d.ts +52 -0
  32. package/dist/context/effectiveHistory.js +105 -0
  33. package/dist/context/errorDetection.d.ts +14 -0
  34. package/dist/context/errorDetection.js +124 -0
  35. package/dist/context/fileSummarizationService.d.ts +54 -0
  36. package/dist/context/fileSummarizationService.js +255 -0
  37. package/dist/context/fileSummarizer.d.ts +56 -0
  38. package/dist/context/fileSummarizer.js +145 -0
  39. package/dist/context/fileTokenBudget.d.ts +53 -0
  40. package/dist/context/fileTokenBudget.js +127 -0
  41. package/dist/context/prompts/summarizationPrompt.d.ts +17 -0
  42. package/dist/context/prompts/summarizationPrompt.js +110 -0
  43. package/dist/context/stages/fileReadDeduplicator.d.ts +10 -0
  44. package/dist/context/stages/fileReadDeduplicator.js +66 -0
  45. package/dist/context/stages/slidingWindowTruncator.d.ts +11 -0
  46. package/dist/context/stages/slidingWindowTruncator.js +42 -0
  47. package/dist/context/stages/structuredSummarizer.d.ts +10 -0
  48. package/dist/context/stages/structuredSummarizer.js +49 -0
  49. package/dist/context/stages/toolOutputPruner.d.ts +10 -0
  50. package/dist/context/stages/toolOutputPruner.js +52 -0
  51. package/dist/context/summarizationEngine.d.ts +45 -0
  52. package/dist/context/summarizationEngine.js +110 -0
  53. package/dist/context/toolOutputLimits.d.ts +17 -0
  54. package/dist/context/toolOutputLimits.js +84 -0
  55. package/dist/context/toolPairRepair.d.ts +16 -0
  56. package/dist/context/toolPairRepair.js +66 -0
  57. package/dist/core/analytics.js +11 -4
  58. package/dist/core/baseProvider.d.ts +6 -0
  59. package/dist/core/baseProvider.js +83 -14
  60. package/dist/core/conversationMemoryManager.d.ts +14 -11
  61. package/dist/core/conversationMemoryManager.js +36 -68
  62. package/dist/core/dynamicModels.js +3 -2
  63. package/dist/core/modules/GenerationHandler.js +2 -0
  64. package/dist/core/modules/MessageBuilder.d.ts +1 -1
  65. package/dist/core/modules/MessageBuilder.js +2 -0
  66. package/dist/core/modules/TelemetryHandler.d.ts +2 -3
  67. package/dist/core/modules/TelemetryHandler.js +3 -3
  68. package/dist/core/modules/ToolsManager.d.ts +2 -2
  69. package/dist/core/redisConversationMemoryManager.d.ts +19 -14
  70. package/dist/core/redisConversationMemoryManager.js +94 -86
  71. package/dist/factories/providerFactory.d.ts +2 -2
  72. package/dist/files/fileReferenceRegistry.d.ts +276 -0
  73. package/dist/files/fileReferenceRegistry.js +1543 -0
  74. package/dist/files/fileTools.d.ts +423 -0
  75. package/dist/files/fileTools.js +449 -0
  76. package/dist/files/index.d.ts +14 -0
  77. package/dist/files/index.js +13 -0
  78. package/dist/files/streamingReader.d.ts +93 -0
  79. package/dist/files/streamingReader.js +321 -0
  80. package/dist/files/types.d.ts +23 -0
  81. package/dist/files/types.js +23 -0
  82. package/dist/image-gen/imageGenTools.d.ts +2 -2
  83. package/dist/image-gen/types.d.ts +12 -12
  84. package/dist/index.d.ts +4 -0
  85. package/dist/index.js +5 -0
  86. package/dist/lib/adapters/video/vertexVideoHandler.js +3 -3
  87. package/dist/lib/agent/directTools.d.ts +7 -7
  88. package/dist/lib/config/conversationMemory.d.ts +2 -0
  89. package/dist/lib/config/conversationMemory.js +5 -5
  90. package/dist/lib/constants/contextWindows.d.ts +46 -0
  91. package/dist/lib/constants/contextWindows.js +157 -0
  92. package/dist/lib/context/budgetChecker.d.ts +18 -0
  93. package/dist/lib/context/budgetChecker.js +72 -0
  94. package/dist/lib/context/contextCompactor.d.ts +22 -0
  95. package/dist/lib/context/contextCompactor.js +107 -0
  96. package/dist/lib/context/effectiveHistory.d.ts +52 -0
  97. package/dist/lib/context/effectiveHistory.js +106 -0
  98. package/dist/lib/context/errorDetection.d.ts +14 -0
  99. package/dist/lib/context/errorDetection.js +125 -0
  100. package/dist/lib/context/fileSummarizationService.d.ts +54 -0
  101. package/dist/lib/context/fileSummarizationService.js +256 -0
  102. package/dist/lib/context/fileSummarizer.d.ts +56 -0
  103. package/dist/lib/context/fileSummarizer.js +146 -0
  104. package/dist/lib/context/fileTokenBudget.d.ts +53 -0
  105. package/dist/lib/context/fileTokenBudget.js +128 -0
  106. package/dist/lib/context/prompts/summarizationPrompt.d.ts +17 -0
  107. package/dist/lib/context/prompts/summarizationPrompt.js +111 -0
  108. package/dist/lib/context/stages/fileReadDeduplicator.d.ts +10 -0
  109. package/dist/lib/context/stages/fileReadDeduplicator.js +67 -0
  110. package/dist/lib/context/stages/slidingWindowTruncator.d.ts +11 -0
  111. package/dist/lib/context/stages/slidingWindowTruncator.js +43 -0
  112. package/dist/lib/context/stages/structuredSummarizer.d.ts +10 -0
  113. package/dist/lib/context/stages/structuredSummarizer.js +50 -0
  114. package/dist/lib/context/stages/toolOutputPruner.d.ts +10 -0
  115. package/dist/lib/context/stages/toolOutputPruner.js +53 -0
  116. package/dist/lib/context/summarizationEngine.d.ts +45 -0
  117. package/dist/lib/context/summarizationEngine.js +111 -0
  118. package/dist/lib/context/toolOutputLimits.d.ts +17 -0
  119. package/dist/lib/context/toolOutputLimits.js +85 -0
  120. package/dist/lib/context/toolPairRepair.d.ts +16 -0
  121. package/dist/lib/context/toolPairRepair.js +67 -0
  122. package/dist/lib/core/analytics.js +11 -4
  123. package/dist/lib/core/baseProvider.d.ts +6 -0
  124. package/dist/lib/core/baseProvider.js +83 -14
  125. package/dist/lib/core/conversationMemoryManager.d.ts +14 -11
  126. package/dist/lib/core/conversationMemoryManager.js +36 -68
  127. package/dist/lib/core/dynamicModels.js +3 -2
  128. package/dist/lib/core/modules/GenerationHandler.js +2 -0
  129. package/dist/lib/core/modules/MessageBuilder.d.ts +1 -1
  130. package/dist/lib/core/modules/MessageBuilder.js +2 -0
  131. package/dist/lib/core/modules/TelemetryHandler.d.ts +2 -3
  132. package/dist/lib/core/modules/TelemetryHandler.js +3 -3
  133. package/dist/lib/core/modules/ToolsManager.d.ts +2 -2
  134. package/dist/lib/core/redisConversationMemoryManager.d.ts +19 -14
  135. package/dist/lib/core/redisConversationMemoryManager.js +94 -86
  136. package/dist/lib/factories/providerFactory.d.ts +2 -2
  137. package/dist/lib/files/fileReferenceRegistry.d.ts +276 -0
  138. package/dist/lib/files/fileReferenceRegistry.js +1544 -0
  139. package/dist/lib/files/fileTools.d.ts +423 -0
  140. package/dist/lib/files/fileTools.js +450 -0
  141. package/dist/lib/files/index.d.ts +14 -0
  142. package/dist/lib/files/index.js +14 -0
  143. package/dist/lib/files/streamingReader.d.ts +93 -0
  144. package/dist/lib/files/streamingReader.js +322 -0
  145. package/dist/lib/files/types.d.ts +23 -0
  146. package/dist/lib/files/types.js +24 -0
  147. package/dist/lib/image-gen/imageGenTools.d.ts +2 -2
  148. package/dist/lib/image-gen/types.d.ts +12 -12
  149. package/dist/lib/index.d.ts +4 -0
  150. package/dist/lib/index.js +5 -0
  151. package/dist/lib/mcp/httpRetryHandler.js +6 -2
  152. package/dist/lib/memory/mem0Initializer.d.ts +2 -2
  153. package/dist/lib/neurolink.d.ts +66 -2
  154. package/dist/lib/neurolink.js +777 -315
  155. package/dist/lib/processors/archive/ArchiveProcessor.d.ts +327 -0
  156. package/dist/lib/processors/archive/ArchiveProcessor.js +1309 -0
  157. package/dist/lib/processors/archive/index.d.ts +33 -0
  158. package/dist/lib/processors/archive/index.js +43 -0
  159. package/dist/lib/processors/base/BaseFileProcessor.js +2 -1
  160. package/dist/lib/processors/base/types.d.ts +70 -64
  161. package/dist/lib/processors/base/types.js +6 -0
  162. package/dist/lib/processors/cli/fileProcessorCli.d.ts +8 -8
  163. package/dist/lib/processors/cli/fileProcessorCli.js +5 -5
  164. package/dist/lib/processors/config/mimeTypes.js +25 -0
  165. package/dist/lib/processors/config/sizeLimits.d.ts +52 -40
  166. package/dist/lib/processors/config/sizeLimits.js +56 -44
  167. package/dist/lib/processors/document/ExcelProcessor.d.ts +14 -0
  168. package/dist/lib/processors/document/ExcelProcessor.js +72 -1
  169. package/dist/lib/processors/document/PptxProcessor.d.ts +63 -0
  170. package/dist/lib/processors/document/PptxProcessor.js +158 -0
  171. package/dist/lib/processors/document/index.d.ts +1 -0
  172. package/dist/lib/processors/document/index.js +6 -0
  173. package/dist/lib/processors/errors/FileErrorCode.d.ts +2 -2
  174. package/dist/lib/processors/errors/errorHelpers.d.ts +2 -2
  175. package/dist/lib/processors/errors/errorHelpers.js +12 -4
  176. package/dist/lib/processors/errors/errorSerializer.d.ts +4 -4
  177. package/dist/lib/processors/index.d.ts +8 -2
  178. package/dist/lib/processors/index.js +5 -2
  179. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +8 -8
  180. package/dist/lib/processors/integration/FileProcessorIntegration.js +7 -7
  181. package/dist/lib/processors/media/AudioProcessor.d.ts +328 -0
  182. package/dist/lib/processors/media/AudioProcessor.js +708 -0
  183. package/dist/lib/processors/media/VideoProcessor.d.ts +350 -0
  184. package/dist/lib/processors/media/VideoProcessor.js +992 -0
  185. package/dist/lib/processors/media/index.d.ts +27 -0
  186. package/dist/lib/processors/media/index.js +37 -0
  187. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +19 -5
  188. package/dist/lib/processors/registry/ProcessorRegistry.js +103 -8
  189. package/dist/lib/processors/registry/index.d.ts +1 -1
  190. package/dist/lib/processors/registry/index.js +1 -1
  191. package/dist/lib/processors/registry/types.d.ts +2 -2
  192. package/dist/lib/providers/amazonBedrock.js +2 -1
  193. package/dist/lib/providers/anthropic.js +2 -2
  194. package/dist/lib/providers/anthropicBaseProvider.js +10 -4
  195. package/dist/lib/providers/azureOpenai.js +14 -25
  196. package/dist/lib/providers/googleAiStudio.js +136 -457
  197. package/dist/lib/providers/googleNativeGemini3.d.ts +119 -0
  198. package/dist/lib/providers/googleNativeGemini3.js +264 -0
  199. package/dist/lib/providers/googleVertex.d.ts +15 -1
  200. package/dist/lib/providers/googleVertex.js +341 -775
  201. package/dist/lib/providers/huggingFace.js +20 -5
  202. package/dist/lib/providers/litellm.js +6 -4
  203. package/dist/lib/providers/mistral.js +3 -2
  204. package/dist/lib/providers/openAI.js +2 -2
  205. package/dist/lib/providers/openRouter.js +8 -7
  206. package/dist/lib/providers/openaiCompatible.js +10 -4
  207. package/dist/lib/providers/sagemaker/detection.d.ts +6 -6
  208. package/dist/lib/providers/sagemaker/diagnostics.d.ts +4 -4
  209. package/dist/lib/providers/sagemaker/parsers.d.ts +4 -4
  210. package/dist/lib/rag/chunkers/RecursiveChunker.js +2 -2
  211. package/dist/lib/rag/document/loaders.d.ts +6 -71
  212. package/dist/lib/rag/document/loaders.js +5 -5
  213. package/dist/lib/rag/graphRag/graphRAG.js +26 -9
  214. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +5 -55
  215. package/dist/lib/rag/metadata/metadataExtractor.js +6 -3
  216. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +8 -126
  217. package/dist/lib/rag/pipeline/RAGPipeline.js +11 -11
  218. package/dist/lib/rag/pipeline/contextAssembly.d.ts +3 -42
  219. package/dist/lib/rag/pipeline/contextAssembly.js +6 -3
  220. package/dist/lib/rag/reranker/RerankerFactory.d.ts +5 -60
  221. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +3 -33
  222. package/dist/lib/rag/resilience/RetryHandler.d.ts +2 -21
  223. package/dist/lib/rag/resilience/RetryHandler.js +6 -2
  224. package/dist/lib/rag/retrieval/hybridSearch.d.ts +3 -41
  225. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +2 -13
  226. package/dist/lib/rag/retrieval/vectorQueryTool.js +4 -3
  227. package/dist/lib/rag/types.d.ts +3 -3
  228. package/dist/lib/sdk/toolRegistration.d.ts +2 -2
  229. package/dist/lib/server/middleware/cache.d.ts +2 -2
  230. package/dist/lib/server/middleware/rateLimit.d.ts +2 -2
  231. package/dist/lib/server/routes/mcpRoutes.js +277 -249
  232. package/dist/lib/server/routes/memoryRoutes.js +287 -281
  233. package/dist/lib/server/utils/validation.d.ts +10 -10
  234. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +24 -2
  235. package/dist/lib/services/server/ai/observability/instrumentation.js +12 -1
  236. package/dist/lib/session/globalSessionState.d.ts +2 -2
  237. package/dist/lib/telemetry/telemetryService.d.ts +2 -2
  238. package/dist/lib/types/common.d.ts +39 -0
  239. package/dist/lib/types/contextTypes.d.ts +255 -0
  240. package/dist/lib/types/contextTypes.js +0 -2
  241. package/dist/lib/types/conversation.d.ts +62 -0
  242. package/dist/lib/types/conversationMemoryInterface.d.ts +27 -0
  243. package/dist/lib/types/conversationMemoryInterface.js +7 -0
  244. package/dist/lib/types/fileReferenceTypes.d.ts +222 -0
  245. package/dist/lib/types/fileReferenceTypes.js +9 -0
  246. package/dist/lib/types/fileTypes.d.ts +26 -3
  247. package/dist/lib/types/generateTypes.d.ts +50 -1
  248. package/dist/lib/types/index.d.ts +4 -5
  249. package/dist/lib/types/index.js +8 -10
  250. package/dist/lib/types/modelTypes.d.ts +2 -2
  251. package/dist/lib/types/processorTypes.d.ts +597 -0
  252. package/dist/lib/types/processorTypes.js +91 -0
  253. package/dist/lib/types/ragTypes.d.ts +489 -0
  254. package/dist/lib/types/ragTypes.js +8 -0
  255. package/dist/lib/types/sdkTypes.d.ts +17 -18
  256. package/dist/lib/types/streamTypes.d.ts +24 -1
  257. package/dist/lib/utils/async/retry.d.ts +2 -2
  258. package/dist/lib/utils/async/withTimeout.js +3 -1
  259. package/dist/lib/utils/conversationMemory.d.ts +12 -6
  260. package/dist/lib/utils/conversationMemory.js +91 -36
  261. package/dist/lib/utils/errorHandling.d.ts +5 -0
  262. package/dist/lib/utils/errorHandling.js +19 -0
  263. package/dist/lib/utils/fileDetector.d.ts +62 -0
  264. package/dist/lib/utils/fileDetector.js +1014 -14
  265. package/dist/lib/utils/json/safeParse.d.ts +2 -2
  266. package/dist/lib/utils/messageBuilder.js +806 -153
  267. package/dist/lib/utils/modelChoices.d.ts +2 -2
  268. package/dist/lib/utils/multimodalOptionsBuilder.d.ts +2 -1
  269. package/dist/lib/utils/multimodalOptionsBuilder.js +1 -0
  270. package/dist/lib/utils/pricing.d.ts +12 -0
  271. package/dist/lib/utils/pricing.js +134 -0
  272. package/dist/lib/utils/rateLimiter.d.ts +2 -2
  273. package/dist/lib/utils/redis.d.ts +17 -0
  274. package/dist/lib/utils/redis.js +105 -0
  275. package/dist/lib/utils/sanitizers/filename.d.ts +4 -4
  276. package/dist/lib/utils/sanitizers/svg.d.ts +2 -2
  277. package/dist/lib/utils/thinkingConfig.d.ts +6 -6
  278. package/dist/lib/utils/timeout.d.ts +10 -0
  279. package/dist/lib/utils/timeout.js +15 -0
  280. package/dist/lib/utils/tokenEstimation.d.ts +68 -0
  281. package/dist/lib/utils/tokenEstimation.js +113 -0
  282. package/dist/lib/utils/tokenUtils.d.ts +4 -4
  283. package/dist/lib/utils/ttsProcessor.d.ts +2 -2
  284. package/dist/lib/workflow/config.d.ts +150 -150
  285. package/dist/mcp/httpRetryHandler.js +6 -2
  286. package/dist/memory/mem0Initializer.d.ts +2 -2
  287. package/dist/neurolink.d.ts +66 -2
  288. package/dist/neurolink.js +777 -315
  289. package/dist/processors/archive/ArchiveProcessor.d.ts +327 -0
  290. package/dist/processors/archive/ArchiveProcessor.js +1308 -0
  291. package/dist/processors/archive/index.d.ts +33 -0
  292. package/dist/processors/archive/index.js +42 -0
  293. package/dist/processors/base/BaseFileProcessor.js +2 -1
  294. package/dist/processors/base/types.d.ts +70 -64
  295. package/dist/processors/base/types.js +6 -0
  296. package/dist/processors/cli/fileProcessorCli.d.ts +8 -8
  297. package/dist/processors/cli/fileProcessorCli.js +5 -5
  298. package/dist/processors/config/mimeTypes.js +25 -0
  299. package/dist/processors/config/sizeLimits.d.ts +52 -40
  300. package/dist/processors/config/sizeLimits.js +56 -44
  301. package/dist/processors/document/ExcelProcessor.d.ts +14 -0
  302. package/dist/processors/document/ExcelProcessor.js +72 -1
  303. package/dist/processors/document/PptxProcessor.d.ts +63 -0
  304. package/dist/processors/document/PptxProcessor.js +157 -0
  305. package/dist/processors/document/index.d.ts +1 -0
  306. package/dist/processors/document/index.js +6 -0
  307. package/dist/processors/errors/FileErrorCode.d.ts +2 -2
  308. package/dist/processors/errors/errorHelpers.d.ts +2 -2
  309. package/dist/processors/errors/errorHelpers.js +12 -4
  310. package/dist/processors/errors/errorSerializer.d.ts +4 -4
  311. package/dist/processors/index.d.ts +8 -2
  312. package/dist/processors/index.js +5 -2
  313. package/dist/processors/integration/FileProcessorIntegration.d.ts +8 -8
  314. package/dist/processors/integration/FileProcessorIntegration.js +7 -7
  315. package/dist/processors/media/AudioProcessor.d.ts +328 -0
  316. package/dist/processors/media/AudioProcessor.js +707 -0
  317. package/dist/processors/media/VideoProcessor.d.ts +350 -0
  318. package/dist/processors/media/VideoProcessor.js +991 -0
  319. package/dist/processors/media/ffprobe-static.d.ts +4 -0
  320. package/dist/processors/media/index.d.ts +27 -0
  321. package/dist/processors/media/index.js +36 -0
  322. package/dist/processors/registry/ProcessorRegistry.d.ts +19 -5
  323. package/dist/processors/registry/ProcessorRegistry.js +103 -8
  324. package/dist/processors/registry/index.d.ts +1 -1
  325. package/dist/processors/registry/index.js +1 -1
  326. package/dist/processors/registry/types.d.ts +2 -2
  327. package/dist/providers/amazonBedrock.js +2 -1
  328. package/dist/providers/anthropic.js +2 -2
  329. package/dist/providers/anthropicBaseProvider.js +10 -4
  330. package/dist/providers/azureOpenai.js +14 -25
  331. package/dist/providers/googleAiStudio.js +136 -457
  332. package/dist/providers/googleNativeGemini3.d.ts +119 -0
  333. package/dist/providers/googleNativeGemini3.js +263 -0
  334. package/dist/providers/googleVertex.d.ts +15 -1
  335. package/dist/providers/googleVertex.js +341 -775
  336. package/dist/providers/huggingFace.js +20 -5
  337. package/dist/providers/litellm.js +6 -4
  338. package/dist/providers/mistral.js +3 -2
  339. package/dist/providers/openAI.js +2 -2
  340. package/dist/providers/openRouter.js +8 -7
  341. package/dist/providers/openaiCompatible.js +10 -4
  342. package/dist/providers/sagemaker/detection.d.ts +6 -6
  343. package/dist/providers/sagemaker/diagnostics.d.ts +4 -4
  344. package/dist/providers/sagemaker/parsers.d.ts +4 -4
  345. package/dist/rag/chunkers/RecursiveChunker.js +2 -2
  346. package/dist/rag/document/loaders.d.ts +6 -71
  347. package/dist/rag/document/loaders.js +5 -5
  348. package/dist/rag/graphRag/graphRAG.js +26 -9
  349. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +5 -55
  350. package/dist/rag/metadata/metadataExtractor.js +6 -3
  351. package/dist/rag/pipeline/RAGPipeline.d.ts +8 -126
  352. package/dist/rag/pipeline/RAGPipeline.js +11 -11
  353. package/dist/rag/pipeline/contextAssembly.d.ts +3 -42
  354. package/dist/rag/pipeline/contextAssembly.js +6 -3
  355. package/dist/rag/reranker/RerankerFactory.d.ts +5 -60
  356. package/dist/rag/resilience/CircuitBreaker.d.ts +3 -33
  357. package/dist/rag/resilience/RetryHandler.d.ts +2 -21
  358. package/dist/rag/resilience/RetryHandler.js +6 -2
  359. package/dist/rag/retrieval/hybridSearch.d.ts +3 -41
  360. package/dist/rag/retrieval/vectorQueryTool.d.ts +2 -13
  361. package/dist/rag/retrieval/vectorQueryTool.js +4 -3
  362. package/dist/rag/types.d.ts +3 -3
  363. package/dist/sdk/toolRegistration.d.ts +2 -2
  364. package/dist/server/middleware/cache.d.ts +2 -2
  365. package/dist/server/middleware/rateLimit.d.ts +2 -2
  366. package/dist/server/routes/mcpRoutes.js +277 -249
  367. package/dist/server/routes/memoryRoutes.js +287 -281
  368. package/dist/server/utils/validation.d.ts +4 -4
  369. package/dist/services/server/ai/observability/instrumentation.d.ts +24 -2
  370. package/dist/services/server/ai/observability/instrumentation.js +12 -1
  371. package/dist/session/globalSessionState.d.ts +2 -2
  372. package/dist/telemetry/telemetryService.d.ts +2 -2
  373. package/dist/types/common.d.ts +39 -0
  374. package/dist/types/contextTypes.d.ts +255 -0
  375. package/dist/types/contextTypes.js +0 -2
  376. package/dist/types/conversation.d.ts +62 -0
  377. package/dist/types/conversationMemoryInterface.d.ts +27 -0
  378. package/dist/types/conversationMemoryInterface.js +6 -0
  379. package/dist/types/fileReferenceTypes.d.ts +222 -0
  380. package/dist/types/fileReferenceTypes.js +8 -0
  381. package/dist/types/fileTypes.d.ts +26 -3
  382. package/dist/types/generateTypes.d.ts +50 -1
  383. package/dist/types/index.d.ts +4 -5
  384. package/dist/types/index.js +8 -10
  385. package/dist/types/processorTypes.d.ts +597 -0
  386. package/dist/types/processorTypes.js +90 -0
  387. package/dist/types/ragTypes.d.ts +489 -0
  388. package/dist/types/ragTypes.js +7 -0
  389. package/dist/types/sdkTypes.d.ts +17 -18
  390. package/dist/types/streamTypes.d.ts +24 -1
  391. package/dist/utils/async/retry.d.ts +2 -2
  392. package/dist/utils/async/withTimeout.js +3 -1
  393. package/dist/utils/conversationMemory.d.ts +12 -6
  394. package/dist/utils/conversationMemory.js +91 -36
  395. package/dist/utils/errorHandling.d.ts +5 -0
  396. package/dist/utils/errorHandling.js +19 -0
  397. package/dist/utils/fileDetector.d.ts +62 -0
  398. package/dist/utils/fileDetector.js +1014 -14
  399. package/dist/utils/json/safeParse.d.ts +2 -2
  400. package/dist/utils/messageBuilder.js +806 -153
  401. package/dist/utils/modelChoices.d.ts +2 -2
  402. package/dist/utils/multimodalOptionsBuilder.d.ts +2 -1
  403. package/dist/utils/multimodalOptionsBuilder.js +1 -0
  404. package/dist/utils/pricing.d.ts +12 -0
  405. package/dist/utils/pricing.js +133 -0
  406. package/dist/utils/rateLimiter.d.ts +2 -2
  407. package/dist/utils/redis.d.ts +17 -0
  408. package/dist/utils/redis.js +105 -0
  409. package/dist/utils/sanitizers/filename.d.ts +4 -4
  410. package/dist/utils/sanitizers/svg.d.ts +2 -2
  411. package/dist/utils/thinkingConfig.d.ts +6 -6
  412. package/dist/utils/timeout.d.ts +10 -0
  413. package/dist/utils/timeout.js +15 -0
  414. package/dist/utils/tokenEstimation.d.ts +68 -0
  415. package/dist/utils/tokenEstimation.js +112 -0
  416. package/dist/utils/tokenUtils.d.ts +4 -4
  417. package/dist/utils/ttsProcessor.d.ts +2 -2
  418. package/dist/workflow/config.d.ts +104 -104
  419. package/package.json +18 -6
  420. package/dist/lib/utils/conversationMemoryUtils.d.ts +0 -25
  421. package/dist/lib/utils/conversationMemoryUtils.js +0 -138
  422. package/dist/utils/conversationMemoryUtils.d.ts +0 -25
  423. package/dist/utils/conversationMemoryUtils.js +0 -137
@@ -14,12 +14,17 @@ catch {
14
14
  // Environment variables should be set externally in production
15
15
  }
16
16
  import { EventEmitter } from "events";
17
- import { isNonNullObject } from "./utils/typeUtils.js";
18
17
  import pLimit from "p-limit";
19
18
  import { CIRCUIT_BREAKER, CIRCUIT_BREAKER_RESET_MS, MEMORY_THRESHOLDS, NANOSECOND_TO_MS_DIVISOR, PERFORMANCE_THRESHOLDS, PROVIDER_TIMEOUTS, RETRY_ATTEMPTS, RETRY_DELAYS, TOOL_TIMEOUTS, } from "./constants/index.js";
19
+ import { checkContextBudget } from "./context/budgetChecker.js";
20
+ import { ContextCompactor, } from "./context/contextCompactor.js";
21
+ import { isContextOverflowError } from "./context/errorDetection.js";
22
+ import { repairToolPairs } from "./context/toolPairRepair.js";
20
23
  import { SYSTEM_LIMITS } from "./core/constants.js";
21
24
  import { AIProviderFactory } from "./core/factory.js";
22
25
  import { ProviderRegistry } from "./factories/providerRegistry.js";
26
+ import { FileReferenceRegistry } from "./files/fileReferenceRegistry.js";
27
+ import { createFileTools } from "./files/fileTools.js";
23
28
  import { HITLManager } from "./hitl/hitlManager.js";
24
29
  import { ExternalServerManager } from "./mcp/externalServerManager.js";
25
30
  // Import direct tools server for automatic registration
@@ -29,7 +34,7 @@ import { initializeMem0 } from "./memory/mem0Initializer.js";
29
34
  import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
30
35
  import { getConversationMessages, storeConversationTurn, } from "./utils/conversationMemory.js";
31
36
  // Enhanced error handling imports
32
- import { CircuitBreaker, ErrorFactory, isRetriableError, logStructuredError, NeuroLinkError, withRetry, withTimeout, } from "./utils/errorHandling.js";
37
+ import { CircuitBreaker, ErrorFactory, isAbortError, isRetriableError, logStructuredError, NeuroLinkError, withRetry, withTimeout, } from "./utils/errorHandling.js";
33
38
  // Factory processing imports
34
39
  import { createCleanStreamOptions, enhanceTextGenerationOptions, processFactoryOptions, processStreamingFactoryOptions, validateFactoryConfig, } from "./utils/factoryProcessing.js";
35
40
  import { logger, mcpLogger } from "./utils/logger.js";
@@ -42,9 +47,9 @@ import { BinaryTaskClassifier } from "./utils/taskClassifier.js";
42
47
  // Tool detection and execution imports
43
48
  // Transformation utilities
44
49
  import { extractToolNames, optimizeToolForCollection, transformAvailableTools, transformParamsForLogging, transformToolExecutions, transformToolExecutionsForMCP, transformToolsForMCP, transformToolsToDescriptions, transformToolsToExpectedFormat, } from "./utils/transformationUtils.js";
45
- import { runWorkflow } from "./workflow/core/workflowRunner.js";
50
+ import { isNonNullObject } from "./utils/typeUtils.js";
46
51
  import { getWorkflow } from "./workflow/core/workflowRegistry.js";
47
- // Core types imported from core/types.js
52
+ import { runWorkflow } from "./workflow/core/workflowRunner.js";
48
53
  /**
49
54
  * NeuroLink - Universal AI Development Platform
50
55
  *
@@ -166,6 +171,10 @@ export class NeuroLink {
166
171
  // Mem0 memory instance and config for conversation context
167
172
  mem0Instance;
168
173
  mem0Config;
174
+ // File Reference Registry for lazy on-demand file processing
175
+ fileRegistry;
176
+ // Cached file tools to avoid redundant createFileTools() calls per generate/stream
177
+ cachedFileTools = null;
169
178
  /**
170
179
  * Extract and set Langfuse context from options with proper async scoping
171
180
  */
@@ -288,6 +297,7 @@ export class NeuroLink {
288
297
  observabilityConfig;
289
298
  constructor(config) {
290
299
  this.toolRegistry = config?.toolRegistry || new MCPToolRegistry();
300
+ this.fileRegistry = new FileReferenceRegistry();
291
301
  this.observabilityConfig = config?.observability;
292
302
  // Initialize orchestration setting
293
303
  this.enableOrchestration = config?.enableOrchestration ?? false;
@@ -304,6 +314,7 @@ export class NeuroLink {
304
314
  this.initializeConversationMemory(config, constructorId, constructorStartTime, constructorHrTimeStart);
305
315
  this.initializeExternalServerManager(constructorId, constructorStartTime, constructorHrTimeStart);
306
316
  this.initializeHITL(config, constructorId, constructorStartTime, constructorHrTimeStart);
317
+ this.registerFileTools();
307
318
  this.initializeLangfuse(constructorId, constructorStartTime, constructorHrTimeStart);
308
319
  this.logConstructorComplete(constructorId, constructorStartTime, constructorHrTimeStart);
309
320
  }
@@ -457,6 +468,56 @@ export class NeuroLink {
457
468
  });
458
469
  }
459
470
  }
471
+ /**
472
+ * Register file reference tools with the MCP tool registry.
473
+ *
474
+ * Creates file access tools (list_attached_files, read_file_section,
475
+ * search_in_file, get_file_preview) bound to the FileReferenceRegistry
476
+ * and registers them as direct tools so they're available to LLMs.
477
+ */
478
+ registerFileTools() {
479
+ const fileTools = createFileTools(this.fileRegistry);
480
+ // Use void to handle async registration without blocking constructor
481
+ const registrations = Object.entries(fileTools).map(async ([toolName, toolDef]) => {
482
+ const toolId = `direct.${toolName}`;
483
+ const toolInfo = {
484
+ name: toolName,
485
+ description: toolDef.description || `File tool: ${toolName}`,
486
+ inputSchema: {},
487
+ serverId: "direct",
488
+ category: "built-in",
489
+ };
490
+ await this.toolRegistry.registerTool(toolId, toolInfo, {
491
+ execute: async (params) => {
492
+ try {
493
+ const result = await toolDef.execute(params, {
494
+ toolCallId: "file-tool",
495
+ messages: [],
496
+ });
497
+ return {
498
+ success: true,
499
+ data: result,
500
+ metadata: { toolName, serverId: "direct", executionTime: 0 },
501
+ };
502
+ }
503
+ catch (error) {
504
+ return {
505
+ success: false,
506
+ error: error instanceof Error ? error.message : String(error),
507
+ metadata: { toolName, serverId: "direct", executionTime: 0 },
508
+ };
509
+ }
510
+ },
511
+ description: toolDef.description,
512
+ inputSchema: {},
513
+ });
514
+ });
515
+ // Fire-and-forget: registrations complete before any generate/stream call
516
+ // because those calls await initializeMCP() which is slower
517
+ void Promise.all(registrations).then(() => {
518
+ logger.debug(`[NeuroLink] Registered ${Object.keys(fileTools).length} file reference tools`);
519
+ });
520
+ }
460
521
  /** Format memory context for prompt inclusion */
461
522
  formatMemoryContext(memoryContext, currentInput) {
462
523
  return `Context from previous conversations:
@@ -1507,6 +1568,8 @@ Current user's request: ${currentInput}`;
1507
1568
  output: options.output,
1508
1569
  tools: options.tools, // Includes RAG tools if rag config was provided
1509
1570
  disableTools: options.disableTools,
1571
+ toolFilter: options.toolFilter,
1572
+ excludeTools: options.excludeTools,
1510
1573
  enableAnalytics: options.enableAnalytics,
1511
1574
  enableEvaluation: options.enableEvaluation,
1512
1575
  context: options.context,
@@ -1515,6 +1578,9 @@ Current user's request: ${currentInput}`;
1515
1578
  input: options.input, // This includes text, images, and content arrays
1516
1579
  region: options.region,
1517
1580
  tts: options.tts,
1581
+ fileRegistry: this.fileRegistry,
1582
+ abortSignal: options.abortSignal,
1583
+ skipToolPromptInjection: options.skipToolPromptInjection,
1518
1584
  };
1519
1585
  // Apply factory enhancement using centralized utilities
1520
1586
  const textOptions = enhanceTextGenerationOptions(baseOptions, factoryResult);
@@ -1552,6 +1618,7 @@ Current user's request: ${currentInput}`;
1552
1618
  // Convert back to GenerateResult
1553
1619
  const generateResult = {
1554
1620
  content: textResult.content,
1621
+ finishReason: textResult.finishReason,
1555
1622
  provider: textResult.provider,
1556
1623
  model: textResult.model,
1557
1624
  usage: textResult.usage
@@ -1594,31 +1661,33 @@ Current user's request: ${currentInput}`;
1594
1661
  video: textResult.video,
1595
1662
  ppt: textResult.ppt,
1596
1663
  };
1597
- if (this.conversationMemoryConfig?.conversationMemory?.mem0Enabled &&
1598
- options.context?.userId &&
1599
- generateResult.content) {
1600
- // Non-blocking memory storage - run in background
1601
- setImmediate(async () => {
1602
- try {
1603
- const mem0 = await this.ensureMem0Ready();
1604
- if (mem0) {
1605
- await this.storeMem0ConversationTurn(mem0, originalPrompt, generateResult.content, options.context?.userId, {
1606
- timestamp: new Date().toISOString(),
1607
- provider: generateResult.provider,
1608
- model: generateResult.model,
1609
- type: "conversation_turn",
1610
- });
1611
- }
1612
- }
1613
- catch (error) {
1614
- // Non-blocking: Log error but don't fail the generation
1615
- logger.warn("Mem0 memory storage failed:", error);
1616
- }
1617
- });
1618
- }
1664
+ this.scheduleGenerateMem0Storage(options, originalPrompt, generateResult);
1619
1665
  return generateResult;
1620
1666
  });
1621
1667
  }
1668
+ /**
1669
+ * Schedule non-blocking Mem0 memory storage after generate completes.
1670
+ */
1671
+ scheduleGenerateMem0Storage(options, originalPrompt, generateResult) {
1672
+ if (this.conversationMemoryConfig?.conversationMemory?.mem0Enabled &&
1673
+ options.context?.userId &&
1674
+ generateResult.content.trim()) {
1675
+ setImmediate(async () => {
1676
+ try {
1677
+ const mem0 = await this.ensureMem0Ready();
1678
+ if (mem0) {
1679
+ await this.storeMem0ConversationTurn(mem0, originalPrompt ?? "", generateResult.content.trim(), options.context?.userId, {
1680
+ timestamp: new Date().toISOString(),
1681
+ type: "conversation_turn_generate",
1682
+ });
1683
+ }
1684
+ }
1685
+ catch (error) {
1686
+ logger.warn("Mem0 memory storage failed:", error);
1687
+ }
1688
+ });
1689
+ }
1690
+ }
1622
1691
  /**
1623
1692
  * Generate with workflow engine integration
1624
1693
  * Returns both original and processed responses for AB testing
@@ -1894,21 +1963,108 @@ Current user's request: ${currentInput}`;
1894
1963
  await this.initializeConversationMemoryForGeneration(generateInternalId, generateInternalStartTime, generateInternalHrTimeStart);
1895
1964
  const mcpResult = await this.attemptMCPGeneration(options, generateInternalId, generateInternalStartTime, generateInternalHrTimeStart, functionTag);
1896
1965
  if (mcpResult) {
1966
+ logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS (MCP path)`, {
1967
+ provider: mcpResult.provider,
1968
+ model: mcpResult.model,
1969
+ responseTimeMs: Date.now() - generateInternalStartTime,
1970
+ tokensUsed: mcpResult.usage?.total || 0,
1971
+ toolsUsed: mcpResult.toolsUsed?.length || 0,
1972
+ });
1897
1973
  await storeConversationTurn(this.conversationMemory, options, mcpResult, new Date(generateInternalStartTime));
1898
1974
  this.emitter.emit("response:end", mcpResult.content || "");
1899
1975
  return mcpResult;
1900
1976
  }
1901
1977
  const directResult = await this.directProviderGeneration(options);
1902
1978
  logger.debug(`[${functionTag}] Direct generation successful`);
1979
+ logger.info(`[NeuroLink.generateTextInternal] generate() - COMPLETE SUCCESS`, {
1980
+ provider: directResult.provider,
1981
+ model: directResult.model,
1982
+ responseTimeMs: Date.now() - generateInternalStartTime,
1983
+ tokensUsed: directResult.usage?.total || 0,
1984
+ toolsUsed: directResult.toolsUsed?.length || 0,
1985
+ });
1903
1986
  await storeConversationTurn(this.conversationMemory, options, directResult, new Date(generateInternalStartTime));
1904
1987
  this.emitter.emit("response:end", directResult.content || "");
1905
1988
  this.emitter.emit("message", `Text generation completed successfully`);
1906
1989
  return directResult;
1907
1990
  }
1908
1991
  catch (error) {
1909
- logger.error(`[${functionTag}] All generation methods failed`, {
1910
- error: error instanceof Error ? error.message : String(error),
1911
- });
1992
+ // Check if this is a context overflow error - attempt recovery
1993
+ if (isContextOverflowError(error) && this.conversationMemory) {
1994
+ logger.warn(`[${functionTag}] Context overflow detected, attempting aggressive compaction`, {
1995
+ error: error instanceof Error ? error.message : String(error),
1996
+ });
1997
+ try {
1998
+ const conversationMessages = await getConversationMessages(this.conversationMemory, options);
1999
+ // Calculate a meaningful compaction target from the model's budget
2000
+ const recoveryBudget = checkContextBudget({
2001
+ provider: options.provider || "openai",
2002
+ model: options.model,
2003
+ maxTokens: options.maxTokens,
2004
+ currentPrompt: options.prompt,
2005
+ systemPrompt: options.systemPrompt,
2006
+ });
2007
+ const compactionTarget = Math.floor(recoveryBudget.availableInputTokens * 0.7);
2008
+ const compactor = new ContextCompactor({
2009
+ enableSummarize: false, // Skip LLM call for recovery
2010
+ truncationFraction: 0.75, // Aggressive truncation
2011
+ });
2012
+ const compactionResult = await compactor.compact(conversationMessages, compactionTarget);
2013
+ if (compactionResult.compacted) {
2014
+ const repairedResult = repairToolPairs(compactionResult.messages);
2015
+ logger.info(`[${functionTag}] Aggressive compaction complete, retrying`, {
2016
+ tokensSaved: compactionResult.tokensSaved,
2017
+ compactionTarget,
2018
+ });
2019
+ // Retry with compacted context - pass compacted messages to avoid re-fetching
2020
+ return await this.directProviderGeneration({
2021
+ ...options,
2022
+ conversationMessages: repairedResult.messages,
2023
+ });
2024
+ }
2025
+ }
2026
+ catch (retryError) {
2027
+ logger.error(`[${functionTag}] Recovery attempt also failed`, {
2028
+ error: retryError instanceof Error
2029
+ ? retryError.message
2030
+ : String(retryError),
2031
+ });
2032
+ }
2033
+ }
2034
+ // If the generation was aborted (e.g., coding task short-circuit via AbortController),
2035
+ // still store the conversation turn so that:
2036
+ // 1. The Redis conversation entry is created (if first turn)
2037
+ // 2. setImmediate triggers generateConversationTitle() for the session
2038
+ // 3. The caller's syncTitleFromRedis() can find the SDK-generated title
2039
+ if (isAbortError(error)) {
2040
+ logger.info(`[${functionTag}] Generation aborted — storing conversation turn for title generation`, {
2041
+ hasMemory: !!this.conversationMemory,
2042
+ memoryType: this.conversationMemory?.constructor?.name || "NONE",
2043
+ sessionId: options.context?.sessionId ||
2044
+ "unknown",
2045
+ });
2046
+ try {
2047
+ const abortedResult = {
2048
+ content: "[generation was interrupted]",
2049
+ provider: options.provider || "unknown",
2050
+ model: options.model || "unknown",
2051
+ responseTime: Date.now() - generateInternalStartTime,
2052
+ };
2053
+ await withTimeout(storeConversationTurn(this.conversationMemory, options, abortedResult, new Date(generateInternalStartTime)), 5000);
2054
+ }
2055
+ catch (storeError) {
2056
+ logger.warn(`[${functionTag}] Failed to store conversation turn after abort`, {
2057
+ error: storeError instanceof Error
2058
+ ? storeError.message
2059
+ : String(storeError),
2060
+ });
2061
+ }
2062
+ }
2063
+ else {
2064
+ logger.error(`[${functionTag}] All generation methods failed`, {
2065
+ error: error instanceof Error ? error.message : String(error),
2066
+ });
2067
+ }
1912
2068
  this.emitter.emit("response:end", "");
1913
2069
  this.emitter.emit("error", error instanceof Error ? error : new Error(String(error)));
1914
2070
  throw error;
@@ -2006,10 +2162,35 @@ Current user's request: ${currentInput}`;
2006
2162
  }
2007
2163
  }
2008
2164
  catch (error) {
2165
+ // Immediately propagate AbortError — never retry aborted requests
2166
+ if (isAbortError(error)) {
2167
+ logger.debug(`[${functionTag}] AbortError detected on attempt ${attempt}, stopping retries`);
2168
+ throw error;
2169
+ }
2009
2170
  logger.debug(`[${functionTag}] MCP generation failed on attempt ${attempt}/${maxAttempts}`, {
2010
2171
  error: error instanceof Error ? error.message : String(error),
2011
2172
  willRetry: attempt < maxAttempts,
2012
2173
  });
2174
+ // Check for non-retryable errors — skip remaining retries immediately
2175
+ // NoSuchToolError / InvalidToolArgumentsError from Vercel AI SDK are never
2176
+ // retryable — the model hallucinated a tool name or gave bad params, and
2177
+ // the same tools would be passed on every retry.
2178
+ const isToolError = error instanceof Error &&
2179
+ (error.name === "AI_NoSuchToolError" ||
2180
+ error.name === "AI_InvalidToolArgumentsError" ||
2181
+ error.message.includes("NoSuchToolError") ||
2182
+ error.message.includes("Model tried to call unavailable tool"));
2183
+ const isNonRetryable = isContextOverflowError(error) ||
2184
+ isToolError ||
2185
+ (error instanceof Error &&
2186
+ error.isRetryable ===
2187
+ false) ||
2188
+ (error instanceof Error &&
2189
+ error.statusCode === 400);
2190
+ if (isNonRetryable) {
2191
+ logger.debug(`[${functionTag}] Non-retryable error detected, skipping remaining retries`);
2192
+ break;
2193
+ }
2013
2194
  if (attempt >= maxAttempts) {
2014
2195
  logger.debug(`[${functionTag}] All MCP attempts exhausted, falling back to direct generation`);
2015
2196
  break;
@@ -2055,7 +2236,9 @@ Current user's request: ${currentInput}`;
2055
2236
  ? await getBestProvider()
2056
2237
  : options.provider;
2057
2238
  // Get available tools
2058
- const availableTools = await this.getAllAvailableTools();
2239
+ let availableTools = await this.getAllAvailableTools();
2240
+ // Apply per-call tool filtering for system prompt tool descriptions
2241
+ availableTools = this.applyToolInfoFiltering(availableTools, options);
2059
2242
  const targetTool = availableTools.find((t) => t.name.includes("SuccessRateSRByTime") ||
2060
2243
  t.name.includes("juspay-analytics"));
2061
2244
  logger.debug("Available tools for AI prompt generation", {
@@ -2070,15 +2253,51 @@ Current user's request: ${currentInput}`;
2070
2253
  }
2071
2254
  : null,
2072
2255
  });
2073
- // Create tool-aware system prompt
2074
- const enhancedSystemPrompt = this.createToolAwareSystemPrompt(options.systemPrompt, availableTools);
2256
+ // Create tool-aware system prompt (skip if skipToolPromptInjection is true)
2257
+ const enhancedSystemPrompt = options.skipToolPromptInjection
2258
+ ? options.systemPrompt || ""
2259
+ : this.createToolAwareSystemPrompt(options.systemPrompt, availableTools);
2075
2260
  logger.debug("Tool-aware system prompt created", {
2076
2261
  originalPromptLength: options.systemPrompt?.length || 0,
2077
2262
  enhancedPromptLength: enhancedSystemPrompt.length,
2263
+ skippedToolInjection: !!options.skipToolPromptInjection,
2078
2264
  enhancedPromptPreview: enhancedSystemPrompt.substring(0, 500) + "...",
2079
2265
  });
2080
2266
  // Get conversation messages for context
2081
- const conversationMessages = await getConversationMessages(this.conversationMemory, options);
2267
+ let conversationMessages = await getConversationMessages(this.conversationMemory, options);
2268
+ // Pre-generation budget check
2269
+ const budgetResult = checkContextBudget({
2270
+ provider: providerName,
2271
+ model: options.model,
2272
+ maxTokens: options.maxTokens,
2273
+ systemPrompt: enhancedSystemPrompt,
2274
+ conversationMessages: conversationMessages,
2275
+ currentPrompt: options.prompt,
2276
+ toolDefinitions: availableTools,
2277
+ });
2278
+ if (budgetResult.shouldCompact && this.conversationMemory) {
2279
+ logger.info("[NeuroLink] Context budget exceeded, triggering auto-compaction", {
2280
+ usageRatio: budgetResult.usageRatio,
2281
+ estimatedTokens: budgetResult.estimatedInputTokens,
2282
+ availableTokens: budgetResult.availableInputTokens,
2283
+ });
2284
+ const compactor = new ContextCompactor({
2285
+ provider: providerName,
2286
+ summarizationProvider: this.conversationMemoryConfig?.conversationMemory
2287
+ ?.summarizationProvider,
2288
+ summarizationModel: this.conversationMemoryConfig?.conversationMemory
2289
+ ?.summarizationModel,
2290
+ });
2291
+ const compactionResult = await compactor.compact(conversationMessages, budgetResult.availableInputTokens, this.conversationMemoryConfig?.conversationMemory);
2292
+ if (compactionResult.compacted) {
2293
+ const repairedResult = repairToolPairs(compactionResult.messages);
2294
+ conversationMessages = repairedResult.messages;
2295
+ logger.info("[NeuroLink] Context compacted successfully", {
2296
+ stagesUsed: compactionResult.stagesUsed,
2297
+ tokensSaved: compactionResult.tokensSaved,
2298
+ });
2299
+ }
2300
+ }
2082
2301
  // Create provider and generate
2083
2302
  const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
2084
2303
  this, // Pass SDK instance
@@ -2126,8 +2345,10 @@ Current user's request: ${currentInput}`;
2126
2345
  return {
2127
2346
  content: result.content || "", // Ensure content is never undefined
2128
2347
  provider: providerName,
2348
+ model: result.model,
2129
2349
  usage: result.usage,
2130
2350
  responseTime,
2351
+ finishReason: result.finishReason,
2131
2352
  toolsUsed: result.toolsUsed || [],
2132
2353
  toolExecutions: transformedToolExecutions,
2133
2354
  enhancedWithTools: Boolean(hasToolExecutions), // Mark as enhanced if tools were actually used
@@ -2141,6 +2362,26 @@ Current user's request: ${currentInput}`;
2141
2362
  };
2142
2363
  }
2143
2364
  catch (error) {
2365
+ // Immediately propagate AbortError — never swallow aborted requests
2366
+ if (isAbortError(error)) {
2367
+ mcpLogger.debug(`[${functionTag}] AbortError detected, rethrowing`);
2368
+ throw error;
2369
+ }
2370
+ // Propagate non-retryable errors (NoSuchToolError, InvalidToolArgumentsError)
2371
+ // so the caller's retry loop can detect them and break immediately instead
2372
+ // of retrying the same deterministic failure.
2373
+ const isToolError = error instanceof Error &&
2374
+ (error.name === "AI_NoSuchToolError" ||
2375
+ error.name === "AI_InvalidToolArgumentsError" ||
2376
+ (error.message &&
2377
+ (error.message.includes("NoSuchToolError") ||
2378
+ error.message.includes("Model tried to call unavailable tool"))));
2379
+ if (isToolError) {
2380
+ mcpLogger.warn(`[${functionTag}] Non-retryable tool error, rethrowing`, {
2381
+ error: error instanceof Error ? error.message : String(error),
2382
+ });
2383
+ throw error;
2384
+ }
2144
2385
  mcpLogger.warn(`[${functionTag}] MCP generation failed`, {
2145
2386
  error: error instanceof Error ? error.message : String(error),
2146
2387
  });
@@ -2192,8 +2433,32 @@ Current user's request: ${currentInput}`;
2192
2433
  for (const providerName of tryProviders) {
2193
2434
  try {
2194
2435
  logger.debug(`[${functionTag}] Attempting provider: ${providerName}`);
2195
- // Get conversation messages for context
2196
- const conversationMessages = await getConversationMessages(this.conversationMemory, options);
2436
+ // Get conversation messages for context (use pre-compacted if provided)
2437
+ const optionsWithMessages = options;
2438
+ let conversationMessages = optionsWithMessages.conversationMessages
2439
+ ?.length
2440
+ ? optionsWithMessages.conversationMessages
2441
+ : await getConversationMessages(this.conversationMemory, options);
2442
+ // Pre-generation budget check
2443
+ const budgetCheck = checkContextBudget({
2444
+ provider: providerName,
2445
+ model: options.model,
2446
+ maxTokens: options.maxTokens,
2447
+ systemPrompt: options.systemPrompt,
2448
+ conversationMessages: conversationMessages,
2449
+ currentPrompt: options.prompt,
2450
+ toolDefinitions: options.tools
2451
+ ? Object.values(options.tools)
2452
+ : undefined,
2453
+ });
2454
+ if (budgetCheck.shouldCompact && this.conversationMemory) {
2455
+ const compactor = new ContextCompactor({ provider: providerName });
2456
+ const compactionResult = await compactor.compact(conversationMessages, budgetCheck.availableInputTokens);
2457
+ if (compactionResult.compacted) {
2458
+ const repairedResult = repairToolPairs(compactionResult.messages);
2459
+ conversationMessages = repairedResult.messages;
2460
+ }
2461
+ }
2197
2462
  const provider = await AIProviderFactory.createProvider(providerName, options.model, !options.disableTools, // Pass disableTools as inverse of enableMCP
2198
2463
  this, // Pass SDK instance
2199
2464
  options.region);
@@ -2223,6 +2488,7 @@ Current user's request: ${currentInput}`;
2223
2488
  model: result.model,
2224
2489
  usage: result.usage,
2225
2490
  responseTime,
2491
+ finishReason: result.finishReason,
2226
2492
  toolsUsed: result.toolsUsed || [],
2227
2493
  enhancedWithTools: false,
2228
2494
  analytics: result.analytics,
@@ -2235,6 +2501,11 @@ Current user's request: ${currentInput}`;
2235
2501
  };
2236
2502
  }
2237
2503
  catch (error) {
2504
+ // Immediately propagate AbortError — never fall back to next provider on abort
2505
+ if (isAbortError(error)) {
2506
+ logger.debug(`[${functionTag}] AbortError detected on provider ${providerName}, stopping fallback`);
2507
+ throw error;
2508
+ }
2238
2509
  lastError = error instanceof Error ? error : new Error(String(error));
2239
2510
  logger.warn(`[${functionTag}] Provider ${providerName} failed`, {
2240
2511
  error: lastError.message,
@@ -2254,6 +2525,34 @@ Current user's request: ${currentInput}`;
2254
2525
  /**
2255
2526
  * Create tool-aware system prompt that informs AI about available tools
2256
2527
  */
2528
+ /**
2529
+ * Apply per-call tool filtering (whitelist/blacklist) to a ToolInfo array.
2530
+ * Used to filter the tool list before building the system prompt.
2531
+ */
2532
+ applyToolInfoFiltering(tools, options) {
2533
+ if ((!options.toolFilter || options.toolFilter.length === 0) &&
2534
+ (!options.excludeTools || options.excludeTools.length === 0)) {
2535
+ return tools;
2536
+ }
2537
+ let filtered = tools;
2538
+ if (options.toolFilter && options.toolFilter.length > 0) {
2539
+ const allowSet = new Set(options.toolFilter);
2540
+ filtered = filtered.filter((t) => allowSet.has(t.name));
2541
+ }
2542
+ if (options.excludeTools && options.excludeTools.length > 0) {
2543
+ const denySet = new Set(options.excludeTools);
2544
+ filtered = filtered.filter((t) => !denySet.has(t.name));
2545
+ }
2546
+ if (filtered.length !== tools.length) {
2547
+ logger.debug(`Tool info filtering applied for system prompt`, {
2548
+ beforeCount: tools.length,
2549
+ afterCount: filtered.length,
2550
+ toolFilter: options.toolFilter,
2551
+ excludeTools: options.excludeTools,
2552
+ });
2553
+ }
2554
+ return filtered;
2555
+ }
2257
2556
  createToolAwareSystemPrompt(originalSystemPrompt, availableTools) {
2258
2557
  // AI prompt generation with tool analysis and structured logging
2259
2558
  const promptGenerationData = {
@@ -2390,6 +2689,8 @@ Current user's request: ${currentInput}`;
2390
2689
  const hrTimeStart = process.hrtime.bigint();
2391
2690
  const streamId = `neurolink-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
2392
2691
  const originalPrompt = options.input.text; // Store the original prompt for memory storage
2692
+ // Inject file registry for lazy on-demand file processing
2693
+ options.fileRegistry = this.fileRegistry;
2393
2694
  await this.validateStreamInput(options);
2394
2695
  this.emitStreamStartEvents(options, startTime);
2395
2696
  // Check if workflow is requested
@@ -2398,171 +2699,14 @@ Current user's request: ${currentInput}`;
2398
2699
  }
2399
2700
  // Set session and user IDs from context for Langfuse spans and execute with proper async scoping
2400
2701
  return await this.setLangfuseContextFromOptions(options, async () => {
2401
- let enhancedOptions;
2402
- let factoryResult;
2403
2702
  try {
2404
- // Initialize conversation memory if needed (for lazy loading)
2405
- await this.initializeConversationMemoryForGeneration(streamId, startTime, hrTimeStart);
2406
- // Initialize MCP
2407
- await this.initializeMCP();
2408
- const _originalPrompt = options.input.text;
2409
- if (this.conversationMemoryConfig?.conversationMemory?.mem0Enabled &&
2410
- options.context?.userId) {
2411
- try {
2412
- const mem0 = await this.ensureMem0Ready();
2413
- if (!mem0) {
2414
- // Continue without memories if mem0 is not available
2415
- logger.debug("Mem0 not available, continuing without memory retrieval");
2416
- }
2417
- else {
2418
- const memories = await mem0.search(options.input.text, {
2419
- user_id: options.context.userId,
2420
- limit: 5,
2421
- });
2422
- if (memories && memories.length > 0) {
2423
- // Enhance the input with memory context
2424
- const memoryContext = this.extractMemoryContext(memories);
2425
- options.input.text = this.formatMemoryContext(memoryContext, options.input.text);
2426
- }
2427
- }
2428
- }
2429
- catch (error) {
2430
- // Non-blocking: Log error but continue with streaming
2431
- logger.warn("Mem0 memory retrieval failed:", error);
2432
- }
2433
- }
2434
- // Apply orchestration if enabled and no specific provider/model requested
2435
- if (this.enableOrchestration && !options.provider && !options.model) {
2436
- try {
2437
- const orchestratedOptions = await this.applyStreamOrchestration(options);
2438
- logger.debug("Stream orchestration applied", {
2439
- originalProvider: options.provider || "auto",
2440
- orchestratedProvider: orchestratedOptions.provider,
2441
- orchestratedModel: orchestratedOptions.model,
2442
- prompt: options.input.text?.substring(0, 100),
2443
- });
2444
- // Use orchestrated options
2445
- Object.assign(options, orchestratedOptions);
2446
- }
2447
- catch (error) {
2448
- logger.warn("Stream orchestration failed, continuing with original options", {
2449
- error: error instanceof Error ? error.message : String(error),
2450
- originalProvider: options.provider || "auto",
2451
- });
2452
- // Continue with original options if orchestration fails
2453
- }
2454
- }
2455
- // 🔧 AUTO-DISABLE TOOLS: For Ollama models that don't support tools (same logic as generate())
2456
- // This prevents overwhelming smaller models with massive tool descriptions in the system message
2457
- if ((options.provider === "ollama" ||
2458
- options.provider?.toLowerCase().includes("ollama")) &&
2459
- !options.disableTools) {
2460
- const { ModelConfigurationManager } = await import("./core/modelConfiguration.js");
2461
- const modelConfig = ModelConfigurationManager.getInstance();
2462
- const ollamaConfig = modelConfig.getProviderConfiguration("ollama");
2463
- const toolCapableModels = ollamaConfig?.modelBehavior?.toolCapableModels || [];
2464
- // Only disable tools if we have explicit evidence the model doesn't support them
2465
- // If toolCapableModels is empty or model is not specified, don't make assumptions
2466
- const modelName = options.model;
2467
- if (toolCapableModels.length > 0 && modelName) {
2468
- const modelSupportsTools = toolCapableModels.some((capableModel) => modelName.toLowerCase().includes(capableModel.toLowerCase()));
2469
- if (!modelSupportsTools) {
2470
- options.disableTools = true;
2471
- logger.debug("Auto-disabled tools for Ollama model that doesn't support them (stream)", {
2472
- model: options.model,
2473
- toolCapableModels: toolCapableModels.slice(0, 3), // Show first 3 for brevity
2474
- });
2475
- }
2476
- }
2477
- }
2478
- // RAG Integration: If rag config is provided, prepare the RAG search tool (stream)
2479
- if (options.rag?.files?.length) {
2480
- try {
2481
- const { prepareRAGTool } = await import("./rag/ragIntegration.js");
2482
- const ragResult = await prepareRAGTool(options.rag, options.provider);
2483
- // Inject the RAG tool into the tools record
2484
- if (!options.tools) {
2485
- options.tools = {};
2486
- }
2487
- options.tools[ragResult.toolName] =
2488
- ragResult.tool;
2489
- // Inject RAG-aware system prompt so the AI uses the RAG tool first
2490
- const ragStreamInstruction = [
2491
- `\n\nIMPORTANT: You have a tool called "${ragResult.toolName}" that searches through`,
2492
- `${ragResult.filesLoaded} loaded document(s) containing ${ragResult.chunksIndexed} indexed chunks.`,
2493
- `ALWAYS use the "${ragResult.toolName}" tool FIRST to answer the user's question before using any other tools.`,
2494
- `This tool searches your local knowledge base of pre-loaded documents and is the primary source of truth.`,
2495
- `Do NOT use websearchGrounding or any web search tools when the answer can be found in the loaded documents.`,
2496
- ].join(" ");
2497
- options.systemPrompt =
2498
- (options.systemPrompt || "") + ragStreamInstruction;
2499
- logger.info("[RAG] Tool injected into stream()", {
2500
- toolName: ragResult.toolName,
2501
- filesLoaded: ragResult.filesLoaded,
2502
- chunksIndexed: ragResult.chunksIndexed,
2503
- });
2504
- }
2505
- catch (error) {
2506
- logger.warn("[RAG] Failed to prepare RAG tool, continuing without RAG", {
2507
- error: error instanceof Error ? error.message : String(error),
2508
- });
2509
- }
2510
- }
2511
- factoryResult = processStreamingFactoryOptions(options);
2512
- enhancedOptions = createCleanStreamOptions(options);
2513
- if (options.input?.text) {
2514
- const { toolResults: _toolResults, enhancedPrompt } = await this.detectAndExecuteTools(options.input.text, undefined);
2515
- if (enhancedPrompt !== options.input.text) {
2516
- enhancedOptions.input.text = enhancedPrompt;
2517
- }
2518
- }
2703
+ // Prepare options: init memory, MCP, Mem0, orchestration, Ollama auto-disable, tool detection
2704
+ const { enhancedOptions, factoryResult } = await this.prepareStreamOptions(options, streamId, startTime, hrTimeStart);
2519
2705
  const { stream: mcpStream, provider: providerName } = await this.createMCPStream(enhancedOptions);
2520
2706
  let accumulatedContent = "";
2521
2707
  let chunkCount = 0;
2522
- const eventSequence = [];
2523
- let eventSeqCounter = 0;
2524
- const captureEvent = (type, data) => {
2525
- eventSequence.push({
2526
- type,
2527
- seq: eventSeqCounter++,
2528
- timestamp: Date.now(),
2529
- ...(data && typeof data === "object" ? data : { data }),
2530
- });
2531
- };
2532
- const onResponseChunk = (...args) => {
2533
- const chunk = args[0];
2534
- captureEvent("response:chunk", { content: chunk });
2535
- };
2536
- const onToolStart = (...args) => {
2537
- const data = args[0];
2538
- captureEvent("tool:start", data);
2539
- };
2540
- const onToolEnd = (...args) => {
2541
- const data = args[0];
2542
- captureEvent("tool:end", data);
2543
- if (data.result && data.result.uiComponent === true) {
2544
- captureEvent("ui-component", {
2545
- toolName: data.toolName,
2546
- componentData: data.result,
2547
- timestamp: Date.now(),
2548
- });
2549
- }
2550
- };
2551
- const onUIComponent = (...args) => {
2552
- captureEvent("ui-component", args[0]);
2553
- };
2554
- const onHITLRequest = (...args) => {
2555
- captureEvent("hitl:confirmation-request", args[0]);
2556
- };
2557
- const onHITLResponse = (...args) => {
2558
- captureEvent("hitl:confirmation-response", args[0]);
2559
- };
2560
- this.emitter.on("response:chunk", onResponseChunk);
2561
- this.emitter.on("tool:start", onToolStart);
2562
- this.emitter.on("tool:end", onToolEnd);
2563
- this.emitter.on("ui-component", onUIComponent);
2564
- this.emitter.on("hitl:confirmation-request", onHITLRequest);
2565
- this.emitter.on("hitl:confirmation-response", onHITLResponse);
2708
+ // Set up event capture listeners
2709
+ const { eventSequence, cleanup: cleanupListeners } = this.setupStreamEventListeners();
2566
2710
  const metadata = {
2567
2711
  fallbackAttempted: false,
2568
2712
  guardrailsBlocked: false,
@@ -2582,127 +2726,30 @@ Current user's request: ${currentInput}`;
2582
2726
  yield chunk;
2583
2727
  }
2584
2728
  if (chunkCount === 0 && !metadata.fallbackAttempted) {
2585
- metadata.fallbackAttempted = true;
2586
- const errorMsg = "Stream completed with 0 chunks (possible guardrails block)";
2587
- metadata.error = errorMsg;
2588
- const fallbackRoute = ModelRouter.getFallbackRoute(originalPrompt || enhancedOptions.input.text || "", {
2589
- provider: providerName,
2590
- model: enhancedOptions.model || "gpt-4o",
2591
- reasoning: "primary failed",
2592
- confidence: 0.5,
2593
- }, { fallbackStrategy: "auto" });
2594
- logger.warn("Retrying with fallback provider", {
2595
- originalProvider: providerName,
2596
- fallbackProvider: fallbackRoute.provider,
2597
- reason: errorMsg,
2729
+ yield* self.handleStreamFallback(metadata, originalPrompt, enhancedOptions, providerName, accumulatedContent, (content) => {
2730
+ accumulatedContent += content;
2598
2731
  });
2599
- try {
2600
- const fallbackProvider = await AIProviderFactory.createProvider(fallbackRoute.provider, fallbackRoute.model);
2601
- // Ensure fallback provider can execute tools
2602
- fallbackProvider.setupToolExecutor({
2603
- customTools: self.getCustomTools(),
2604
- executeTool: self.executeTool.bind(self),
2605
- }, "NeuroLink.fallbackStream");
2606
- // Get conversation messages for context (same as primary stream)
2607
- const conversationMessages = await getConversationMessages(self.conversationMemory, {
2608
- prompt: enhancedOptions.input.text,
2609
- context: enhancedOptions.context,
2610
- });
2611
- const fallbackResult = await fallbackProvider.stream({
2612
- ...enhancedOptions,
2613
- model: fallbackRoute.model,
2614
- conversationMessages,
2615
- });
2616
- let fallbackChunkCount = 0;
2617
- for await (const fallbackChunk of fallbackResult.stream) {
2618
- fallbackChunkCount++;
2619
- if (fallbackChunk &&
2620
- "content" in fallbackChunk &&
2621
- typeof fallbackChunk.content === "string") {
2622
- accumulatedContent += fallbackChunk.content;
2623
- self.emitter.emit("response:chunk", fallbackChunk.content);
2624
- }
2625
- yield fallbackChunk;
2626
- }
2627
- if (fallbackChunkCount === 0) {
2628
- throw new Error(`Fallback provider ${fallbackRoute.provider} also returned 0 chunks`);
2629
- }
2630
- // Fallback succeeded - likely guardrails blocked primary
2631
- metadata.guardrailsBlocked = true;
2632
- }
2633
- catch (fallbackError) {
2634
- const fallbackErrorMsg = fallbackError instanceof Error
2635
- ? fallbackError.message
2636
- : String(fallbackError);
2637
- metadata.error = `${errorMsg}; Fallback failed: ${fallbackErrorMsg}`;
2638
- logger.error("Fallback provider failed", {
2639
- fallbackProvider: fallbackRoute.provider,
2640
- error: fallbackErrorMsg,
2641
- });
2642
- throw fallbackError;
2643
- }
2644
2732
  }
2645
2733
  }
2646
2734
  finally {
2647
- self.emitter.off("response:chunk", onResponseChunk);
2648
- self.emitter.off("tool:start", onToolStart);
2649
- self.emitter.off("tool:end", onToolEnd);
2650
- self.emitter.off("ui-component", onUIComponent);
2651
- self.emitter.off("hitl:confirmation-request", onHITLRequest);
2652
- self.emitter.off("hitl:confirmation-response", onHITLResponse);
2653
- // Store memory after stream consumption is complete
2654
- if (self.conversationMemory && enhancedOptions.context?.sessionId) {
2655
- const sessionId = enhancedOptions.context?.sessionId;
2656
- const userId = enhancedOptions.context?.userId;
2657
- let providerDetails;
2658
- if (enhancedOptions.model) {
2659
- providerDetails = {
2660
- provider: providerName,
2661
- model: enhancedOptions.model,
2662
- };
2663
- }
2664
- try {
2665
- await self.conversationMemory.storeConversationTurn({
2666
- sessionId,
2667
- userId,
2668
- userMessage: originalPrompt ?? "",
2669
- aiResponse: accumulatedContent,
2670
- startTimeStamp: new Date(startTime),
2671
- providerDetails,
2672
- enableSummarization: enhancedOptions.enableSummarization,
2673
- events: eventSequence.length > 0 ? eventSequence : undefined,
2674
- });
2675
- logger.debug("[NeuroLink.stream] Stored conversation turn with events", {
2676
- sessionId,
2677
- eventCount: eventSequence.length,
2678
- eventTypes: [...new Set(eventSequence.map((e) => e.type))],
2679
- });
2680
- }
2681
- catch (error) {
2682
- logger.warn("Failed to store stream conversation turn", {
2683
- error: error instanceof Error ? error.message : String(error),
2684
- });
2685
- }
2686
- }
2687
- if (self.conversationMemoryConfig?.conversationMemory?.mem0Enabled &&
2688
- enhancedOptions.context?.userId &&
2689
- accumulatedContent.trim()) {
2690
- // Non-blocking memory storage - run in background
2691
- setImmediate(async () => {
2692
- try {
2693
- const mem0 = await self.ensureMem0Ready();
2694
- if (mem0) {
2695
- await self.storeMem0ConversationTurn(mem0, originalPrompt, accumulatedContent.trim(), enhancedOptions.context?.userId, {
2696
- timestamp: new Date().toISOString(),
2697
- type: "conversation_turn_stream",
2698
- });
2699
- }
2700
- }
2701
- catch (error) {
2702
- logger.warn("Mem0 memory storage failed:", error);
2703
- }
2735
+ cleanupListeners();
2736
+ if (accumulatedContent.trim()) {
2737
+ logger.info(`[NeuroLink.stream] stream() - COMPLETE SUCCESS`, {
2738
+ provider: providerName,
2739
+ model: enhancedOptions.model,
2740
+ responseTimeMs: Date.now() - startTime,
2741
+ contentLength: accumulatedContent.length,
2742
+ fallback: metadata.fallbackAttempted,
2704
2743
  });
2705
2744
  }
2745
+ await self.storeStreamConversationMemory({
2746
+ enhancedOptions,
2747
+ providerName,
2748
+ originalPrompt,
2749
+ accumulatedContent,
2750
+ startTime,
2751
+ eventSequence,
2752
+ });
2706
2753
  }
2707
2754
  })();
2708
2755
  const streamResult = await this.processStreamResult(processedStream, enhancedOptions, factoryResult);
@@ -2725,6 +2772,294 @@ Current user's request: ${currentInput}`;
2725
2772
  }
2726
2773
  });
2727
2774
  }
2775
+ /**
2776
+ * Prepare stream options: initialize memory, MCP, Mem0 retrieval, orchestration,
2777
+ * Ollama tool auto-disable, factory processing, and tool detection.
2778
+ */
2779
+ async prepareStreamOptions(options, streamId, startTime, hrTimeStart) {
2780
+ // Initialize conversation memory if needed (for lazy loading)
2781
+ await this.initializeConversationMemoryForGeneration(streamId, startTime, hrTimeStart);
2782
+ // Initialize MCP
2783
+ await this.initializeMCP();
2784
+ if (this.conversationMemoryConfig?.conversationMemory?.mem0Enabled &&
2785
+ options.context?.userId) {
2786
+ try {
2787
+ const mem0 = await this.ensureMem0Ready();
2788
+ if (!mem0) {
2789
+ // Continue without memories if mem0 is not available
2790
+ logger.debug("Mem0 not available, continuing without memory retrieval");
2791
+ }
2792
+ else {
2793
+ const memories = await mem0.search(options.input.text, {
2794
+ user_id: options.context.userId,
2795
+ limit: 5,
2796
+ });
2797
+ if (memories && memories.length > 0) {
2798
+ // Enhance the input with memory context
2799
+ const memoryContext = this.extractMemoryContext(memories);
2800
+ options.input.text = this.formatMemoryContext(memoryContext, options.input.text);
2801
+ }
2802
+ }
2803
+ }
2804
+ catch (error) {
2805
+ // Non-blocking: Log error but continue with streaming
2806
+ logger.warn("Mem0 memory retrieval failed:", error);
2807
+ }
2808
+ }
2809
+ // Apply orchestration if enabled and no specific provider/model requested
2810
+ if (this.enableOrchestration && !options.provider && !options.model) {
2811
+ try {
2812
+ const orchestratedOptions = await this.applyStreamOrchestration(options);
2813
+ logger.debug("Stream orchestration applied", {
2814
+ originalProvider: options.provider || "auto",
2815
+ orchestratedProvider: orchestratedOptions.provider,
2816
+ orchestratedModel: orchestratedOptions.model,
2817
+ prompt: options.input.text?.substring(0, 100),
2818
+ });
2819
+ // Use orchestrated options
2820
+ Object.assign(options, orchestratedOptions);
2821
+ }
2822
+ catch (error) {
2823
+ logger.warn("Stream orchestration failed, continuing with original options", {
2824
+ error: error instanceof Error ? error.message : String(error),
2825
+ originalProvider: options.provider || "auto",
2826
+ });
2827
+ // Continue with original options if orchestration fails
2828
+ }
2829
+ }
2830
+ // Auto-disable tools for Ollama models that don't support them
2831
+ await this.autoDisableOllamaStreamTools(options);
2832
+ const factoryResult = processStreamingFactoryOptions(options);
2833
+ const enhancedOptions = createCleanStreamOptions(options);
2834
+ if (options.input?.text) {
2835
+ const { toolResults: _toolResults, enhancedPrompt } = await this.detectAndExecuteTools(options.input.text, undefined);
2836
+ if (enhancedPrompt !== options.input.text) {
2837
+ enhancedOptions.input.text = enhancedPrompt;
2838
+ }
2839
+ }
2840
+ return { enhancedOptions, factoryResult };
2841
+ }
2842
+ /**
2843
+ * Auto-disable tools for Ollama models that don't support them (stream mode).
2844
+ * Prevents overwhelming smaller models with massive tool descriptions in the system message.
2845
+ */
2846
+ async autoDisableOllamaStreamTools(options) {
2847
+ if ((options.provider === "ollama" ||
2848
+ options.provider?.toLowerCase().includes("ollama")) &&
2849
+ !options.disableTools) {
2850
+ const { ModelConfigurationManager } = await import("./core/modelConfiguration.js");
2851
+ const modelConfig = ModelConfigurationManager.getInstance();
2852
+ const ollamaConfig = modelConfig.getProviderConfiguration("ollama");
2853
+ const toolCapableModels = ollamaConfig?.modelBehavior?.toolCapableModels || [];
2854
+ // Only disable tools if we have explicit evidence the model doesn't support them
2855
+ // If toolCapableModels is empty or model is not specified, don't make assumptions
2856
+ const modelName = options.model;
2857
+ if (toolCapableModels.length > 0 && modelName) {
2858
+ const modelSupportsTools = toolCapableModels.some((capableModel) => modelName.toLowerCase().includes(capableModel.toLowerCase()));
2859
+ if (!modelSupportsTools) {
2860
+ options.disableTools = true;
2861
+ logger.debug("Auto-disabled tools for Ollama model that doesn't support them (stream)", {
2862
+ model: options.model,
2863
+ toolCapableModels: toolCapableModels.slice(0, 3), // Show first 3 for brevity
2864
+ });
2865
+ }
2866
+ }
2867
+ }
2868
+ }
2869
+ /**
2870
+ * Set up event listeners for stream event capture (tool calls, HITL, UI components).
2871
+ * Returns the shared event sequence array and a cleanup function to remove all listeners.
2872
+ */
2873
+ setupStreamEventListeners() {
2874
+ const eventSequence = [];
2875
+ let eventSeqCounter = 0;
2876
+ const captureEvent = (type, data) => {
2877
+ eventSequence.push({
2878
+ type,
2879
+ seq: eventSeqCounter++,
2880
+ timestamp: Date.now(),
2881
+ ...(data && typeof data === "object" ? data : { data }),
2882
+ });
2883
+ };
2884
+ const onResponseChunk = (...args) => {
2885
+ const chunk = args[0];
2886
+ captureEvent("response:chunk", { content: chunk });
2887
+ };
2888
+ const onToolStart = (...args) => {
2889
+ const data = args[0];
2890
+ captureEvent("tool:start", data);
2891
+ };
2892
+ const onToolEnd = (...args) => {
2893
+ const data = args[0];
2894
+ captureEvent("tool:end", data);
2895
+ if (data.result && data.result.uiComponent === true) {
2896
+ captureEvent("ui-component", {
2897
+ toolName: data.toolName,
2898
+ componentData: data.result,
2899
+ timestamp: Date.now(),
2900
+ });
2901
+ }
2902
+ };
2903
+ const onUIComponent = (...args) => {
2904
+ captureEvent("ui-component", args[0]);
2905
+ };
2906
+ const onHITLRequest = (...args) => {
2907
+ captureEvent("hitl:confirmation-request", args[0]);
2908
+ };
2909
+ const onHITLResponse = (...args) => {
2910
+ captureEvent("hitl:confirmation-response", args[0]);
2911
+ };
2912
+ this.emitter.on("response:chunk", onResponseChunk);
2913
+ this.emitter.on("tool:start", onToolStart);
2914
+ this.emitter.on("tool:end", onToolEnd);
2915
+ this.emitter.on("ui-component", onUIComponent);
2916
+ this.emitter.on("hitl:confirmation-request", onHITLRequest);
2917
+ this.emitter.on("hitl:confirmation-response", onHITLResponse);
2918
+ const cleanup = () => {
2919
+ this.emitter.off("response:chunk", onResponseChunk);
2920
+ this.emitter.off("tool:start", onToolStart);
2921
+ this.emitter.off("tool:end", onToolEnd);
2922
+ this.emitter.off("ui-component", onUIComponent);
2923
+ this.emitter.off("hitl:confirmation-request", onHITLRequest);
2924
+ this.emitter.off("hitl:confirmation-response", onHITLResponse);
2925
+ };
2926
+ return { eventSequence, cleanup };
2927
+ }
2928
+ /**
2929
+ * Handle fallback when the primary stream returns 0 chunks.
2930
+ * Yields chunks from a fallback provider and updates metadata accordingly.
2931
+ */
2932
+ async *handleStreamFallback(metadata, originalPrompt, enhancedOptions, providerName, _accumulatedContent, appendContent) {
2933
+ metadata.fallbackAttempted = true;
2934
+ const errorMsg = "Stream completed with 0 chunks (possible guardrails block)";
2935
+ metadata.error = errorMsg;
2936
+ const fallbackRoute = ModelRouter.getFallbackRoute(originalPrompt || enhancedOptions.input.text || "", {
2937
+ provider: providerName,
2938
+ model: enhancedOptions.model || "gpt-4o",
2939
+ reasoning: "primary failed",
2940
+ confidence: 0.5,
2941
+ }, { fallbackStrategy: "auto" });
2942
+ logger.warn("Retrying with fallback provider", {
2943
+ originalProvider: providerName,
2944
+ fallbackProvider: fallbackRoute.provider,
2945
+ reason: errorMsg,
2946
+ });
2947
+ try {
2948
+ const fallbackProvider = await AIProviderFactory.createProvider(fallbackRoute.provider, fallbackRoute.model);
2949
+ // Ensure fallback provider can execute tools
2950
+ fallbackProvider.setupToolExecutor({
2951
+ customTools: this.getCustomTools(),
2952
+ executeTool: this.executeTool.bind(this),
2953
+ }, "NeuroLink.fallbackStream");
2954
+ // Get conversation messages for context (same as primary stream)
2955
+ const conversationMessages = await getConversationMessages(this.conversationMemory, {
2956
+ prompt: enhancedOptions.input.text,
2957
+ context: enhancedOptions.context,
2958
+ });
2959
+ const fallbackResult = await fallbackProvider.stream({
2960
+ ...enhancedOptions,
2961
+ model: fallbackRoute.model,
2962
+ conversationMessages,
2963
+ });
2964
+ let fallbackChunkCount = 0;
2965
+ for await (const fallbackChunk of fallbackResult.stream) {
2966
+ fallbackChunkCount++;
2967
+ if (fallbackChunk &&
2968
+ "content" in fallbackChunk &&
2969
+ typeof fallbackChunk.content === "string") {
2970
+ appendContent(fallbackChunk.content);
2971
+ this.emitter.emit("response:chunk", fallbackChunk.content);
2972
+ }
2973
+ yield fallbackChunk;
2974
+ }
2975
+ if (fallbackChunkCount === 0) {
2976
+ throw new Error(`Fallback provider ${fallbackRoute.provider} also returned 0 chunks`);
2977
+ }
2978
+ // Fallback succeeded - likely guardrails blocked primary
2979
+ metadata.guardrailsBlocked = true;
2980
+ }
2981
+ catch (fallbackError) {
2982
+ const fallbackErrorMsg = fallbackError instanceof Error
2983
+ ? fallbackError.message
2984
+ : String(fallbackError);
2985
+ metadata.error = `${errorMsg}; Fallback failed: ${fallbackErrorMsg}`;
2986
+ logger.error("Fallback provider failed", {
2987
+ fallbackProvider: fallbackRoute.provider,
2988
+ error: fallbackErrorMsg,
2989
+ });
2990
+ throw fallbackError;
2991
+ }
2992
+ }
2993
+ /**
2994
+ * Store conversation memory after stream consumption is complete (called from finally block).
2995
+ * Handles both conversation memory storage and Mem0 background storage.
2996
+ */
2997
+ async storeStreamConversationMemory(params) {
2998
+ const { enhancedOptions, providerName, originalPrompt, accumulatedContent, startTime, eventSequence, } = params;
2999
+ // Guard: skip storing if no meaningful content was produced (no text AND no tool activity)
3000
+ const hasToolEvents = eventSequence.some((e) => e.type === "tool:start" || e.type === "tool:end");
3001
+ if (!accumulatedContent.trim() && !hasToolEvents) {
3002
+ logger.warn("[NeuroLink.stream] Skipping conversation turn storage — no text content or tool activity", {
3003
+ sessionId: enhancedOptions.context
3004
+ ?.sessionId,
3005
+ });
3006
+ return;
3007
+ }
3008
+ // Store memory after stream consumption is complete
3009
+ if (this.conversationMemory && enhancedOptions.context?.sessionId) {
3010
+ const sessionId = enhancedOptions.context
3011
+ ?.sessionId;
3012
+ const userId = enhancedOptions.context
3013
+ ?.userId;
3014
+ let providerDetails;
3015
+ if (enhancedOptions.model) {
3016
+ providerDetails = {
3017
+ provider: providerName,
3018
+ model: enhancedOptions.model,
3019
+ };
3020
+ }
3021
+ try {
3022
+ await this.conversationMemory.storeConversationTurn({
3023
+ sessionId,
3024
+ userId,
3025
+ userMessage: originalPrompt ?? "",
3026
+ aiResponse: accumulatedContent,
3027
+ startTimeStamp: new Date(startTime),
3028
+ providerDetails,
3029
+ enableSummarization: enhancedOptions.enableSummarization,
3030
+ events: eventSequence.length > 0 ? eventSequence : undefined,
3031
+ });
3032
+ logger.debug("[NeuroLink.stream] Stored conversation turn with events", {
3033
+ sessionId,
3034
+ eventCount: eventSequence.length,
3035
+ eventTypes: [...new Set(eventSequence.map((e) => e.type))],
3036
+ });
3037
+ }
3038
+ catch (error) {
3039
+ logger.warn("Failed to store stream conversation turn", {
3040
+ error: error instanceof Error ? error.message : String(error),
3041
+ });
3042
+ }
3043
+ }
3044
+ if (this.conversationMemoryConfig?.conversationMemory?.mem0Enabled &&
3045
+ enhancedOptions.context?.userId &&
3046
+ accumulatedContent.trim()) {
3047
+ setImmediate(async () => {
3048
+ try {
3049
+ const mem0 = await this.ensureMem0Ready();
3050
+ if (mem0) {
3051
+ await this.storeMem0ConversationTurn(mem0, originalPrompt ?? "", accumulatedContent.trim(), enhancedOptions.context?.userId, {
3052
+ timestamp: new Date().toISOString(),
3053
+ type: "conversation_turn_stream",
3054
+ });
3055
+ }
3056
+ }
3057
+ catch (error) {
3058
+ logger.warn("Mem0 memory storage failed:", error);
3059
+ }
3060
+ });
3061
+ }
3062
+ }
2728
3063
  /**
2729
3064
  * Validate stream input with comprehensive error reporting
2730
3065
  */
@@ -2772,14 +3107,37 @@ Current user's request: ${currentInput}`;
2772
3107
  }, "NeuroLink.createMCPStream");
2773
3108
  // 🔧 FIX: Get available tools and create tool-aware system prompt
2774
3109
  // Use SAME pattern as tryMCPGeneration (generate mode)
2775
- const availableTools = await this.getAllAvailableTools();
2776
- const enhancedSystemPrompt = this.createToolAwareSystemPrompt(options.systemPrompt, availableTools);
3110
+ let availableTools = await this.getAllAvailableTools();
3111
+ // Apply per-call tool filtering for system prompt tool descriptions
3112
+ availableTools = this.applyToolInfoFiltering(availableTools, options);
3113
+ // Skip tool prompt injection if skipToolPromptInjection is true
3114
+ const enhancedSystemPrompt = options.skipToolPromptInjection
3115
+ ? options.systemPrompt || ""
3116
+ : this.createToolAwareSystemPrompt(options.systemPrompt, availableTools);
2777
3117
  // Get conversation messages for context
2778
- const conversationMessages = await getConversationMessages(this.conversationMemory, {
3118
+ let conversationMessages = await getConversationMessages(this.conversationMemory, {
2779
3119
  ...options,
2780
3120
  prompt: options.input.text,
2781
3121
  context: options.context,
2782
3122
  });
3123
+ // Pre-generation budget check for streaming
3124
+ const streamBudget = checkContextBudget({
3125
+ provider: providerName,
3126
+ model: options.model,
3127
+ maxTokens: options.maxTokens,
3128
+ systemPrompt: enhancedSystemPrompt,
3129
+ conversationMessages: conversationMessages,
3130
+ currentPrompt: options.input.text,
3131
+ toolDefinitions: availableTools,
3132
+ });
3133
+ if (streamBudget.shouldCompact && this.conversationMemory) {
3134
+ const compactor = new ContextCompactor({ provider: providerName });
3135
+ const compactionResult = await compactor.compact(conversationMessages, streamBudget.availableInputTokens);
3136
+ if (compactionResult.compacted) {
3137
+ const repairedResult = repairToolPairs(compactionResult.messages);
3138
+ conversationMessages = repairedResult.messages;
3139
+ }
3140
+ }
2783
3141
  // 🔧 FIX: Pass enhanced system prompt to real streaming
2784
3142
  // Tools will be accessed through the streamText call in executeStream
2785
3143
  const streamResult = await provider.stream({
@@ -2876,8 +3234,19 @@ Current user's request: ${currentInput}`;
2876
3234
  }
2877
3235
  }
2878
3236
  finally {
3237
+ if (fallbackAccumulatedContent.trim()) {
3238
+ logger.info(`[NeuroLink.handleStreamError] stream() - COMPLETE SUCCESS (fallback)`, {
3239
+ provider: providerName,
3240
+ model: options.model,
3241
+ responseTimeMs: Date.now() - startTime,
3242
+ contentLength: fallbackAccumulatedContent.length,
3243
+ });
3244
+ }
2879
3245
  // Store memory after fallback stream consumption is complete
2880
- if (self.conversationMemory && enhancedOptions?.context?.sessionId) {
3246
+ // Guard: skip storing if fallback accumulated content is empty
3247
+ if (self.conversationMemory &&
3248
+ enhancedOptions?.context?.sessionId &&
3249
+ fallbackAccumulatedContent.trim()) {
2881
3250
  const sessionId = enhancedOptions?.context?.sessionId;
2882
3251
  const userId = enhancedOptions?.context
2883
3252
  ?.userId;
@@ -3436,6 +3805,36 @@ Current user's request: ${currentInput}`;
3436
3805
  },
3437
3806
  });
3438
3807
  }
3808
+ // Inject file reference tools so they reach the Vercel AI SDK's tools parameter.
3809
+ // These tools are bound to this.fileRegistry and allow the LLM to read/search
3810
+ // files on demand instead of having all file content dumped into the prompt.
3811
+ //
3812
+ // createFileTools() returns Vercel AI SDK tool() objects with Zod `parameters`.
3813
+ // We pass `parameters` as `inputSchema` so processCustomTools() in ToolsManager
3814
+ // recognises it as a Zod schema (priority 2) and serialises it correctly for
3815
+ // every provider — including Vertex AI which rejects bare `{}` schemas.
3816
+ // Cache to avoid redundant allocations per generate/stream call (FRT-6).
3817
+ if (!this.cachedFileTools) {
3818
+ this.cachedFileTools = createFileTools(this.fileRegistry);
3819
+ }
3820
+ const fileTools = this.cachedFileTools;
3821
+ for (const [toolName, toolDef] of Object.entries(fileTools)) {
3822
+ if (!toolMap.has(toolName)) {
3823
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
3824
+ const toolParams = toolDef.parameters;
3825
+ toolMap.set(toolName, {
3826
+ name: toolName,
3827
+ description: toolDef.description || `File tool: ${toolName}`,
3828
+ inputSchema: toolParams ?? { type: "object", properties: {} },
3829
+ execute: async (params) => {
3830
+ return await toolDef.execute(params, {
3831
+ toolCallId: `file-tool-${Date.now()}`,
3832
+ messages: [],
3833
+ });
3834
+ },
3835
+ });
3836
+ }
3837
+ }
3439
3838
  return toolMap;
3440
3839
  }
3441
3840
  /**
@@ -5012,6 +5411,69 @@ Current user's request: ${currentInput}`;
5012
5411
  getToolRegistry() {
5013
5412
  return this.toolRegistry;
5014
5413
  }
5414
+ /**
5415
+ * Manually trigger context compaction for a session.
5416
+ * Runs the full 4-stage compaction pipeline.
5417
+ */
5418
+ async compactSession(sessionId, config) {
5419
+ if (!this.conversationMemory) {
5420
+ return null;
5421
+ }
5422
+ const messages = await this.conversationMemory.buildContextMessages(sessionId);
5423
+ if (!messages || messages.length === 0) {
5424
+ return null;
5425
+ }
5426
+ const compactor = new ContextCompactor(config);
5427
+ const targetTokens = Math.floor(messages.length * 100); // Rough target
5428
+ const result = await compactor.compact(messages, targetTokens, this.conversationMemoryConfig?.conversationMemory);
5429
+ if (result.compacted) {
5430
+ repairToolPairs(result.messages);
5431
+ }
5432
+ return result;
5433
+ }
5434
+ /**
5435
+ * Get context usage statistics for a session.
5436
+ * Returns token counts, usage ratio, and breakdown by category.
5437
+ */
5438
+ async getContextStats(sessionId, provider, model) {
5439
+ if (!this.conversationMemory) {
5440
+ return null;
5441
+ }
5442
+ const messages = await this.conversationMemory.buildContextMessages(sessionId);
5443
+ if (!messages || messages.length === 0) {
5444
+ return null;
5445
+ }
5446
+ const budgetResult = checkContextBudget({
5447
+ provider: provider || "openai",
5448
+ model,
5449
+ conversationMessages: messages,
5450
+ });
5451
+ return {
5452
+ estimatedInputTokens: budgetResult.estimatedInputTokens,
5453
+ availableInputTokens: budgetResult.availableInputTokens,
5454
+ usageRatio: budgetResult.usageRatio,
5455
+ shouldCompact: budgetResult.shouldCompact,
5456
+ messageCount: messages.length,
5457
+ };
5458
+ }
5459
+ /**
5460
+ * Check if a session needs compaction.
5461
+ */
5462
+ needsCompaction(sessionId, provider, model) {
5463
+ if (!this.conversationMemory) {
5464
+ return false;
5465
+ }
5466
+ const session = this.conversationMemory.getSession?.(sessionId);
5467
+ if (!session) {
5468
+ return false;
5469
+ }
5470
+ const budgetResult = checkContextBudget({
5471
+ provider: provider || "openai",
5472
+ model,
5473
+ conversationMessages: session.messages,
5474
+ });
5475
+ return budgetResult.shouldCompact;
5476
+ }
5015
5477
  /**
5016
5478
  * Get the external server manager instance
5017
5479
  * Used internally by server adapters for external MCP server management