@juspay/neurolink 9.1.1 → 9.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (555) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +106 -37
  3. package/dist/agent/directTools.d.ts +11 -11
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/cli/commands/rag.d.ts +19 -0
  6. package/dist/cli/commands/rag.js +756 -0
  7. package/dist/cli/factories/commandFactory.js +146 -83
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/core/baseProvider.d.ts +43 -30
  10. package/dist/core/baseProvider.js +98 -138
  11. package/dist/core/conversationMemoryFactory.d.ts +2 -2
  12. package/dist/core/conversationMemoryFactory.js +2 -2
  13. package/dist/core/conversationMemoryInitializer.d.ts +1 -2
  14. package/dist/core/conversationMemoryInitializer.js +2 -2
  15. package/dist/core/infrastructure/baseError.d.ts +21 -0
  16. package/dist/core/infrastructure/baseError.js +22 -0
  17. package/dist/core/infrastructure/baseFactory.d.ts +21 -0
  18. package/dist/core/infrastructure/baseFactory.js +54 -0
  19. package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
  20. package/dist/core/infrastructure/baseRegistry.js +49 -0
  21. package/dist/core/infrastructure/index.d.ts +5 -0
  22. package/dist/core/infrastructure/index.js +5 -0
  23. package/dist/core/infrastructure/retry.d.ts +7 -0
  24. package/dist/core/infrastructure/retry.js +20 -0
  25. package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
  26. package/dist/core/infrastructure/typedEventEmitter.js +23 -0
  27. package/dist/core/redisConversationMemoryManager.d.ts +1 -6
  28. package/dist/core/redisConversationMemoryManager.js +7 -19
  29. package/dist/factories/providerFactory.d.ts +5 -3
  30. package/dist/factories/providerFactory.js +31 -24
  31. package/dist/image-gen/ImageGenService.d.ts +143 -0
  32. package/dist/image-gen/ImageGenService.js +345 -0
  33. package/dist/image-gen/imageGenTools.d.ts +126 -0
  34. package/dist/image-gen/imageGenTools.js +304 -0
  35. package/dist/image-gen/index.d.ts +46 -0
  36. package/dist/image-gen/index.js +48 -0
  37. package/dist/image-gen/types.d.ts +237 -0
  38. package/dist/image-gen/types.js +24 -0
  39. package/dist/index.d.ts +46 -12
  40. package/dist/index.js +88 -36
  41. package/dist/lib/agent/directTools.d.ts +8 -8
  42. package/dist/lib/core/baseProvider.d.ts +43 -30
  43. package/dist/lib/core/baseProvider.js +98 -138
  44. package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
  45. package/dist/lib/core/conversationMemoryFactory.js +2 -2
  46. package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
  47. package/dist/lib/core/conversationMemoryInitializer.js +2 -2
  48. package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
  49. package/dist/lib/core/infrastructure/baseError.js +23 -0
  50. package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
  51. package/dist/lib/core/infrastructure/baseFactory.js +55 -0
  52. package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
  53. package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
  54. package/dist/lib/core/infrastructure/index.d.ts +5 -0
  55. package/dist/lib/core/infrastructure/index.js +6 -0
  56. package/dist/lib/core/infrastructure/retry.d.ts +7 -0
  57. package/dist/lib/core/infrastructure/retry.js +21 -0
  58. package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
  59. package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
  60. package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
  61. package/dist/lib/core/redisConversationMemoryManager.js +7 -19
  62. package/dist/lib/factories/providerFactory.d.ts +5 -3
  63. package/dist/lib/factories/providerFactory.js +31 -24
  64. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  65. package/dist/lib/image-gen/ImageGenService.js +346 -0
  66. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  67. package/dist/lib/image-gen/imageGenTools.js +305 -0
  68. package/dist/lib/image-gen/index.d.ts +46 -0
  69. package/dist/lib/image-gen/index.js +49 -0
  70. package/dist/lib/image-gen/types.d.ts +237 -0
  71. package/dist/lib/image-gen/types.js +25 -0
  72. package/dist/lib/index.d.ts +46 -12
  73. package/dist/lib/index.js +88 -36
  74. package/dist/lib/mcp/index.d.ts +6 -5
  75. package/dist/lib/mcp/index.js +7 -5
  76. package/dist/lib/neurolink.d.ts +11 -13
  77. package/dist/lib/neurolink.js +95 -29
  78. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  79. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  80. package/dist/lib/processors/base/index.d.ts +14 -0
  81. package/dist/lib/processors/base/index.js +20 -0
  82. package/dist/lib/processors/base/types.d.ts +593 -0
  83. package/dist/lib/processors/base/types.js +77 -0
  84. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  85. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  86. package/dist/lib/processors/cli/index.d.ts +37 -0
  87. package/dist/lib/processors/cli/index.js +50 -0
  88. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  89. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  90. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  91. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  92. package/dist/lib/processors/code/index.d.ts +44 -0
  93. package/dist/lib/processors/code/index.js +61 -0
  94. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  95. package/dist/lib/processors/config/fileTypes.js +521 -0
  96. package/dist/lib/processors/config/index.d.ts +32 -0
  97. package/dist/lib/processors/config/index.js +93 -0
  98. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  99. package/dist/lib/processors/config/languageMap.js +411 -0
  100. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  101. package/dist/lib/processors/config/mimeTypes.js +339 -0
  102. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  103. package/dist/lib/processors/config/sizeLimits.js +247 -0
  104. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  105. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  106. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  107. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  108. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  109. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  110. package/dist/lib/processors/data/index.d.ts +49 -0
  111. package/dist/lib/processors/data/index.js +77 -0
  112. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  113. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  114. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  115. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  116. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  117. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  118. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  119. package/dist/lib/processors/document/WordProcessor.js +354 -0
  120. package/dist/lib/processors/document/index.d.ts +54 -0
  121. package/dist/lib/processors/document/index.js +91 -0
  122. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  123. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  124. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  125. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  126. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  127. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  128. package/dist/lib/processors/errors/index.d.ts +46 -0
  129. package/dist/lib/processors/errors/index.js +50 -0
  130. package/dist/lib/processors/index.d.ts +76 -0
  131. package/dist/lib/processors/index.js +113 -0
  132. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  133. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  134. package/dist/lib/processors/integration/index.d.ts +42 -0
  135. package/dist/lib/processors/integration/index.js +45 -0
  136. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  137. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  138. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  139. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  140. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  141. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  142. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  143. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  144. package/dist/lib/processors/markup/index.d.ts +66 -0
  145. package/dist/lib/processors/markup/index.js +103 -0
  146. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  147. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  148. package/dist/lib/processors/registry/index.d.ts +12 -0
  149. package/dist/lib/processors/registry/index.js +17 -0
  150. package/dist/lib/processors/registry/types.d.ts +53 -0
  151. package/dist/lib/processors/registry/types.js +11 -0
  152. package/dist/lib/providers/amazonBedrock.d.ts +15 -2
  153. package/dist/lib/providers/amazonBedrock.js +65 -8
  154. package/dist/lib/providers/anthropic.d.ts +3 -3
  155. package/dist/lib/providers/anthropic.js +10 -7
  156. package/dist/lib/providers/googleAiStudio.d.ts +5 -5
  157. package/dist/lib/providers/googleAiStudio.js +10 -7
  158. package/dist/lib/providers/googleVertex.d.ts +16 -4
  159. package/dist/lib/providers/googleVertex.js +72 -16
  160. package/dist/lib/providers/litellm.d.ts +3 -3
  161. package/dist/lib/providers/litellm.js +10 -10
  162. package/dist/lib/providers/mistral.d.ts +3 -3
  163. package/dist/lib/providers/mistral.js +7 -6
  164. package/dist/lib/providers/ollama.d.ts +3 -4
  165. package/dist/lib/providers/ollama.js +7 -8
  166. package/dist/lib/providers/openAI.d.ts +14 -2
  167. package/dist/lib/providers/openAI.js +60 -6
  168. package/dist/lib/providers/openRouter.d.ts +2 -2
  169. package/dist/lib/providers/openRouter.js +10 -6
  170. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  171. package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
  172. package/dist/lib/rag/ChunkerFactory.js +321 -0
  173. package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
  174. package/dist/lib/rag/ChunkerRegistry.js +422 -0
  175. package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
  176. package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
  177. package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
  178. package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
  179. package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
  180. package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
  181. package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
  182. package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
  183. package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
  184. package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
  185. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
  186. package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
  187. package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
  188. package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
  189. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  190. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
  191. package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
  192. package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
  193. package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
  194. package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
  195. package/dist/lib/rag/chunkers/index.d.ts +15 -0
  196. package/dist/lib/rag/chunkers/index.js +16 -0
  197. package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
  198. package/dist/lib/rag/chunking/characterChunker.js +143 -0
  199. package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
  200. package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
  201. package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
  202. package/dist/lib/rag/chunking/htmlChunker.js +248 -0
  203. package/dist/lib/rag/chunking/index.d.ts +15 -0
  204. package/dist/lib/rag/chunking/index.js +18 -0
  205. package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
  206. package/dist/lib/rag/chunking/jsonChunker.js +282 -0
  207. package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
  208. package/dist/lib/rag/chunking/latexChunker.js +252 -0
  209. package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
  210. package/dist/lib/rag/chunking/markdownChunker.js +202 -0
  211. package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
  212. package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
  213. package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
  214. package/dist/lib/rag/chunking/semanticChunker.js +307 -0
  215. package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
  216. package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
  217. package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
  218. package/dist/lib/rag/chunking/tokenChunker.js +184 -0
  219. package/dist/lib/rag/document/MDocument.d.ts +198 -0
  220. package/dist/lib/rag/document/MDocument.js +393 -0
  221. package/dist/lib/rag/document/index.d.ts +5 -0
  222. package/dist/lib/rag/document/index.js +6 -0
  223. package/dist/lib/rag/document/loaders.d.ts +201 -0
  224. package/dist/lib/rag/document/loaders.js +501 -0
  225. package/dist/lib/rag/errors/RAGError.d.ts +244 -0
  226. package/dist/lib/rag/errors/RAGError.js +275 -0
  227. package/dist/lib/rag/errors/index.d.ts +6 -0
  228. package/dist/lib/rag/errors/index.js +7 -0
  229. package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
  230. package/dist/lib/rag/graphRag/graphRAG.js +385 -0
  231. package/dist/lib/rag/graphRag/index.d.ts +4 -0
  232. package/dist/lib/rag/graphRag/index.js +5 -0
  233. package/dist/lib/rag/index.d.ts +103 -0
  234. package/dist/lib/rag/index.js +142 -0
  235. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  236. package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
  237. package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  238. package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
  239. package/dist/lib/rag/metadata/index.d.ts +6 -0
  240. package/dist/lib/rag/metadata/index.js +10 -0
  241. package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
  242. package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
  243. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
  244. package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
  245. package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
  246. package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
  247. package/dist/lib/rag/pipeline/index.d.ts +5 -0
  248. package/dist/lib/rag/pipeline/index.js +6 -0
  249. package/dist/lib/rag/ragIntegration.d.ts +38 -0
  250. package/dist/lib/rag/ragIntegration.js +212 -0
  251. package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
  252. package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
  253. package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
  254. package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
  255. package/dist/lib/rag/reranker/index.d.ts +6 -0
  256. package/dist/lib/rag/reranker/index.js +10 -0
  257. package/dist/lib/rag/reranker/reranker.d.ts +71 -0
  258. package/dist/lib/rag/reranker/reranker.js +278 -0
  259. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
  260. package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
  261. package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
  262. package/dist/lib/rag/resilience/RetryHandler.js +301 -0
  263. package/dist/lib/rag/resilience/index.d.ts +7 -0
  264. package/dist/lib/rag/resilience/index.js +8 -0
  265. package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
  266. package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
  267. package/dist/lib/rag/retrieval/index.d.ts +5 -0
  268. package/dist/lib/rag/retrieval/index.js +6 -0
  269. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
  270. package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
  271. package/dist/lib/rag/types.d.ts +768 -0
  272. package/dist/lib/rag/types.js +9 -0
  273. package/dist/lib/server/index.d.ts +15 -11
  274. package/dist/lib/server/index.js +55 -51
  275. package/dist/lib/server/utils/validation.d.ts +2 -2
  276. package/dist/lib/types/common.d.ts +0 -1
  277. package/dist/lib/types/fileTypes.d.ts +1 -1
  278. package/dist/lib/types/generateTypes.d.ts +42 -8
  279. package/dist/lib/types/generateTypes.js +1 -1
  280. package/dist/lib/types/index.d.ts +25 -24
  281. package/dist/lib/types/index.js +21 -20
  282. package/dist/lib/types/modelTypes.d.ts +16 -16
  283. package/dist/lib/types/pptTypes.d.ts +14 -2
  284. package/dist/lib/types/pptTypes.js +16 -0
  285. package/dist/lib/types/streamTypes.d.ts +28 -8
  286. package/dist/lib/types/streamTypes.js +1 -1
  287. package/dist/lib/utils/async/delay.d.ts +40 -0
  288. package/dist/lib/utils/async/delay.js +43 -0
  289. package/dist/lib/utils/async/index.d.ts +23 -0
  290. package/dist/lib/utils/async/index.js +24 -0
  291. package/dist/lib/utils/async/retry.d.ts +141 -0
  292. package/dist/lib/utils/async/retry.js +172 -0
  293. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  294. package/dist/lib/utils/async/withTimeout.js +97 -0
  295. package/dist/lib/utils/fileDetector.d.ts +7 -1
  296. package/dist/lib/utils/fileDetector.js +91 -18
  297. package/dist/lib/utils/json/extract.d.ts +103 -0
  298. package/dist/lib/utils/json/extract.js +249 -0
  299. package/dist/lib/utils/json/index.d.ts +36 -0
  300. package/dist/lib/utils/json/index.js +37 -0
  301. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  302. package/dist/lib/utils/json/safeParse.js +191 -0
  303. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  304. package/dist/lib/utils/messageBuilder.js +15 -7
  305. package/dist/lib/utils/modelRouter.d.ts +4 -4
  306. package/dist/lib/utils/modelRouter.js +4 -4
  307. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  308. package/dist/lib/utils/sanitizers/filename.js +366 -0
  309. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  310. package/dist/lib/utils/sanitizers/html.js +326 -0
  311. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  312. package/dist/lib/utils/sanitizers/index.js +30 -0
  313. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  314. package/dist/lib/utils/sanitizers/svg.js +483 -0
  315. package/dist/mcp/index.d.ts +6 -5
  316. package/dist/mcp/index.js +7 -5
  317. package/dist/neurolink.d.ts +11 -13
  318. package/dist/neurolink.js +95 -29
  319. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  320. package/dist/processors/base/BaseFileProcessor.js +613 -0
  321. package/dist/processors/base/index.d.ts +14 -0
  322. package/dist/processors/base/index.js +19 -0
  323. package/dist/processors/base/types.d.ts +593 -0
  324. package/dist/processors/base/types.js +76 -0
  325. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  326. package/dist/processors/cli/fileProcessorCli.js +388 -0
  327. package/dist/processors/cli/index.d.ts +37 -0
  328. package/dist/processors/cli/index.js +49 -0
  329. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  330. package/dist/processors/code/ConfigProcessor.js +400 -0
  331. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  332. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  333. package/dist/processors/code/index.d.ts +44 -0
  334. package/dist/processors/code/index.js +60 -0
  335. package/dist/processors/config/fileTypes.d.ts +283 -0
  336. package/dist/processors/config/fileTypes.js +520 -0
  337. package/dist/processors/config/index.d.ts +32 -0
  338. package/dist/processors/config/index.js +92 -0
  339. package/dist/processors/config/languageMap.d.ts +66 -0
  340. package/dist/processors/config/languageMap.js +410 -0
  341. package/dist/processors/config/mimeTypes.d.ts +376 -0
  342. package/dist/processors/config/mimeTypes.js +338 -0
  343. package/dist/processors/config/sizeLimits.d.ts +194 -0
  344. package/dist/processors/config/sizeLimits.js +246 -0
  345. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  346. package/dist/processors/data/JsonProcessor.js +203 -0
  347. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  348. package/dist/processors/data/XmlProcessor.js +283 -0
  349. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  350. package/dist/processors/data/YamlProcessor.js +294 -0
  351. package/dist/processors/data/index.d.ts +49 -0
  352. package/dist/processors/data/index.js +76 -0
  353. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  354. package/dist/processors/document/ExcelProcessor.js +519 -0
  355. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  356. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  357. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  358. package/dist/processors/document/RtfProcessor.js +361 -0
  359. package/dist/processors/document/WordProcessor.d.ts +168 -0
  360. package/dist/processors/document/WordProcessor.js +353 -0
  361. package/dist/processors/document/index.d.ts +54 -0
  362. package/dist/processors/document/index.js +90 -0
  363. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  364. package/dist/processors/errors/FileErrorCode.js +255 -0
  365. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  366. package/dist/processors/errors/errorHelpers.js +378 -0
  367. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  368. package/dist/processors/errors/errorSerializer.js +507 -0
  369. package/dist/processors/errors/index.d.ts +46 -0
  370. package/dist/processors/errors/index.js +49 -0
  371. package/dist/processors/index.d.ts +76 -0
  372. package/dist/processors/index.js +112 -0
  373. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  374. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  375. package/dist/processors/integration/index.d.ts +42 -0
  376. package/dist/processors/integration/index.js +44 -0
  377. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  378. package/dist/processors/markup/HtmlProcessor.js +249 -0
  379. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  380. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  381. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  382. package/dist/processors/markup/SvgProcessor.js +240 -0
  383. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  384. package/dist/processors/markup/TextProcessor.js +188 -0
  385. package/dist/processors/markup/index.d.ts +66 -0
  386. package/dist/processors/markup/index.js +102 -0
  387. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  388. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  389. package/dist/processors/registry/index.d.ts +12 -0
  390. package/dist/processors/registry/index.js +16 -0
  391. package/dist/processors/registry/types.d.ts +53 -0
  392. package/dist/processors/registry/types.js +10 -0
  393. package/dist/providers/amazonBedrock.d.ts +15 -2
  394. package/dist/providers/amazonBedrock.js +65 -8
  395. package/dist/providers/anthropic.d.ts +3 -3
  396. package/dist/providers/anthropic.js +10 -7
  397. package/dist/providers/googleAiStudio.d.ts +5 -5
  398. package/dist/providers/googleAiStudio.js +10 -7
  399. package/dist/providers/googleVertex.d.ts +16 -4
  400. package/dist/providers/googleVertex.js +72 -16
  401. package/dist/providers/litellm.d.ts +3 -3
  402. package/dist/providers/litellm.js +10 -10
  403. package/dist/providers/mistral.d.ts +3 -3
  404. package/dist/providers/mistral.js +7 -6
  405. package/dist/providers/ollama.d.ts +3 -4
  406. package/dist/providers/ollama.js +7 -8
  407. package/dist/providers/openAI.d.ts +14 -2
  408. package/dist/providers/openAI.js +60 -6
  409. package/dist/providers/openRouter.d.ts +2 -2
  410. package/dist/providers/openRouter.js +10 -6
  411. package/dist/rag/ChunkerFactory.d.ts +91 -0
  412. package/dist/rag/ChunkerFactory.js +320 -0
  413. package/dist/rag/ChunkerRegistry.d.ts +91 -0
  414. package/dist/rag/ChunkerRegistry.js +421 -0
  415. package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
  416. package/dist/rag/chunkers/BaseChunker.js +143 -0
  417. package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
  418. package/dist/rag/chunkers/CharacterChunker.js +28 -0
  419. package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
  420. package/dist/rag/chunkers/HTMLChunker.js +38 -0
  421. package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
  422. package/dist/rag/chunkers/JSONChunker.js +68 -0
  423. package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
  424. package/dist/rag/chunkers/LaTeXChunker.js +63 -0
  425. package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
  426. package/dist/rag/chunkers/MarkdownChunker.js +102 -0
  427. package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
  428. package/dist/rag/chunkers/RecursiveChunker.js +139 -0
  429. package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  430. package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
  431. package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
  432. package/dist/rag/chunkers/SentenceChunker.js +66 -0
  433. package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
  434. package/dist/rag/chunkers/TokenChunker.js +61 -0
  435. package/dist/rag/chunkers/index.d.ts +15 -0
  436. package/dist/rag/chunkers/index.js +15 -0
  437. package/dist/rag/chunking/characterChunker.d.ts +16 -0
  438. package/dist/rag/chunking/characterChunker.js +142 -0
  439. package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
  440. package/dist/rag/chunking/chunkerRegistry.js +194 -0
  441. package/dist/rag/chunking/htmlChunker.d.ts +34 -0
  442. package/dist/rag/chunking/htmlChunker.js +247 -0
  443. package/dist/rag/chunking/index.d.ts +15 -0
  444. package/dist/rag/chunking/index.js +17 -0
  445. package/dist/rag/chunking/jsonChunker.d.ts +20 -0
  446. package/dist/rag/chunking/jsonChunker.js +281 -0
  447. package/dist/rag/chunking/latexChunker.d.ts +26 -0
  448. package/dist/rag/chunking/latexChunker.js +251 -0
  449. package/dist/rag/chunking/markdownChunker.d.ts +19 -0
  450. package/dist/rag/chunking/markdownChunker.js +201 -0
  451. package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
  452. package/dist/rag/chunking/recursiveChunker.js +148 -0
  453. package/dist/rag/chunking/semanticChunker.d.ts +41 -0
  454. package/dist/rag/chunking/semanticChunker.js +306 -0
  455. package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
  456. package/dist/rag/chunking/sentenceChunker.js +230 -0
  457. package/dist/rag/chunking/tokenChunker.d.ts +36 -0
  458. package/dist/rag/chunking/tokenChunker.js +183 -0
  459. package/dist/rag/document/MDocument.d.ts +198 -0
  460. package/dist/rag/document/MDocument.js +392 -0
  461. package/dist/rag/document/index.d.ts +5 -0
  462. package/dist/rag/document/index.js +5 -0
  463. package/dist/rag/document/loaders.d.ts +201 -0
  464. package/dist/rag/document/loaders.js +500 -0
  465. package/dist/rag/errors/RAGError.d.ts +244 -0
  466. package/dist/rag/errors/RAGError.js +274 -0
  467. package/dist/rag/errors/index.d.ts +6 -0
  468. package/dist/rag/errors/index.js +6 -0
  469. package/dist/rag/graphRag/graphRAG.d.ts +115 -0
  470. package/dist/rag/graphRag/graphRAG.js +384 -0
  471. package/dist/rag/graphRag/index.d.ts +4 -0
  472. package/dist/rag/graphRag/index.js +4 -0
  473. package/dist/rag/index.d.ts +103 -0
  474. package/dist/rag/index.js +141 -0
  475. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  476. package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
  477. package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  478. package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
  479. package/dist/rag/metadata/index.d.ts +6 -0
  480. package/dist/rag/metadata/index.js +9 -0
  481. package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
  482. package/dist/rag/metadata/metadataExtractor.js +277 -0
  483. package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
  484. package/dist/rag/pipeline/RAGPipeline.js +401 -0
  485. package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
  486. package/dist/rag/pipeline/contextAssembly.js +337 -0
  487. package/dist/rag/pipeline/index.d.ts +5 -0
  488. package/dist/rag/pipeline/index.js +5 -0
  489. package/dist/rag/ragIntegration.d.ts +38 -0
  490. package/dist/rag/ragIntegration.js +211 -0
  491. package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
  492. package/dist/rag/reranker/RerankerFactory.js +430 -0
  493. package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
  494. package/dist/rag/reranker/RerankerRegistry.js +402 -0
  495. package/dist/rag/reranker/index.d.ts +6 -0
  496. package/dist/rag/reranker/index.js +9 -0
  497. package/dist/rag/reranker/reranker.d.ts +71 -0
  498. package/dist/rag/reranker/reranker.js +277 -0
  499. package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
  500. package/dist/rag/resilience/CircuitBreaker.js +431 -0
  501. package/dist/rag/resilience/RetryHandler.d.ts +115 -0
  502. package/dist/rag/resilience/RetryHandler.js +300 -0
  503. package/dist/rag/resilience/index.d.ts +7 -0
  504. package/dist/rag/resilience/index.js +7 -0
  505. package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
  506. package/dist/rag/retrieval/hybridSearch.js +313 -0
  507. package/dist/rag/retrieval/index.d.ts +5 -0
  508. package/dist/rag/retrieval/index.js +5 -0
  509. package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
  510. package/dist/rag/retrieval/vectorQueryTool.js +289 -0
  511. package/dist/rag/types.d.ts +768 -0
  512. package/dist/rag/types.js +8 -0
  513. package/dist/server/index.d.ts +15 -11
  514. package/dist/server/index.js +55 -51
  515. package/dist/server/utils/validation.d.ts +8 -8
  516. package/dist/types/common.d.ts +0 -1
  517. package/dist/types/fileTypes.d.ts +1 -1
  518. package/dist/types/generateTypes.d.ts +42 -8
  519. package/dist/types/generateTypes.js +1 -1
  520. package/dist/types/index.d.ts +25 -24
  521. package/dist/types/index.js +21 -20
  522. package/dist/types/modelTypes.d.ts +10 -10
  523. package/dist/types/pptTypes.d.ts +14 -2
  524. package/dist/types/pptTypes.js +16 -0
  525. package/dist/types/streamTypes.d.ts +28 -8
  526. package/dist/types/streamTypes.js +1 -1
  527. package/dist/utils/async/delay.d.ts +40 -0
  528. package/dist/utils/async/delay.js +42 -0
  529. package/dist/utils/async/index.d.ts +23 -0
  530. package/dist/utils/async/index.js +23 -0
  531. package/dist/utils/async/retry.d.ts +141 -0
  532. package/dist/utils/async/retry.js +171 -0
  533. package/dist/utils/async/withTimeout.d.ts +73 -0
  534. package/dist/utils/async/withTimeout.js +96 -0
  535. package/dist/utils/fileDetector.d.ts +7 -1
  536. package/dist/utils/fileDetector.js +91 -18
  537. package/dist/utils/json/extract.d.ts +103 -0
  538. package/dist/utils/json/extract.js +248 -0
  539. package/dist/utils/json/index.d.ts +36 -0
  540. package/dist/utils/json/index.js +36 -0
  541. package/dist/utils/json/safeParse.d.ts +137 -0
  542. package/dist/utils/json/safeParse.js +190 -0
  543. package/dist/utils/messageBuilder.d.ts +2 -2
  544. package/dist/utils/messageBuilder.js +15 -7
  545. package/dist/utils/modelRouter.d.ts +4 -4
  546. package/dist/utils/modelRouter.js +4 -4
  547. package/dist/utils/sanitizers/filename.d.ts +137 -0
  548. package/dist/utils/sanitizers/filename.js +365 -0
  549. package/dist/utils/sanitizers/html.d.ts +170 -0
  550. package/dist/utils/sanitizers/html.js +325 -0
  551. package/dist/utils/sanitizers/index.d.ts +26 -0
  552. package/dist/utils/sanitizers/index.js +29 -0
  553. package/dist/utils/sanitizers/svg.d.ts +81 -0
  554. package/dist/utils/sanitizers/svg.js +482 -0
  555. package/package.json +2 -2
@@ -0,0 +1,756 @@
1
+ /**
2
+ * RAG CLI Commands for NeuroLink
3
+ *
4
+ * Implements commands for RAG document processing:
5
+ * - neurolink rag chunk <file> - Chunk a document
6
+ * - neurolink rag index <file> - Index a document for retrieval
7
+ * - neurolink rag query <query> - Query indexed documents
8
+ */
9
+ import chalk from "chalk";
10
+ import { existsSync } from "fs";
11
+ import { readFile, writeFile } from "fs/promises";
12
+ import ora from "ora";
13
+ import { basename, extname, resolve } from "path";
14
+ import { ProviderFactory } from "../../lib/factories/providerFactory.js";
15
+ import { ProviderRegistry } from "../../lib/factories/providerRegistry.js";
16
+ import { ChunkerRegistry } from "../../lib/rag/chunking/chunkerRegistry.js";
17
+ import { GraphRAG } from "../../lib/rag/graphRag/graphRAG.js";
18
+ import { LLMMetadataExtractor } from "../../lib/rag/metadata/metadataExtractor.js";
19
+ import { createHybridSearch, InMemoryBM25Index, } from "../../lib/rag/retrieval/hybridSearch.js";
20
+ import { InMemoryVectorStore } from "../../lib/rag/retrieval/vectorQueryTool.js";
21
+ import { globalSession } from "../../lib/session/globalSessionState.js";
22
+ import { logger } from "../../lib/utils/logger.js";
23
+ import { getBestProvider } from "../../lib/utils/providerUtils.js";
/**
 * Make sure the NeuroLink SDK and its providers are ready before a command
 * that needs a provider runs.
 *
 * Obtains (or creates) the shared NeuroLink instance from the global session,
 * then registers all providers if the registry reports that they are not
 * registered yet.
 *
 * @returns {Promise<void>}
 */
async function ensureSDKInitialized() {
    // Obtaining the shared instance is done for its initialization side effect.
    globalSession.getOrCreateNeuroLink();
    // Nothing more to do when the registry already reports registered providers.
    if (ProviderRegistry.isRegistered()) {
        return;
    }
    await ProviderRegistry.registerAllProviders();
}
/**
 * Default embedding model per provider name (aliases included).
 * Used when neither the CLI nor the environment selects a model.
 */
const DEFAULT_EMBEDDING_MODELS = (() => {
    const googleModel = "text-embedding-004";
    const openAiModel = "text-embedding-3-small";
    const titanModel = "amazon.titan-embed-text-v2:0";
    return {
        vertex: googleModel,
        google: googleModel,
        "google-vertex": googleModel,
        openai: openAiModel,
        azure: openAiModel,
        "azure-openai": openAiModel,
        bedrock: titanModel,
        "amazon-bedrock": titanModel,
    };
})();
/**
 * Environment variables that may name an embedding model, per provider.
 * Each list is ordered by priority: the first variable that is set wins.
 */
const EMBEDDING_ENV_VARS = (() => {
    // Fresh arrays per key so no two providers share one list instance.
    const vertexFirst = () => ["VERTEX_EMBEDDING_MODEL", "GOOGLE_EMBEDDING_MODEL"];
    const bedrockVars = () => ["BEDROCK_EMBEDDING_MODEL", "AWS_EMBEDDING_MODEL"];
    return {
        vertex: vertexFirst(),
        google: ["GOOGLE_EMBEDDING_MODEL", "VERTEX_EMBEDDING_MODEL"],
        "google-vertex": vertexFirst(),
        openai: ["OPENAI_EMBEDDING_MODEL"],
        azure: ["AZURE_EMBEDDING_MODEL", "AZURE_OPENAI_EMBEDDING_MODEL"],
        "azure-openai": ["AZURE_OPENAI_EMBEDDING_MODEL", "AZURE_EMBEDDING_MODEL"],
        bedrock: bedrockVars(),
        "amazon-bedrock": bedrockVars(),
    };
})();
/**
 * Environment variables holding each provider's general (generation) model.
 * They are consulted only to see whether the user happened to put an
 * embedding model there.
 */
const PROVIDER_MODEL_ENV_VARS = (() => {
    const bedrockVars = () => ["BEDROCK_MODEL", "BEDROCK_MODEL_ID"];
    return {
        vertex: ["VERTEX_MODEL"],
        google: ["GOOGLE_AI_MODEL"],
        "google-vertex": ["VERTEX_MODEL"],
        openai: ["OPENAI_MODEL"],
        azure: ["AZURE_OPENAI_MODEL"],
        "azure-openai": ["AZURE_OPENAI_MODEL"],
        bedrock: bedrockVars(),
        "amazon-bedrock": bedrockVars(),
    };
})();
/**
 * Decide whether a model name looks like an embedding model.
 *
 * The name is matched case-insensitively against a small set of substrings
 * ("embed", "text-embedding", "titan-embed", "gecko").
 *
 * @param {string} modelName - Model identifier to inspect.
 * @returns {boolean} true when any pattern matches.
 */
function isEmbeddingModel(modelName) {
    const candidates = [/embed/i, /text-embedding/i, /titan-embed/i, /gecko/i];
    for (const candidate of candidates) {
        if (candidate.test(modelName)) {
            return true;
        }
    }
    return false;
}
/**
 * Get the appropriate embedding provider/model pair.
 *
 * Resolution order for the model:
 * 1. CLI --model flag (only if it looks like an embedding model)
 * 2. NEUROLINK_EMBEDDING_MODEL env var
 * 3. Provider-specific embedding env vars (e.g., VERTEX_EMBEDDING_MODEL)
 * 4. Provider's default model env var (only if it looks like an embedding model)
 * 5. Provider-specific default embedding model
 * 6. Fallback to OpenAI text-embedding-3-small (this also switches the provider)
 *
 * @param {string} [provider] - Provider name from the CLI; when omitted,
 *   getBestProvider() picks one.
 * @param {string} [model] - Model name from the CLI.
 * @returns {Promise<{provider: string, model: string}>} The provider (as given
 *   or auto-detected, original casing kept) and the chosen embedding model.
 * @throws {Error} When no provider was given and getBestProvider() fails; the
 *   underlying error is attached as `cause`.
 */
async function getEmbeddingModel(provider, model) {
    // The provider name comes from user input, so table lookups must only see
    // own properties. Otherwise a name such as "constructor" resolves to an
    // inherited Object.prototype member and breaks the loops below.
    const ownEntry = (table, key) => Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
    let resolvedProvider;
    if (provider) {
        // User explicitly specified a provider
        resolvedProvider = provider;
    }
    else {
        // Same auto-detection helper the file imports for this purpose
        try {
            resolvedProvider = await getBestProvider();
            logger.debug(`Auto-detected best available provider: ${resolvedProvider}`);
        }
        catch (error) {
            // Keep the detection failure reachable through `cause` instead of dropping it
            throw new Error(`No AI providers available for embeddings. Please configure at least one provider:\n` +
                ` - OpenAI: Set OPENAI_API_KEY\n` +
                ` - Google Vertex: Set GOOGLE_CLOUD_PROJECT_ID and authenticate with gcloud\n` +
                ` - Amazon Bedrock: Configure AWS credentials\n` +
                `Or specify a provider explicitly with --provider`, { cause: error });
        }
    }
    const normalizedProvider = resolvedProvider.toLowerCase();
    // Priority 1: CLI --model flag (if it's an embedding model)
    if (model) {
        if (isEmbeddingModel(model)) {
            logger.debug(`Using CLI-provided embedding model: ${model}`);
            return { provider: resolvedProvider, model };
        }
        // Leave a trace so an ignored --model value can be diagnosed
        logger.debug(`Ignoring CLI-provided model "${model}": not recognized as an embedding model`);
    }
    // Priority 2: Global NEUROLINK_EMBEDDING_MODEL env var
    const globalEmbeddingModel = process.env.NEUROLINK_EMBEDDING_MODEL;
    if (globalEmbeddingModel) {
        logger.debug(`Using NEUROLINK_EMBEDDING_MODEL: ${globalEmbeddingModel}`);
        return { provider: resolvedProvider, model: globalEmbeddingModel };
    }
    // Priority 3: Provider-specific embedding env vars (first one set wins)
    for (const envVar of ownEntry(EMBEDDING_ENV_VARS, normalizedProvider) ?? []) {
        const envModel = process.env[envVar];
        if (envModel) {
            logger.debug(`Using ${envVar}: ${envModel}`);
            return { provider: resolvedProvider, model: envModel };
        }
    }
    // Priority 4: Provider's general model env var, only if it names an embedding model
    for (const envVar of ownEntry(PROVIDER_MODEL_ENV_VARS, normalizedProvider) ?? []) {
        const envModel = process.env[envVar];
        if (envModel && isEmbeddingModel(envModel)) {
            logger.debug(`Using ${envVar} (detected as embedding model): ${envModel}`);
            return { provider: resolvedProvider, model: envModel };
        }
    }
    // Priority 5: Provider-specific default embedding model
    const defaultEmbeddingModel = ownEntry(DEFAULT_EMBEDDING_MODELS, normalizedProvider);
    if (defaultEmbeddingModel) {
        logger.debug(`Using default embedding model for ${resolvedProvider}: ${defaultEmbeddingModel}`);
        return { provider: resolvedProvider, model: defaultEmbeddingModel };
    }
    // Priority 6: Unknown provider - fall back to OpenAI's embedding model
    logger.warn(`No default embedding model for provider ${resolvedProvider}, falling back to OpenAI text-embedding-3-small`);
    return { provider: "openai", model: "text-embedding-3-small" };
}
170
+ /**
171
+ * In-memory storage for indexed documents, keyed by index name; the index handler stores { vectorStore, bm25Index, graphRag, chunks } entries and the query handler reads them
172
+ * In production, this would be persisted to a vector database. NOTE(review): nothing in this file persists the Map, so entries presumably do not outlive the current process - confirm whether `rag query` can see an index built by an earlier, separate `rag index` run
173
+ */
174
+ const indexedDocuments = new Map();
/**
 * Pick a chunking strategy name from a file's extension.
 *
 * The extension is compared case-insensitively. Markdown, HTML, JSON and LaTeX
 * files get their dedicated strategy; everything else (including .txt, .csv,
 * .pdf and files without an extension) falls back to "recursive".
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {string} One of "markdown", "html", "json", "latex", "recursive".
 */
function detectDocumentType(filePath) {
    switch (extname(filePath).toLowerCase()) {
        case ".md":
        case ".markdown":
            return "markdown";
        case ".html":
        case ".htm":
            return "html";
        case ".json":
            return "json";
        case ".tex":
        case ".latex":
            return "latex";
        default:
            // ".txt", ".csv", ".pdf" and every unrecognised extension
            return "recursive";
    }
}
/**
 * Format chunks for display.
 *
 * - "json": the chunk array pretty-printed with 2-space indentation.
 * - "table": a plain-text table with columns #, ID (first 8 characters),
 *   Length and Preview (first 50 characters, line breaks shown as spaces).
 *   "..." is appended to the preview only when the text was actually cut.
 *   An empty chunk list still renders the header and separator rows.
 * - anything else: each chunk's full text under a "--- Chunk N ---" banner.
 *
 * @param {Array<{id?: string, text: string}>} chunks - Chunks to render.
 * @param {string} format - "json", "table" or "text".
 * @returns {string} The formatted output.
 */
function formatChunks(chunks, format) {
    if (format === "json") {
        return JSON.stringify(chunks, null, 2);
    }
    if (format === "table") {
        const previewLength = 50;
        // Fixed header list so an empty result still produces a readable table
        const headers = ["#", "ID", "Length", "Preview"];
        const rows = chunks.map((chunk, i) => {
            const text = String(chunk.text ?? "");
            const preview = text.slice(0, previewLength).replace(/\r?\n/g, " ");
            return {
                "#": i + 1,
                // Tolerate chunks without an id instead of throwing
                ID: String(chunk.id ?? "").slice(0, 8),
                Length: text.length,
                Preview: text.length > previewLength ? `${preview}...` : preview,
            };
        });
        // reduce (not Math.max(...spread)) so very long chunk lists cannot
        // exceed the engine's argument limit
        const colWidths = headers.map((h) => rows.reduce((width, row) => Math.max(width, String(row[h]).length), h.length));
        let output = headers.map((h, i) => h.padEnd(colWidths[i])).join(" | ") + "\n";
        output += colWidths.map((w) => "-".repeat(w)).join("-+-") + "\n";
        output += rows
            .map((row) => headers
            .map((h, i) => String(row[h]).padEnd(colWidths[i]))
            .join(" | "))
            .join("\n");
        return output;
    }
    // Default text format
    return chunks
        .map((chunk, i) => `--- Chunk ${i + 1} (${chunk.text.length} chars) ---\n${chunk.text}\n`)
        .join("\n");
}
/**
 * Create the `chunk` subcommand.
 *
 * The returned yargs command module reads a file, splits it with the chosen
 * (or extension-derived) chunking strategy, optionally enriches each chunk
 * with LLM-extracted title/summary/keywords, and prints the result or writes
 * it to --output.
 *
 * Validation: maxSize must be a finite number greater than 0, overlap must be
 * a finite non-negative number smaller than maxSize. Any failure is reported
 * through the spinner and ends the process with exit code 1.
 *
 * @returns {{command: string, describe: string, builder: Function, handler: Function}}
 */
function createChunkCommand() {
    return {
        command: "chunk <file>",
        describe: "Chunk a document into smaller pieces for processing",
        builder: (yargs) => yargs
            .positional("file", {
            describe: "Path to the file to chunk",
            type: "string",
            demandOption: true,
        })
            .option("strategy", {
            alias: "s",
            describe: "Chunking strategy to use",
            choices: [
                "character",
                "recursive",
                "sentence",
                "token",
                "markdown",
                "html",
                "json",
                "latex",
                "semantic",
                "semantic-markdown",
            ],
            type: "string",
        })
            .option("maxSize", {
            alias: "m",
            describe: "Maximum chunk size",
            type: "number",
            default: 1000,
        })
            .option("overlap", {
            alias: "o",
            describe: "Overlap between chunks",
            type: "number",
            default: 200,
        })
            .option("format", {
            alias: "f",
            describe: "Output format",
            choices: ["json", "text", "table"],
            default: "text",
        })
            .option("output", {
            describe: "Output file path (optional)",
            type: "string",
        })
            .option("extract", {
            alias: "e",
            describe: "Extract metadata (title, summary, keywords)",
            type: "boolean",
            default: false,
        })
            .option("provider", {
            alias: "p",
            describe: "Provider for semantic chunking/metadata extraction (uses default from config/env if not specified)",
            type: "string",
        })
            .option("model", {
            describe: "Model for semantic chunking/metadata extraction (uses default from config/env if not specified)",
            type: "string",
        })
            .option("verbose", {
            alias: "v",
            describe: "Enable verbose output",
            type: "boolean",
            default: false,
        }),
        handler: async (args) => {
            const spinner = ora("Processing document...").start();
            try {
                // Validate file exists
                const filePath = resolve(args.file);
                if (!existsSync(filePath)) {
                    spinner.fail(chalk.red(`File not found: ${filePath}`));
                    process.exit(1);
                }
                // Read file content
                const content = await readFile(filePath, "utf-8");
                const fileName = basename(filePath);
                // Explicit --strategy wins; otherwise derive it from the extension
                const strategy = args.strategy || detectDocumentType(filePath);
                spinner.text = `Chunking with ${strategy} strategy...`;
                // Validate chunk parameters. Number.isFinite is required because
                // a non-numeric CLI value becomes NaN, and every comparison with
                // NaN is false, so plain range checks would let it through.
                const maxSize = args.maxSize ?? 1000;
                const overlap = args.overlap ?? 200;
                if (!Number.isFinite(maxSize) || maxSize <= 0) {
                    spinner.fail(chalk.red("maxSize must be greater than 0"));
                    process.exit(1);
                }
                if (!Number.isFinite(overlap) || overlap < 0) {
                    spinner.fail(chalk.red("overlap must be a non-negative number"));
                    process.exit(1);
                }
                if (overlap >= maxSize) {
                    spinner.fail(chalk.red("overlap must be less than maxSize"));
                    process.exit(1);
                }
                // Get chunker and chunk the document
                const chunker = ChunkerRegistry.get(strategy);
                const chunks = await chunker.chunk(content, {
                    maxSize,
                    overlap,
                    metadata: { source: fileName },
                });
                spinner.succeed(chalk.green(`Created ${chunks.length} chunks from ${fileName}`));
                // Extract metadata if requested
                if (args.extract) {
                    // Ensure providers are registered for metadata extraction
                    await ensureSDKInitialized();
                    spinner.start("Extracting metadata...");
                    const extractor = new LLMMetadataExtractor({
                        provider: args.provider,
                        modelName: args.model,
                    });
                    const results = await extractor.extract(chunks, {
                        title: true,
                        summary: true,
                        keywords: true,
                    });
                    // Merge metadata into chunks; results are matched to chunks by position
                    for (let i = 0; i < chunks.length && i < results.length; i++) {
                        const result = results[i];
                        if (result.title) {
                            chunks[i].metadata.title = result.title;
                        }
                        if (result.summary) {
                            chunks[i].metadata.summary = result.summary;
                        }
                        if (result.keywords) {
                            chunks[i].metadata.keywords = result.keywords;
                        }
                    }
                    spinner.succeed(chalk.green("Metadata extracted"));
                }
                // Format output
                const output = formatChunks(chunks, args.format || "text");
                // Write to file or stdout
                if (args.output) {
                    await writeFile(args.output, output, "utf-8");
                    logger.always(chalk.green(`Output written to ${args.output}`));
                }
                else {
                    logger.always("\n" + output);
                }
                // Show summary
                if (args.verbose) {
                    const totalChars = chunks.reduce((sum, c) => sum + c.text.length, 0);
                    // Avoid 0/0 = NaN when the chunker produced no chunks
                    const avgSize = chunks.length > 0 ? Math.round(totalChars / chunks.length) : 0;
                    logger.always(chalk.dim("\n--- Summary ---"));
                    logger.always(chalk.dim(`Strategy: ${strategy}`));
                    logger.always(chalk.dim(`Total chunks: ${chunks.length}`));
                    logger.always(chalk.dim(`Avg chunk size: ${avgSize} chars`));
                }
            }
            catch (error) {
                spinner.fail(chalk.red(`Error: ${error instanceof Error ? error.message : String(error)}`));
                process.exit(1);
            }
        },
    };
}
/**
 * Create the `index` subcommand.
 *
 * The returned yargs command module chunks a file, embeds every chunk with the
 * provider/model chosen by getEmbeddingModel(), fills an in-memory vector
 * store and BM25 index (plus a Graph RAG graph when --graph is set) and
 * stores them in `indexedDocuments` under the index name.
 *
 * Validation: maxSize must be a finite number greater than 0, overlap must be
 * a finite non-negative number smaller than maxSize. Any failure is reported
 * through the spinner and ends the process with exit code 1.
 *
 * @returns {{command: string, describe: string, builder: Function, handler: Function}}
 */
function createIndexCommand() {
    return {
        command: "index <file>",
        describe: "Index a document for semantic search",
        builder: (yargs) => yargs
            .positional("file", {
            describe: "Path to the file to index",
            type: "string",
            demandOption: true,
        })
            .option("indexName", {
            alias: "n",
            describe: "Name for the index",
            type: "string",
        })
            .option("strategy", {
            alias: "s",
            describe: "Chunking strategy to use",
            choices: [
                "character",
                "recursive",
                "sentence",
                "token",
                "markdown",
                "html",
                "json",
                "latex",
                "semantic",
                "semantic-markdown",
            ],
            type: "string",
        })
            .option("maxSize", {
            alias: "m",
            describe: "Maximum chunk size",
            type: "number",
            default: 1000,
        })
            .option("overlap", {
            alias: "o",
            describe: "Overlap between chunks",
            type: "number",
            default: 200,
        })
            .option("provider", {
            alias: "p",
            describe: "Provider for embeddings (uses default from config/env if not specified)",
            type: "string",
        })
            .option("model", {
            describe: "Model for embeddings (uses default from config/env if not specified)",
            type: "string",
        })
            .option("graph", {
            alias: "g",
            describe: "Build Graph RAG index",
            type: "boolean",
            default: false,
        })
            .option("verbose", {
            alias: "v",
            describe: "Enable verbose output",
            type: "boolean",
            default: false,
        }),
        handler: async (args) => {
            const spinner = ora("Indexing document...").start();
            try {
                // Ensure providers are registered before use
                await ensureSDKInitialized();
                // Validate file exists
                const filePath = resolve(args.file);
                if (!existsSync(filePath)) {
                    spinner.fail(chalk.red(`File not found: ${filePath}`));
                    process.exit(1);
                }
                // Read file content
                const content = await readFile(filePath, "utf-8");
                const fileName = basename(filePath);
                // Default index name is the file name without its last extension
                const indexName = args.indexName || fileName.replace(/\.[^.]+$/, "");
                // Explicit --strategy wins; otherwise derive it from the extension
                const strategy = args.strategy || detectDocumentType(filePath);
                spinner.text = `Chunking with ${strategy} strategy...`;
                // Validate chunk parameters. Number.isFinite is required because
                // a non-numeric CLI value becomes NaN, and every comparison with
                // NaN is false, so plain range checks would let it through.
                const maxSize = args.maxSize ?? 1000;
                const overlap = args.overlap ?? 200;
                if (!Number.isFinite(maxSize) || maxSize <= 0) {
                    spinner.fail(chalk.red("maxSize must be greater than 0"));
                    process.exit(1);
                }
                if (!Number.isFinite(overlap) || overlap < 0) {
                    spinner.fail(chalk.red("overlap must be a non-negative number"));
                    process.exit(1);
                }
                if (overlap >= maxSize) {
                    spinner.fail(chalk.red("overlap must be less than maxSize"));
                    process.exit(1);
                }
                // Chunk the document
                const chunker = ChunkerRegistry.get(strategy);
                const chunks = await chunker.chunk(content, {
                    maxSize,
                    overlap,
                    metadata: { source: fileName },
                });
                spinner.text = `Generating embeddings for ${chunks.length} chunks...`;
                // Resolve which provider/model produces the embeddings
                const { provider: embeddingProviderName, model: embeddingModelName } = await getEmbeddingModel(args.provider, args.model);
                if (args.verbose) {
                    logger.always(chalk.dim(`Using embedding provider: ${embeddingProviderName}, model: ${embeddingModelName}`));
                }
                const embeddingProvider = await ProviderFactory.createProvider(embeddingProviderName, embeddingModelName);
                // Verify the provider has an embed method
                if (typeof embeddingProvider.embed !== "function") {
                    spinner.fail(chalk.red(`Provider ${embeddingProviderName} with model ${embeddingModelName} does not support embeddings. ` +
                        `Please use an embedding model like text-embedding-004 (Vertex) or text-embedding-3-small (OpenAI).`));
                    process.exit(1);
                }
                // Generate embeddings one chunk at a time; embeddings[i] belongs to chunks[i]
                const embeddings = [];
                for (const chunk of chunks) {
                    const embedding = await embeddingProvider.embed(chunk.text);
                    embeddings.push(embedding);
                    chunk.embedding = embedding;
                }
                // Create indices
                const vectorStore = new InMemoryVectorStore();
                const bm25Index = new InMemoryBM25Index();
                const graphRag = new GraphRAG({ threshold: 0.7 });
                // Index in vector store (chunk text travels along in the metadata)
                await vectorStore.upsert(indexName, chunks.map((chunk, i) => ({
                    id: chunk.id,
                    vector: embeddings[i],
                    metadata: { ...chunk.metadata, text: chunk.text },
                })));
                // Index in BM25
                await bm25Index.addDocuments(chunks.map((chunk) => ({
                    id: chunk.id,
                    text: chunk.text,
                    metadata: chunk.metadata,
                })));
                // Build Graph RAG if requested
                if (args.graph) {
                    spinner.text = "Building knowledge graph...";
                    graphRag.createGraph(chunks.map((c) => ({ text: c.text, metadata: c.metadata })), embeddings.map((v) => ({ vector: v })));
                }
                // Store in memory
                indexedDocuments.set(indexName, {
                    vectorStore,
                    bm25Index,
                    graphRag,
                    chunks,
                });
                spinner.succeed(chalk.green(`Indexed ${chunks.length} chunks as "${indexName}"${args.graph ? " with Graph RAG" : ""}`));
                if (args.verbose) {
                    logger.always(chalk.dim("\n--- Index Summary ---"));
                    logger.always(chalk.dim(`Index name: ${indexName}`));
                    logger.always(chalk.dim(`Total chunks: ${chunks.length}`));
                    logger.always(chalk.dim(`Embedding dimension: ${embeddings[0]?.length || 0}`));
                    if (args.graph) {
                        const stats = graphRag.getStats();
                        logger.always(chalk.dim(`Graph nodes: ${stats.nodeCount}`));
                        logger.always(chalk.dim(`Graph edges: ${stats.edgeCount}`));
                    }
                }
            }
            catch (error) {
                spinner.fail(chalk.red(`Error: ${error instanceof Error ? error.message : String(error)}`));
                process.exit(1);
            }
        },
    };
}
/**
 * Create the `query` subcommand.
 *
 * The returned yargs command module looks up an index in `indexedDocuments`
 * (the named one, or the first one stored), embeds the query text and runs
 * one of three searches: Graph RAG (--graph), hybrid vector + BM25 (--hybrid)
 * or plain vector search. Results from every search type are normalised to
 * `{ id, score, text }` with score defaulting to 0 and text to "", so the
 * printing code can rely on both being present.
 *
 * topK falls back to 5 when it is 0/NaN/missing; any other value must be a
 * positive integer. Failures are reported through the spinner and end the
 * process with exit code 1.
 *
 * @returns {{command: string, describe: string, builder: Function, handler: Function}}
 */
function createQueryCommand() {
    return {
        command: "query <query>",
        describe: "Query indexed documents",
        builder: (yargs) => yargs
            .positional("query", {
            describe: "Search query",
            type: "string",
            demandOption: true,
        })
            .option("indexName", {
            alias: "n",
            describe: "Name of the index to query",
            type: "string",
        })
            .option("topK", {
            alias: "k",
            describe: "Number of results to return",
            type: "number",
            default: 5,
        })
            // NOTE(review): "-h" is often a help alias in CLIs - confirm it
            // does not collide with a global help alias in this CLI's parser.
            .option("hybrid", {
            alias: "h",
            describe: "Use hybrid search (vector + BM25)",
            type: "boolean",
            default: false,
        })
            .option("graph", {
            alias: "g",
            describe: "Use Graph RAG search",
            type: "boolean",
            default: false,
        })
            .option("provider", {
            alias: "p",
            describe: "Provider for embeddings (uses default from config/env if not specified)",
            type: "string",
        })
            .option("model", {
            describe: "Model for embeddings (uses default from config/env if not specified)",
            type: "string",
        })
            .option("format", {
            alias: "f",
            describe: "Output format",
            choices: ["json", "text", "table"],
            default: "text",
        })
            .option("verbose", {
            alias: "v",
            describe: "Enable verbose output",
            type: "boolean",
            default: false,
        }),
        handler: async (args) => {
            const spinner = ora("Searching...").start();
            // Single result shape for all search types; missing score/text get
            // safe defaults so toFixed()/slice() below cannot throw.
            const toResult = (id, score, text) => ({
                id,
                score: score || 0,
                text: text || "",
            });
            try {
                // Ensure providers are registered before use
                await ensureSDKInitialized();
                // Find index: explicit name, else the first index stored
                const indexName = args.indexName || Array.from(indexedDocuments.keys())[0];
                if (!indexName) {
                    spinner.fail(chalk.red("No indexed documents found. Run 'neurolink rag index' first."));
                    process.exit(1);
                }
                const indexed = indexedDocuments.get(indexName);
                if (!indexed) {
                    spinner.fail(chalk.red(`Index "${indexName}" not found.`));
                    process.exit(1);
                }
                // 0, NaN and undefined fall back to 5 (as before); anything
                // else has to be a usable result count.
                const topK = args.topK || 5;
                if (!Number.isInteger(topK) || topK < 1) {
                    spinner.fail(chalk.red("topK must be a positive integer"));
                    process.exit(1);
                }
                const { vectorStore, bm25Index, graphRag } = indexed;
                // Resolve which provider/model embeds the query
                const { provider: embeddingProviderName, model: embeddingModelName } = await getEmbeddingModel(args.provider, args.model);
                if (args.verbose) {
                    logger.always(chalk.dim(`Using embedding provider: ${embeddingProviderName}, model: ${embeddingModelName}`));
                }
                const embeddingProvider = await ProviderFactory.createProvider(embeddingProviderName, embeddingModelName);
                // Verify the provider has an embed method
                if (typeof embeddingProvider.embed !== "function") {
                    spinner.fail(chalk.red(`Provider ${embeddingProviderName} with model ${embeddingModelName} does not support embeddings. ` +
                        `Please use an embedding model like text-embedding-004 (Vertex) or text-embedding-3-small (OpenAI).`));
                    process.exit(1);
                }
                const queryEmbedding = await embeddingProvider.embed(args.query);
                let results;
                if (args.graph) {
                    // Graph RAG search (graph results carry their text in `content`)
                    spinner.text = "Searching knowledge graph...";
                    const graphResults = graphRag.query({
                        query: queryEmbedding,
                        topK,
                    });
                    results = graphResults.map((r) => toResult(r.id, r.score, r.content));
                }
                else if (args.hybrid) {
                    // Hybrid search
                    spinner.text = "Performing hybrid search...";
                    const hybridSearch = createHybridSearch({
                        vectorStore,
                        bm25Index,
                        indexName,
                        embeddingModel: {
                            provider: embeddingProviderName,
                            modelName: embeddingModelName,
                        },
                    });
                    const hybridResults = await hybridSearch(args.query, {
                        topK,
                    });
                    results = hybridResults.map((r) => toResult(r.id, r.score, r.text));
                }
                else {
                    // Vector search (the index command stores chunk text in metadata.text)
                    spinner.text = "Performing vector search...";
                    const vectorResults = await vectorStore.query({
                        indexName,
                        queryVector: queryEmbedding,
                        topK,
                    });
                    results = vectorResults.map((r) => toResult(r.id, r.score, r.metadata?.text || r.text));
                }
                spinner.succeed(chalk.green(`Found ${results.length} results`));
                // Format and display results
                if (args.format === "json") {
                    logger.always(JSON.stringify(results, null, 2));
                }
                else if (args.format === "table") {
                    const previewLength = 200;
                    logger.always("\n" + chalk.bold("Search Results:"));
                    results.forEach((r, i) => {
                        logger.always(chalk.cyan(`\n[${i + 1}] Score: ${r.score.toFixed(4)}`));
                        // Mark the preview as cut only when it really was
                        logger.always(r.text.length > previewLength
                            ? r.text.slice(0, previewLength) + "..."
                            : r.text);
                    });
                }
                else {
                    logger.always("\n" + chalk.bold("Search Results:"));
                    results.forEach((r, i) => {
                        logger.always(chalk.cyan(`\n--- Result ${i + 1} (Score: ${r.score.toFixed(4)}) ---`));
                        logger.always(r.text);
                    });
                }
                if (args.verbose) {
                    logger.always(chalk.dim("\n--- Query Info ---"));
                    logger.always(chalk.dim(`Index: ${indexName}`));
                    logger.always(chalk.dim(`Query: ${args.query}`));
                    logger.always(chalk.dim(`Search type: ${args.graph ? "Graph RAG" : args.hybrid ? "Hybrid" : "Vector"}`));
                }
            }
            catch (error) {
                spinner.fail(chalk.red(`Error: ${error instanceof Error ? error.message : String(error)}`));
                process.exit(1);
            }
        },
    };
}
/**
 * Factory for the `rag` CLI command tree.
 */
export class RAGCommandFactory {
    /**
     * Build the parent `rag` command.
     *
     * Its builder registers the chunk, index and query subcommands (in that
     * order) and demands that one of them is given.
     *
     * @returns {{command: string, describe: string, builder: Function, handler: Function}}
     */
    static createRAGCommands() {
        const registerSubcommands = (yargs) => {
            // Looked up lazily so nothing is constructed until yargs calls the builder
            const subcommandFactories = [
                createChunkCommand,
                createIndexCommand,
                createQueryCommand,
            ];
            let parser = yargs;
            for (const makeSubcommand of subcommandFactories) {
                parser = parser.command(makeSubcommand());
            }
            return parser.demandCommand(1, "Please specify a subcommand");
        };
        // Intentionally empty: a subcommand's handler does the actual work
        const noop = () => { };
        return {
            command: "rag <subcommand>",
            describe: "RAG document processing commands",
            builder: registerSubcommands,
            handler: noop,
        };
    }
}
754
+ // Pre-built `rag` command object (the result of RAGCommandFactory.createRAGCommands(), created once at module load), exported for CLI registration
755
+ export const ragCommand = RAGCommandFactory.createRAGCommands();
756
+ //# sourceMappingURL=rag.js.map