@juspay/neurolink 9.1.1 → 9.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (555) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +106 -37
  3. package/dist/agent/directTools.d.ts +11 -11
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/cli/commands/rag.d.ts +19 -0
  6. package/dist/cli/commands/rag.js +756 -0
  7. package/dist/cli/factories/commandFactory.js +146 -83
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/core/baseProvider.d.ts +43 -30
  10. package/dist/core/baseProvider.js +98 -138
  11. package/dist/core/conversationMemoryFactory.d.ts +2 -2
  12. package/dist/core/conversationMemoryFactory.js +2 -2
  13. package/dist/core/conversationMemoryInitializer.d.ts +1 -2
  14. package/dist/core/conversationMemoryInitializer.js +2 -2
  15. package/dist/core/infrastructure/baseError.d.ts +21 -0
  16. package/dist/core/infrastructure/baseError.js +22 -0
  17. package/dist/core/infrastructure/baseFactory.d.ts +21 -0
  18. package/dist/core/infrastructure/baseFactory.js +54 -0
  19. package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
  20. package/dist/core/infrastructure/baseRegistry.js +49 -0
  21. package/dist/core/infrastructure/index.d.ts +5 -0
  22. package/dist/core/infrastructure/index.js +5 -0
  23. package/dist/core/infrastructure/retry.d.ts +7 -0
  24. package/dist/core/infrastructure/retry.js +20 -0
  25. package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
  26. package/dist/core/infrastructure/typedEventEmitter.js +23 -0
  27. package/dist/core/redisConversationMemoryManager.d.ts +1 -6
  28. package/dist/core/redisConversationMemoryManager.js +7 -19
  29. package/dist/factories/providerFactory.d.ts +5 -3
  30. package/dist/factories/providerFactory.js +31 -24
  31. package/dist/image-gen/ImageGenService.d.ts +143 -0
  32. package/dist/image-gen/ImageGenService.js +345 -0
  33. package/dist/image-gen/imageGenTools.d.ts +126 -0
  34. package/dist/image-gen/imageGenTools.js +304 -0
  35. package/dist/image-gen/index.d.ts +46 -0
  36. package/dist/image-gen/index.js +48 -0
  37. package/dist/image-gen/types.d.ts +237 -0
  38. package/dist/image-gen/types.js +24 -0
  39. package/dist/index.d.ts +46 -12
  40. package/dist/index.js +88 -36
  41. package/dist/lib/agent/directTools.d.ts +8 -8
  42. package/dist/lib/core/baseProvider.d.ts +43 -30
  43. package/dist/lib/core/baseProvider.js +98 -138
  44. package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
  45. package/dist/lib/core/conversationMemoryFactory.js +2 -2
  46. package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
  47. package/dist/lib/core/conversationMemoryInitializer.js +2 -2
  48. package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
  49. package/dist/lib/core/infrastructure/baseError.js +23 -0
  50. package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
  51. package/dist/lib/core/infrastructure/baseFactory.js +55 -0
  52. package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
  53. package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
  54. package/dist/lib/core/infrastructure/index.d.ts +5 -0
  55. package/dist/lib/core/infrastructure/index.js +6 -0
  56. package/dist/lib/core/infrastructure/retry.d.ts +7 -0
  57. package/dist/lib/core/infrastructure/retry.js +21 -0
  58. package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
  59. package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
  60. package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
  61. package/dist/lib/core/redisConversationMemoryManager.js +7 -19
  62. package/dist/lib/factories/providerFactory.d.ts +5 -3
  63. package/dist/lib/factories/providerFactory.js +31 -24
  64. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  65. package/dist/lib/image-gen/ImageGenService.js +346 -0
  66. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  67. package/dist/lib/image-gen/imageGenTools.js +305 -0
  68. package/dist/lib/image-gen/index.d.ts +46 -0
  69. package/dist/lib/image-gen/index.js +49 -0
  70. package/dist/lib/image-gen/types.d.ts +237 -0
  71. package/dist/lib/image-gen/types.js +25 -0
  72. package/dist/lib/index.d.ts +46 -12
  73. package/dist/lib/index.js +88 -36
  74. package/dist/lib/mcp/index.d.ts +6 -5
  75. package/dist/lib/mcp/index.js +7 -5
  76. package/dist/lib/neurolink.d.ts +11 -13
  77. package/dist/lib/neurolink.js +95 -29
  78. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  79. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  80. package/dist/lib/processors/base/index.d.ts +14 -0
  81. package/dist/lib/processors/base/index.js +20 -0
  82. package/dist/lib/processors/base/types.d.ts +593 -0
  83. package/dist/lib/processors/base/types.js +77 -0
  84. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  85. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  86. package/dist/lib/processors/cli/index.d.ts +37 -0
  87. package/dist/lib/processors/cli/index.js +50 -0
  88. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  89. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  90. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  91. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  92. package/dist/lib/processors/code/index.d.ts +44 -0
  93. package/dist/lib/processors/code/index.js +61 -0
  94. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  95. package/dist/lib/processors/config/fileTypes.js +521 -0
  96. package/dist/lib/processors/config/index.d.ts +32 -0
  97. package/dist/lib/processors/config/index.js +93 -0
  98. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  99. package/dist/lib/processors/config/languageMap.js +411 -0
  100. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  101. package/dist/lib/processors/config/mimeTypes.js +339 -0
  102. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  103. package/dist/lib/processors/config/sizeLimits.js +247 -0
  104. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  105. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  106. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  107. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  108. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  109. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  110. package/dist/lib/processors/data/index.d.ts +49 -0
  111. package/dist/lib/processors/data/index.js +77 -0
  112. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  113. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  114. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  115. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  116. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  117. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  118. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  119. package/dist/lib/processors/document/WordProcessor.js +354 -0
  120. package/dist/lib/processors/document/index.d.ts +54 -0
  121. package/dist/lib/processors/document/index.js +91 -0
  122. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  123. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  124. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  125. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  126. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  127. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  128. package/dist/lib/processors/errors/index.d.ts +46 -0
  129. package/dist/lib/processors/errors/index.js +50 -0
  130. package/dist/lib/processors/index.d.ts +76 -0
  131. package/dist/lib/processors/index.js +113 -0
  132. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  133. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  134. package/dist/lib/processors/integration/index.d.ts +42 -0
  135. package/dist/lib/processors/integration/index.js +45 -0
  136. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  137. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  138. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  139. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  140. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  141. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  142. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  143. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  144. package/dist/lib/processors/markup/index.d.ts +66 -0
  145. package/dist/lib/processors/markup/index.js +103 -0
  146. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  147. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  148. package/dist/lib/processors/registry/index.d.ts +12 -0
  149. package/dist/lib/processors/registry/index.js +17 -0
  150. package/dist/lib/processors/registry/types.d.ts +53 -0
  151. package/dist/lib/processors/registry/types.js +11 -0
  152. package/dist/lib/providers/amazonBedrock.d.ts +15 -2
  153. package/dist/lib/providers/amazonBedrock.js +65 -8
  154. package/dist/lib/providers/anthropic.d.ts +3 -3
  155. package/dist/lib/providers/anthropic.js +10 -7
  156. package/dist/lib/providers/googleAiStudio.d.ts +5 -5
  157. package/dist/lib/providers/googleAiStudio.js +10 -7
  158. package/dist/lib/providers/googleVertex.d.ts +16 -4
  159. package/dist/lib/providers/googleVertex.js +72 -16
  160. package/dist/lib/providers/litellm.d.ts +3 -3
  161. package/dist/lib/providers/litellm.js +10 -10
  162. package/dist/lib/providers/mistral.d.ts +3 -3
  163. package/dist/lib/providers/mistral.js +7 -6
  164. package/dist/lib/providers/ollama.d.ts +3 -4
  165. package/dist/lib/providers/ollama.js +7 -8
  166. package/dist/lib/providers/openAI.d.ts +14 -2
  167. package/dist/lib/providers/openAI.js +60 -6
  168. package/dist/lib/providers/openRouter.d.ts +2 -2
  169. package/dist/lib/providers/openRouter.js +10 -6
  170. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  171. package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
  172. package/dist/lib/rag/ChunkerFactory.js +321 -0
  173. package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
  174. package/dist/lib/rag/ChunkerRegistry.js +422 -0
  175. package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
  176. package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
  177. package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
  178. package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
  179. package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
  180. package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
  181. package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
  182. package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
  183. package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
  184. package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
  185. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
  186. package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
  187. package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
  188. package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
  189. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  190. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
  191. package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
  192. package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
  193. package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
  194. package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
  195. package/dist/lib/rag/chunkers/index.d.ts +15 -0
  196. package/dist/lib/rag/chunkers/index.js +16 -0
  197. package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
  198. package/dist/lib/rag/chunking/characterChunker.js +143 -0
  199. package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
  200. package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
  201. package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
  202. package/dist/lib/rag/chunking/htmlChunker.js +248 -0
  203. package/dist/lib/rag/chunking/index.d.ts +15 -0
  204. package/dist/lib/rag/chunking/index.js +18 -0
  205. package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
  206. package/dist/lib/rag/chunking/jsonChunker.js +282 -0
  207. package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
  208. package/dist/lib/rag/chunking/latexChunker.js +252 -0
  209. package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
  210. package/dist/lib/rag/chunking/markdownChunker.js +202 -0
  211. package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
  212. package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
  213. package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
  214. package/dist/lib/rag/chunking/semanticChunker.js +307 -0
  215. package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
  216. package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
  217. package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
  218. package/dist/lib/rag/chunking/tokenChunker.js +184 -0
  219. package/dist/lib/rag/document/MDocument.d.ts +198 -0
  220. package/dist/lib/rag/document/MDocument.js +393 -0
  221. package/dist/lib/rag/document/index.d.ts +5 -0
  222. package/dist/lib/rag/document/index.js +6 -0
  223. package/dist/lib/rag/document/loaders.d.ts +201 -0
  224. package/dist/lib/rag/document/loaders.js +501 -0
  225. package/dist/lib/rag/errors/RAGError.d.ts +244 -0
  226. package/dist/lib/rag/errors/RAGError.js +275 -0
  227. package/dist/lib/rag/errors/index.d.ts +6 -0
  228. package/dist/lib/rag/errors/index.js +7 -0
  229. package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
  230. package/dist/lib/rag/graphRag/graphRAG.js +385 -0
  231. package/dist/lib/rag/graphRag/index.d.ts +4 -0
  232. package/dist/lib/rag/graphRag/index.js +5 -0
  233. package/dist/lib/rag/index.d.ts +103 -0
  234. package/dist/lib/rag/index.js +142 -0
  235. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  236. package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
  237. package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  238. package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
  239. package/dist/lib/rag/metadata/index.d.ts +6 -0
  240. package/dist/lib/rag/metadata/index.js +10 -0
  241. package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
  242. package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
  243. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
  244. package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
  245. package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
  246. package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
  247. package/dist/lib/rag/pipeline/index.d.ts +5 -0
  248. package/dist/lib/rag/pipeline/index.js +6 -0
  249. package/dist/lib/rag/ragIntegration.d.ts +38 -0
  250. package/dist/lib/rag/ragIntegration.js +212 -0
  251. package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
  252. package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
  253. package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
  254. package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
  255. package/dist/lib/rag/reranker/index.d.ts +6 -0
  256. package/dist/lib/rag/reranker/index.js +10 -0
  257. package/dist/lib/rag/reranker/reranker.d.ts +71 -0
  258. package/dist/lib/rag/reranker/reranker.js +278 -0
  259. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
  260. package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
  261. package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
  262. package/dist/lib/rag/resilience/RetryHandler.js +301 -0
  263. package/dist/lib/rag/resilience/index.d.ts +7 -0
  264. package/dist/lib/rag/resilience/index.js +8 -0
  265. package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
  266. package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
  267. package/dist/lib/rag/retrieval/index.d.ts +5 -0
  268. package/dist/lib/rag/retrieval/index.js +6 -0
  269. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
  270. package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
  271. package/dist/lib/rag/types.d.ts +768 -0
  272. package/dist/lib/rag/types.js +9 -0
  273. package/dist/lib/server/index.d.ts +15 -11
  274. package/dist/lib/server/index.js +55 -51
  275. package/dist/lib/server/utils/validation.d.ts +2 -2
  276. package/dist/lib/types/common.d.ts +0 -1
  277. package/dist/lib/types/fileTypes.d.ts +1 -1
  278. package/dist/lib/types/generateTypes.d.ts +42 -8
  279. package/dist/lib/types/generateTypes.js +1 -1
  280. package/dist/lib/types/index.d.ts +25 -24
  281. package/dist/lib/types/index.js +21 -20
  282. package/dist/lib/types/modelTypes.d.ts +16 -16
  283. package/dist/lib/types/pptTypes.d.ts +14 -2
  284. package/dist/lib/types/pptTypes.js +16 -0
  285. package/dist/lib/types/streamTypes.d.ts +28 -8
  286. package/dist/lib/types/streamTypes.js +1 -1
  287. package/dist/lib/utils/async/delay.d.ts +40 -0
  288. package/dist/lib/utils/async/delay.js +43 -0
  289. package/dist/lib/utils/async/index.d.ts +23 -0
  290. package/dist/lib/utils/async/index.js +24 -0
  291. package/dist/lib/utils/async/retry.d.ts +141 -0
  292. package/dist/lib/utils/async/retry.js +172 -0
  293. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  294. package/dist/lib/utils/async/withTimeout.js +97 -0
  295. package/dist/lib/utils/fileDetector.d.ts +7 -1
  296. package/dist/lib/utils/fileDetector.js +91 -18
  297. package/dist/lib/utils/json/extract.d.ts +103 -0
  298. package/dist/lib/utils/json/extract.js +249 -0
  299. package/dist/lib/utils/json/index.d.ts +36 -0
  300. package/dist/lib/utils/json/index.js +37 -0
  301. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  302. package/dist/lib/utils/json/safeParse.js +191 -0
  303. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  304. package/dist/lib/utils/messageBuilder.js +15 -7
  305. package/dist/lib/utils/modelRouter.d.ts +4 -4
  306. package/dist/lib/utils/modelRouter.js +4 -4
  307. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  308. package/dist/lib/utils/sanitizers/filename.js +366 -0
  309. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  310. package/dist/lib/utils/sanitizers/html.js +326 -0
  311. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  312. package/dist/lib/utils/sanitizers/index.js +30 -0
  313. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  314. package/dist/lib/utils/sanitizers/svg.js +483 -0
  315. package/dist/mcp/index.d.ts +6 -5
  316. package/dist/mcp/index.js +7 -5
  317. package/dist/neurolink.d.ts +11 -13
  318. package/dist/neurolink.js +95 -29
  319. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  320. package/dist/processors/base/BaseFileProcessor.js +613 -0
  321. package/dist/processors/base/index.d.ts +14 -0
  322. package/dist/processors/base/index.js +19 -0
  323. package/dist/processors/base/types.d.ts +593 -0
  324. package/dist/processors/base/types.js +76 -0
  325. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  326. package/dist/processors/cli/fileProcessorCli.js +388 -0
  327. package/dist/processors/cli/index.d.ts +37 -0
  328. package/dist/processors/cli/index.js +49 -0
  329. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  330. package/dist/processors/code/ConfigProcessor.js +400 -0
  331. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  332. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  333. package/dist/processors/code/index.d.ts +44 -0
  334. package/dist/processors/code/index.js +60 -0
  335. package/dist/processors/config/fileTypes.d.ts +283 -0
  336. package/dist/processors/config/fileTypes.js +520 -0
  337. package/dist/processors/config/index.d.ts +32 -0
  338. package/dist/processors/config/index.js +92 -0
  339. package/dist/processors/config/languageMap.d.ts +66 -0
  340. package/dist/processors/config/languageMap.js +410 -0
  341. package/dist/processors/config/mimeTypes.d.ts +376 -0
  342. package/dist/processors/config/mimeTypes.js +338 -0
  343. package/dist/processors/config/sizeLimits.d.ts +194 -0
  344. package/dist/processors/config/sizeLimits.js +246 -0
  345. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  346. package/dist/processors/data/JsonProcessor.js +203 -0
  347. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  348. package/dist/processors/data/XmlProcessor.js +283 -0
  349. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  350. package/dist/processors/data/YamlProcessor.js +294 -0
  351. package/dist/processors/data/index.d.ts +49 -0
  352. package/dist/processors/data/index.js +76 -0
  353. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  354. package/dist/processors/document/ExcelProcessor.js +519 -0
  355. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  356. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  357. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  358. package/dist/processors/document/RtfProcessor.js +361 -0
  359. package/dist/processors/document/WordProcessor.d.ts +168 -0
  360. package/dist/processors/document/WordProcessor.js +353 -0
  361. package/dist/processors/document/index.d.ts +54 -0
  362. package/dist/processors/document/index.js +90 -0
  363. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  364. package/dist/processors/errors/FileErrorCode.js +255 -0
  365. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  366. package/dist/processors/errors/errorHelpers.js +378 -0
  367. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  368. package/dist/processors/errors/errorSerializer.js +507 -0
  369. package/dist/processors/errors/index.d.ts +46 -0
  370. package/dist/processors/errors/index.js +49 -0
  371. package/dist/processors/index.d.ts +76 -0
  372. package/dist/processors/index.js +112 -0
  373. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  374. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  375. package/dist/processors/integration/index.d.ts +42 -0
  376. package/dist/processors/integration/index.js +44 -0
  377. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  378. package/dist/processors/markup/HtmlProcessor.js +249 -0
  379. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  380. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  381. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  382. package/dist/processors/markup/SvgProcessor.js +240 -0
  383. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  384. package/dist/processors/markup/TextProcessor.js +188 -0
  385. package/dist/processors/markup/index.d.ts +66 -0
  386. package/dist/processors/markup/index.js +102 -0
  387. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  388. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  389. package/dist/processors/registry/index.d.ts +12 -0
  390. package/dist/processors/registry/index.js +16 -0
  391. package/dist/processors/registry/types.d.ts +53 -0
  392. package/dist/processors/registry/types.js +10 -0
  393. package/dist/providers/amazonBedrock.d.ts +15 -2
  394. package/dist/providers/amazonBedrock.js +65 -8
  395. package/dist/providers/anthropic.d.ts +3 -3
  396. package/dist/providers/anthropic.js +10 -7
  397. package/dist/providers/googleAiStudio.d.ts +5 -5
  398. package/dist/providers/googleAiStudio.js +10 -7
  399. package/dist/providers/googleVertex.d.ts +16 -4
  400. package/dist/providers/googleVertex.js +72 -16
  401. package/dist/providers/litellm.d.ts +3 -3
  402. package/dist/providers/litellm.js +10 -10
  403. package/dist/providers/mistral.d.ts +3 -3
  404. package/dist/providers/mistral.js +7 -6
  405. package/dist/providers/ollama.d.ts +3 -4
  406. package/dist/providers/ollama.js +7 -8
  407. package/dist/providers/openAI.d.ts +14 -2
  408. package/dist/providers/openAI.js +60 -6
  409. package/dist/providers/openRouter.d.ts +2 -2
  410. package/dist/providers/openRouter.js +10 -6
  411. package/dist/rag/ChunkerFactory.d.ts +91 -0
  412. package/dist/rag/ChunkerFactory.js +320 -0
  413. package/dist/rag/ChunkerRegistry.d.ts +91 -0
  414. package/dist/rag/ChunkerRegistry.js +421 -0
  415. package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
  416. package/dist/rag/chunkers/BaseChunker.js +143 -0
  417. package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
  418. package/dist/rag/chunkers/CharacterChunker.js +28 -0
  419. package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
  420. package/dist/rag/chunkers/HTMLChunker.js +38 -0
  421. package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
  422. package/dist/rag/chunkers/JSONChunker.js +68 -0
  423. package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
  424. package/dist/rag/chunkers/LaTeXChunker.js +63 -0
  425. package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
  426. package/dist/rag/chunkers/MarkdownChunker.js +102 -0
  427. package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
  428. package/dist/rag/chunkers/RecursiveChunker.js +139 -0
  429. package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  430. package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
  431. package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
  432. package/dist/rag/chunkers/SentenceChunker.js +66 -0
  433. package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
  434. package/dist/rag/chunkers/TokenChunker.js +61 -0
  435. package/dist/rag/chunkers/index.d.ts +15 -0
  436. package/dist/rag/chunkers/index.js +15 -0
  437. package/dist/rag/chunking/characterChunker.d.ts +16 -0
  438. package/dist/rag/chunking/characterChunker.js +142 -0
  439. package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
  440. package/dist/rag/chunking/chunkerRegistry.js +194 -0
  441. package/dist/rag/chunking/htmlChunker.d.ts +34 -0
  442. package/dist/rag/chunking/htmlChunker.js +247 -0
  443. package/dist/rag/chunking/index.d.ts +15 -0
  444. package/dist/rag/chunking/index.js +17 -0
  445. package/dist/rag/chunking/jsonChunker.d.ts +20 -0
  446. package/dist/rag/chunking/jsonChunker.js +281 -0
  447. package/dist/rag/chunking/latexChunker.d.ts +26 -0
  448. package/dist/rag/chunking/latexChunker.js +251 -0
  449. package/dist/rag/chunking/markdownChunker.d.ts +19 -0
  450. package/dist/rag/chunking/markdownChunker.js +201 -0
  451. package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
  452. package/dist/rag/chunking/recursiveChunker.js +148 -0
  453. package/dist/rag/chunking/semanticChunker.d.ts +41 -0
  454. package/dist/rag/chunking/semanticChunker.js +306 -0
  455. package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
  456. package/dist/rag/chunking/sentenceChunker.js +230 -0
  457. package/dist/rag/chunking/tokenChunker.d.ts +36 -0
  458. package/dist/rag/chunking/tokenChunker.js +183 -0
  459. package/dist/rag/document/MDocument.d.ts +198 -0
  460. package/dist/rag/document/MDocument.js +392 -0
  461. package/dist/rag/document/index.d.ts +5 -0
  462. package/dist/rag/document/index.js +5 -0
  463. package/dist/rag/document/loaders.d.ts +201 -0
  464. package/dist/rag/document/loaders.js +500 -0
  465. package/dist/rag/errors/RAGError.d.ts +244 -0
  466. package/dist/rag/errors/RAGError.js +274 -0
  467. package/dist/rag/errors/index.d.ts +6 -0
  468. package/dist/rag/errors/index.js +6 -0
  469. package/dist/rag/graphRag/graphRAG.d.ts +115 -0
  470. package/dist/rag/graphRag/graphRAG.js +384 -0
  471. package/dist/rag/graphRag/index.d.ts +4 -0
  472. package/dist/rag/graphRag/index.js +4 -0
  473. package/dist/rag/index.d.ts +103 -0
  474. package/dist/rag/index.js +141 -0
  475. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  476. package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
  477. package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  478. package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
  479. package/dist/rag/metadata/index.d.ts +6 -0
  480. package/dist/rag/metadata/index.js +9 -0
  481. package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
  482. package/dist/rag/metadata/metadataExtractor.js +277 -0
  483. package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
  484. package/dist/rag/pipeline/RAGPipeline.js +401 -0
  485. package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
  486. package/dist/rag/pipeline/contextAssembly.js +337 -0
  487. package/dist/rag/pipeline/index.d.ts +5 -0
  488. package/dist/rag/pipeline/index.js +5 -0
  489. package/dist/rag/ragIntegration.d.ts +38 -0
  490. package/dist/rag/ragIntegration.js +211 -0
  491. package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
  492. package/dist/rag/reranker/RerankerFactory.js +430 -0
  493. package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
  494. package/dist/rag/reranker/RerankerRegistry.js +402 -0
  495. package/dist/rag/reranker/index.d.ts +6 -0
  496. package/dist/rag/reranker/index.js +9 -0
  497. package/dist/rag/reranker/reranker.d.ts +71 -0
  498. package/dist/rag/reranker/reranker.js +277 -0
  499. package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
  500. package/dist/rag/resilience/CircuitBreaker.js +431 -0
  501. package/dist/rag/resilience/RetryHandler.d.ts +115 -0
  502. package/dist/rag/resilience/RetryHandler.js +300 -0
  503. package/dist/rag/resilience/index.d.ts +7 -0
  504. package/dist/rag/resilience/index.js +7 -0
  505. package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
  506. package/dist/rag/retrieval/hybridSearch.js +313 -0
  507. package/dist/rag/retrieval/index.d.ts +5 -0
  508. package/dist/rag/retrieval/index.js +5 -0
  509. package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
  510. package/dist/rag/retrieval/vectorQueryTool.js +289 -0
  511. package/dist/rag/types.d.ts +768 -0
  512. package/dist/rag/types.js +8 -0
  513. package/dist/server/index.d.ts +15 -11
  514. package/dist/server/index.js +55 -51
  515. package/dist/server/utils/validation.d.ts +8 -8
  516. package/dist/types/common.d.ts +0 -1
  517. package/dist/types/fileTypes.d.ts +1 -1
  518. package/dist/types/generateTypes.d.ts +42 -8
  519. package/dist/types/generateTypes.js +1 -1
  520. package/dist/types/index.d.ts +25 -24
  521. package/dist/types/index.js +21 -20
  522. package/dist/types/modelTypes.d.ts +10 -10
  523. package/dist/types/pptTypes.d.ts +14 -2
  524. package/dist/types/pptTypes.js +16 -0
  525. package/dist/types/streamTypes.d.ts +28 -8
  526. package/dist/types/streamTypes.js +1 -1
  527. package/dist/utils/async/delay.d.ts +40 -0
  528. package/dist/utils/async/delay.js +42 -0
  529. package/dist/utils/async/index.d.ts +23 -0
  530. package/dist/utils/async/index.js +23 -0
  531. package/dist/utils/async/retry.d.ts +141 -0
  532. package/dist/utils/async/retry.js +171 -0
  533. package/dist/utils/async/withTimeout.d.ts +73 -0
  534. package/dist/utils/async/withTimeout.js +96 -0
  535. package/dist/utils/fileDetector.d.ts +7 -1
  536. package/dist/utils/fileDetector.js +91 -18
  537. package/dist/utils/json/extract.d.ts +103 -0
  538. package/dist/utils/json/extract.js +248 -0
  539. package/dist/utils/json/index.d.ts +36 -0
  540. package/dist/utils/json/index.js +36 -0
  541. package/dist/utils/json/safeParse.d.ts +137 -0
  542. package/dist/utils/json/safeParse.js +190 -0
  543. package/dist/utils/messageBuilder.d.ts +2 -2
  544. package/dist/utils/messageBuilder.js +15 -7
  545. package/dist/utils/modelRouter.d.ts +4 -4
  546. package/dist/utils/modelRouter.js +4 -4
  547. package/dist/utils/sanitizers/filename.d.ts +137 -0
  548. package/dist/utils/sanitizers/filename.js +365 -0
  549. package/dist/utils/sanitizers/html.d.ts +170 -0
  550. package/dist/utils/sanitizers/html.js +325 -0
  551. package/dist/utils/sanitizers/index.d.ts +26 -0
  552. package/dist/utils/sanitizers/index.js +29 -0
  553. package/dist/utils/sanitizers/svg.d.ts +81 -0
  554. package/dist/utils/sanitizers/svg.js +482 -0
  555. package/package.json +2 -2
@@ -0,0 +1,402 @@
1
+ /**
2
+ * RAG Pipeline Orchestrator
3
+ *
4
+ * Provides a complete end-to-end RAG pipeline that orchestrates:
5
+ * - Document loading and preprocessing
6
+ * - Chunking with configurable strategies
7
+ * - Embedding generation
8
+ * - Vector storage and retrieval
9
+ * - Context assembly for LLM queries
10
+ * - Response generation with citations
11
+ *
12
+ * @example
13
+ * ```typescript
14
+ * const pipeline = new RAGPipeline({
15
+ * vectorStore: myVectorStore,
16
+ * embeddingModel: { provider: 'openai', modelName: 'text-embedding-3-small' },
17
+ * generationModel: { provider: 'openai', modelName: 'gpt-4o-mini' }
18
+ * });
19
+ *
20
+ * // Ingest documents
21
+ * await pipeline.ingest(['/path/to/doc1.md', '/path/to/doc2.pdf']);
22
+ *
23
+ * // Query with RAG
24
+ * const response = await pipeline.query('What are the key features?');
25
+ * console.log(response.answer, response.sources);
26
+ * ```
27
+ */
28
+ import { randomUUID } from "crypto";
29
+ import { MDocument } from "../document/MDocument.js";
30
+ import { loadDocument } from "../document/loaders.js";
31
+ import { InMemoryVectorStore, } from "../retrieval/vectorQueryTool.js";
32
+ import { InMemoryBM25Index, createHybridSearch, } from "../retrieval/hybridSearch.js";
33
+ import { GraphRAG } from "../graphRag/graphRAG.js";
34
+ import { rerank } from "../reranker/reranker.js";
35
+ import { ProviderFactory } from "../../factories/providerFactory.js";
36
+ import { logger } from "../../utils/logger.js";
37
/**
 * RAG Pipeline Orchestrator
 *
 * End-to-end pipeline for Retrieval-Augmented Generation. `ingest()` loads,
 * chunks, embeds and indexes documents; `query()` retrieves matching chunks
 * (vector, hybrid or graph search), optionally reranks them, assembles a
 * context string and, when a generation model is configured, produces an
 * answer from that context.
 *
 * Providers are created lazily on the first `ingest()` / `query()` call, or
 * eagerly by calling `initialize()`.
 */
export class RAGPipeline {
    id;
    config;
    vectorStore;
    bm25Index;
    graphRAG;
    embeddingProvider;
    generationProvider;
    hybridSearch;
    documents = new Map();
    allChunks = [];
    // Promise of the most recent initialization run. Concurrent callers await
    // this instead of starting a second run or racing ahead of a partial one.
    initPromise;
    /**
     * @param config - Pipeline configuration. `embeddingModel` is read during
     *   initialization; `vectorStore` and `bm25Index` fall back to in-memory
     *   implementations when omitted.
     */
    constructor(config) {
        this.id = config.id || `rag-pipeline-${randomUUID().slice(0, 8)}`;
        // Keys explicitly set to `undefined` must not override the defaults
        // below (a plain spread would copy the `undefined` over them).
        const provided = Object.fromEntries(Object.entries(config).filter(([, value]) => value !== undefined));
        this.config = {
            indexName: "default",
            defaultChunkingStrategy: "recursive",
            defaultChunkSize: 1000,
            defaultChunkOverlap: 200,
            enableHybridSearch: false,
            enableGraphRAG: false,
            graphThreshold: 0.7,
            defaultTopK: 5,
            enableReranking: false,
            ...provided,
        };
        // Initialize stores
        this.vectorStore = config.vectorStore || new InMemoryVectorStore();
        this.bm25Index = config.bm25Index || new InMemoryBM25Index();
        this.graphRAG = new GraphRAG({ threshold: this.config.graphThreshold });
        logger.info("[RAGPipeline] Pipeline initialized", {
            id: this.id,
            indexName: this.config.indexName,
            embeddingModel: this.config.embeddingModel,
        });
    }
    /**
     * Initialize the pipeline (lazy loading of providers).
     *
     * Creates the embedding provider, the generation provider (when
     * `generationModel` is configured) and the hybrid search function (when
     * `enableHybridSearch` is set). Every call performs a fresh run; the run is
     * published so that concurrent `ingest()` / `query()` calls wait for it.
     *
     * @throws Whatever the underlying provider creation throws; after a
     *   failure the next `ingest()` / `query()` call retries initialization.
     */
    async initialize() {
        const run = this.performInitialization();
        this.initPromise = run;
        try {
            await run;
        }
        catch (error) {
            // Forget a failed run (unless a newer one has replaced it) so the
            // next caller retries instead of reusing a half-built pipeline.
            if (this.initPromise === run) {
                this.initPromise = undefined;
            }
            throw error;
        }
    }
    /**
     * Ingest documents into the pipeline.
     *
     * Each source is chunked, embedded, upserted into the vector store (when the
     * store exposes `upsert`), added to the BM25 index and, when Graph RAG is
     * enabled, folded into a rebuilt graph. A source that fails is logged and
     * skipped; it does not abort the remaining sources.
     *
     * @param sources - Array of file paths, URLs, or MDocument instances
     * @param options - Ingestion options (`strategy`, `chunkSize`,
     *   `chunkOverlap`, `metadata`, `extractMetadata`)
     * @returns Counts of successfully processed documents and created chunks
     */
    async ingest(sources, options) {
        await this.ensureInitialized();
        const strategy = options?.strategy || this.config.defaultChunkingStrategy;
        const chunkSize = options?.chunkSize || this.config.defaultChunkSize;
        // `??` rather than `||`: an overlap of 0 is a legitimate request and
        // must not be replaced by the default.
        const chunkOverlap = options?.chunkOverlap ?? this.config.defaultChunkOverlap;
        let documentsProcessed = 0;
        let chunksCreated = 0;
        for (const source of sources) {
            try {
                // Load document if string
                const doc = source instanceof MDocument
                    ? source
                    : await loadDocument(source, { metadata: options?.metadata });
                // Chunk the document
                await doc.chunk({
                    strategy,
                    config: {
                        maxSize: chunkSize,
                        overlap: chunkOverlap,
                        metadata: options?.metadata,
                    },
                });
                // Extract metadata if requested
                if (options?.extractMetadata) {
                    await doc.extractMetadata({
                        title: true,
                        summary: true,
                        keywords: true,
                    });
                }
                // Generate embeddings
                await doc.embed(this.config.embeddingModel.provider, this.config.embeddingModel.modelName);
                const chunks = doc.getChunks();
                const embeddings = doc.getEmbeddings();
                // Upsert into vector store (stores without `upsert` are skipped)
                if ("upsert" in this.vectorStore) {
                    await this.vectorStore.upsert(this.config.indexName, chunks.map((chunk, i) => ({
                        id: chunk.id,
                        vector: embeddings[i],
                        metadata: { ...chunk.metadata, text: chunk.text },
                    })));
                }
                // Add to BM25 index
                await this.bm25Index.addDocuments(chunks.map((chunk) => ({
                    id: chunk.id,
                    text: chunk.text,
                    metadata: chunk.metadata,
                })));
                // Update Graph RAG if enabled: the graph is rebuilt from every
                // chunk seen so far plus the new ones.
                if (this.config.enableGraphRAG) {
                    const graphChunks = [...this.allChunks, ...chunks];
                    this.graphRAG.createGraph(graphChunks.map((c) => ({
                        text: c.text,
                        metadata: c.metadata,
                    })), graphChunks.map((c) => ({
                        vector: c.embedding || [],
                    })));
                }
                // Track documents and chunks
                this.documents.set(doc.getId(), doc);
                this.allChunks.push(...chunks);
                documentsProcessed++;
                chunksCreated += chunks.length;
                logger.debug("[RAGPipeline] Document ingested", {
                    documentId: doc.getId(),
                    chunks: chunks.length,
                });
            }
            catch (error) {
                logger.error("[RAGPipeline] Failed to ingest document", {
                    source: typeof source === "string" ? source : source.getId(),
                    error: error instanceof Error ? error.message : String(error),
                });
            }
        }
        logger.info("[RAGPipeline] Ingestion complete", {
            documentsProcessed,
            chunksCreated,
        });
        return { documentsProcessed, chunksCreated };
    }
    /**
     * Query the pipeline.
     *
     * Retrieval method, in order of precedence: graph search (requested and
     * enabled in the config), hybrid search (requested and set up during
     * initialization), otherwise plain vector search. `options.filter` is only
     * forwarded on the vector-search path. Retrieval fetches `topK * 2`
     * candidates; reranking runs only when requested and a `rerankingModel` is
     * configured. The final list is cut to `topK`.
     *
     * @param query - Search query
     * @param options - Query options (`topK`, `hybrid`, `graph`, `rerank`,
     *   `filter`, `generate`, `systemPrompt`, `temperature`)
     * @returns RAG response with retrieved context and optional generated answer
     */
    async query(query, options) {
        await this.ensureInitialized();
        const startTime = Date.now();
        const topK = options?.topK || this.config.defaultTopK;
        const useHybrid = options?.hybrid ?? this.config.enableHybridSearch;
        const useGraph = options?.graph ?? this.config.enableGraphRAG;
        const useRerank = options?.rerank ?? this.config.enableReranking;
        let results;
        let retrievalMethod = "vector";
        // Generate query embedding
        const queryEmbedding = await this.generateEmbedding(query);
        if (useGraph && this.config.enableGraphRAG) {
            // Graph RAG search
            retrievalMethod = "graph";
            const graphResults = this.graphRAG.query({
                query: queryEmbedding,
                topK: topK * 2, // Get more for potential reranking
            });
            results = graphResults.map((r) => ({
                id: r.id,
                text: r.content,
                score: r.score,
                metadata: r.metadata,
            }));
        }
        else if (useHybrid && this.hybridSearch) {
            // Hybrid search
            retrievalMethod = "hybrid";
            const hybridResults = await this.hybridSearch(query, { topK: topK * 2 });
            results = hybridResults.map((r) => ({
                id: r.id,
                text: r.text,
                score: r.score,
                metadata: r.metadata,
            }));
        }
        else {
            // Vector search
            results = await this.vectorStore.query({
                indexName: this.config.indexName,
                queryVector: queryEmbedding,
                topK: topK * 2,
                filter: options?.filter,
            });
        }
        // Apply reranking if enabled
        let reranked = false;
        if (useRerank && this.config.rerankingModel && results.length > 0) {
            const rerankModel = await ProviderFactory.createProvider(this.config.rerankingModel.provider, this.config.rerankingModel.modelName);
            const rerankedResults = await rerank(results, query, rerankModel, {
                topK,
                queryEmbedding,
            });
            results = rerankedResults.map((r) => r.result);
            reranked = true;
        }
        // Take top K results
        results = results.slice(0, topK);
        // Assemble context
        const context = this.assembleContext(results);
        // Format sources
        const sources = results.map((r) => ({
            id: r.id,
            text: r.text || r.metadata?.text || "",
            score: r.score || 0,
            metadata: r.metadata,
        }));
        // Generate answer unless explicitly disabled or no generation model exists
        let answer;
        if (options?.generate !== false && this.generationProvider) {
            answer = await this.generateAnswer(query, context, options?.systemPrompt, options?.temperature);
        }
        const queryTime = Date.now() - startTime;
        logger.info("[RAGPipeline] Query completed", {
            query: query.slice(0, 50),
            retrievalMethod,
            resultsCount: results.length,
            reranked,
            queryTime,
        });
        return {
            answer,
            context,
            sources,
            metadata: {
                queryTime,
                retrievalMethod,
                chunksRetrieved: results.length,
                reranked,
            },
        };
    }
    /**
     * Get pipeline statistics.
     *
     * @returns Document and chunk counts tracked by this instance, the index
     *   name, the embedding length of the first tracked chunk (undefined when
     *   nothing is tracked or the chunk carries no embedding) and the
     *   hybrid/graph feature flags.
     */
    getStats() {
        return {
            totalDocuments: this.documents.size,
            totalChunks: this.allChunks.length,
            indexName: this.config.indexName,
            embeddingDimension: this.allChunks[0]?.embedding?.length,
            hybridSearchEnabled: this.config.enableHybridSearch,
            graphRAGEnabled: this.config.enableGraphRAG,
        };
    }
    /**
     * Get pipeline ID
     */
    getId() {
        return this.id;
    }
    /**
     * Clear the data tracked by this instance.
     *
     * Resets the document map, the chunk list and the graph.
     * NOTE(review): entries already written to the vector store and the BM25
     * index are NOT removed here, so they can still be returned by later
     * queries. Clear those stores separately if they offer a way to do so.
     */
    async clear() {
        this.documents.clear();
        this.allChunks = [];
        this.graphRAG = new GraphRAG({ threshold: this.config.graphThreshold });
        logger.info("[RAGPipeline] Pipeline cleared", { id: this.id });
    }
    // ============================================================================
    // Private Methods
    // ============================================================================
    /**
     * Ensure pipeline is initialized.
     *
     * Waits for an initialization run that is in flight or already finished;
     * starts one only when none exists (first use, or after a failed run).
     */
    async ensureInitialized() {
        if (this.initPromise) {
            await this.initPromise;
            return;
        }
        await this.initialize();
    }
    /**
     * Perform one initialization run: create the providers and, when enabled,
     * the hybrid search function.
     */
    async performInitialization() {
        // Initialize embedding provider
        this.embeddingProvider = await ProviderFactory.createProvider(this.config.embeddingModel.provider, this.config.embeddingModel.modelName);
        // Initialize generation provider if configured
        if (this.config.generationModel) {
            this.generationProvider = await ProviderFactory.createProvider(this.config.generationModel.provider, this.config.generationModel.modelName);
        }
        // Initialize hybrid search if enabled
        if (this.config.enableHybridSearch) {
            this.hybridSearch = createHybridSearch({
                vectorStore: this.vectorStore,
                bm25Index: this.bm25Index,
                indexName: this.config.indexName,
                embeddingModel: this.config.embeddingModel,
            });
        }
        logger.info("[RAGPipeline] Pipeline initialized", { id: this.id });
    }
    /**
     * Generate embedding for text.
     *
     * @param text - Text to embed
     * @returns The value resolved by the embedding provider's `embed`
     * @throws Error when the provider is missing or has no `embed` function
     */
    async generateEmbedding(text) {
        if (!this.embeddingProvider) {
            throw new Error("Embedding provider not initialized");
        }
        if (typeof this.embeddingProvider.embed !== "function") {
            throw new Error(`Provider ${this.config.embeddingModel.provider} does not support embeddings`);
        }
        return await this.embeddingProvider.embed(text);
    }
    /**
     * Assemble context from results.
     *
     * Each result becomes a `[Source N: <source>]` header followed by its text
     * (falling back to `metadata.text`, then to an empty string); entries are
     * joined with a `---` separator line.
     */
    assembleContext(results) {
        return results
            .map((r, i) => {
            const text = r.text || r.metadata?.text || "";
            const source = r.metadata?.source || `chunk-${i + 1}`;
            return `[Source ${i + 1}: ${source}]\n${text}`;
        })
            .join("\n\n---\n\n");
    }
    /**
     * Generate answer using LLM.
     *
     * @param query - User question
     * @param context - Assembled context string
     * @param customSystemPrompt - Replaces the built-in system prompt when truthy
     * @param temperature - Overrides the configured temperature (default 0.7)
     * @returns The generated content, or an empty string when none is returned
     * @throws Error when no generation provider is configured
     */
    async generateAnswer(query, context, customSystemPrompt, temperature) {
        if (!this.generationProvider) {
            throw new Error("Generation provider not configured");
        }
        const systemPrompt = customSystemPrompt ||
            `You are a helpful assistant that answers questions based on the provided context.
Use only the information from the context to answer the question.
If the context doesn't contain relevant information, say so.
Cite sources when possible using [Source N] format.`;
        const prompt = `Context:\n${context}\n\nQuestion: ${query}\n\nAnswer:`;
        const result = await this.generationProvider.generate({
            prompt,
            systemPrompt,
            temperature: temperature ?? this.config.generationModel?.temperature ?? 0.7,
            maxTokens: this.config.generationModel?.maxTokens ?? 1000,
        });
        return result?.content || "";
    }
}
379
/**
 * Create a simple RAG pipeline with sensible defaults.
 *
 * The same provider is used for the embedding model and, when
 * `generationModel` is given, for the generation model. Options that are left
 * out are not forwarded, so the pipeline's own defaults stay in effect.
 *
 * @param options - Basic configuration options (`provider`, `embeddingModel`,
 *   `generationModel`, `enableHybrid`, `enableGraph`); may be omitted
 * @returns Configured RAGPipeline instance
 */
export function createRAGPipeline(options) {
    // Tolerate a missing options object: every field has a default.
    const opts = options ?? {};
    const provider = opts.provider || "openai";
    return new RAGPipeline({
        embeddingModel: {
            provider,
            modelName: opts.embeddingModel || "text-embedding-3-small",
        },
        // Forward only what the caller actually set; an explicit `undefined`
        // would override the pipeline's defaults when the config is merged.
        ...(opts.generationModel
            ? { generationModel: { provider, modelName: opts.generationModel } }
            : {}),
        ...(opts.enableHybrid !== undefined
            ? { enableHybridSearch: opts.enableHybrid }
            : {}),
        ...(opts.enableGraph !== undefined
            ? { enableGraphRAG: opts.enableGraph }
            : {}),
    });
}
401
+ }
402
+ //# sourceMappingURL=RAGPipeline.js.map
@@ -0,0 +1,126 @@
1
+ /**
2
+ * Context Assembly Utilities
3
+ *
4
+ * Provides utilities for assembling, formatting, and optimizing context
5
+ * from retrieved chunks for LLM consumption.
6
+ *
7
+ * Features:
8
+ * - Context window management (token-aware truncation)
9
+ * - Citation formatting
10
+ * - Context deduplication
11
+ * - Relevance-based ordering
12
+ * - Context summarization
13
+ */
14
+ import type { Chunk, VectorQueryResult } from "../types.js";
15
/**
 * Names of the citation styles accepted by the context assembly options.
 */
export type CitationFormat =
    | "inline"
    | "footnote"
    | "numbered"
    | "none";
19
/**
 * Options controlling how retrieved chunks are assembled into a context string.
 * Every field is optional.
 */
export interface ContextAssemblyOptions {
    /** Upper bound on the assembled context, in characters. */
    maxChars?: number;
    /** Upper bound in tokens; approximate, estimated at 4 characters per token. */
    maxTokens?: number;
    /** Which citation format to apply. */
    citationFormat?: CitationFormat;
    /** Text placed between consecutive chunks. */
    separator?: string;
    /** Whether chunk metadata is included in the context. */
    includeMetadata?: boolean;
    /** Whether overlapping content is deduplicated. */
    deduplicate?: boolean;
    /** Similarity threshold used for deduplication, in the range 0-1. */
    dedupeThreshold?: number;
    /** Whether chunks are ordered by relevance score. */
    orderByRelevance?: boolean;
    /** Whether section headers are included. */
    includeSectionHeaders?: boolean;
    /** Template for headers; supports the {index}, {source} and {score} placeholders. */
    headerTemplate?: string;
}
44
/**
 * An assembled context together with bookkeeping about what went into it.
 */
export interface ContextWindow {
    /** The assembled context text. */
    text: string;
    /** How many chunks were included. */
    chunkCount: number;
    /** Total number of characters. */
    charCount: number;
    /** Estimated number of tokens. */
    tokenCount: number;
    /** How many chunks were truncated or excluded. */
    truncatedChunks: number;
    /** Citations keyed by id (id -> citation text). */
    citations: Map<string, string>;
}
61
/**
 * Build a single context string from retrieved results.
 *
 * The chunks are combined into one coherent string intended to be passed to
 * an LLM.
 *
 * @param results - Retrieved chunks or query results
 * @param options - Assembly options
 * @returns The assembled context string
 *
 * @example
 * ```typescript
 * const context = assembleContext(results, {
 *   maxTokens: 4000,
 *   citationFormat: 'numbered',
 *   deduplicate: true
 * });
 * ```
 */
export declare function assembleContext(
    results: (Chunk | VectorQueryResult)[],
    options?: ContextAssemblyOptions,
): string;
81
/**
 * Format retrieved results as a context with inline citations.
 *
 * @param results - Retrieved results
 * @param options - Formatting options; accepts every assembly option plus an
 *   optional `returnCitations` flag
 * @returns The context text and the list of citations
 */
export declare function formatContextWithCitations(
    results: (Chunk | VectorQueryResult)[],
    options?: ContextAssemblyOptions & { returnCitations?: boolean },
): { context: string; citations: string[] };
94
/**
 * Assemble retrieved results into a context window that also carries
 * detailed tracking information.
 *
 * @param results - Retrieved results
 * @param options - Assembly options
 * @returns The context window, including its metadata
 */
export declare function createContextWindow(
    results: (Chunk | VectorQueryResult)[],
    options?: ContextAssemblyOptions,
): ContextWindow;
102
/**
 * Produce a summary of a context string using an LLM.
 *
 * @param context - The context to summarize
 * @param maxLength - Maximum length of the summary
 * @param provider - LLM provider instance; its `generate` receives a prompt,
 *   a token limit and a temperature, and resolves to an object with optional
 *   `content`, or to `null`
 * @returns The summarized context
 */
export declare function summarizeContext(
    context: string,
    maxLength?: number,
    provider?: {
        generate: (params: {
            prompt: string;
            maxTokens: number;
            temperature: number;
        }) => Promise<{ content?: string } | null>;
    },
): Promise<string>;
119
/**
 * Order chunks according to document structure, when that structure is
 * available.
 *
 * @param chunks - Chunks to order
 * @returns The ordered chunks
 */
export declare function orderByDocumentStructure(chunks: Array<Chunk>): Array<Chunk>;
123
/**
 * Pull key sentences out of text for use in a summary.
 *
 * @param text - Text to extract sentences from
 * @param count - Optional sentence count
 * @returns The extracted sentences
 */
export declare function extractKeySentences(text: string, count?: number): Array<string>;