@juspay/neurolink 9.1.1 → 9.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
Files changed (555) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +106 -37
  3. package/dist/agent/directTools.d.ts +11 -11
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/cli/commands/rag.d.ts +19 -0
  6. package/dist/cli/commands/rag.js +756 -0
  7. package/dist/cli/factories/commandFactory.js +146 -83
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/core/baseProvider.d.ts +43 -30
  10. package/dist/core/baseProvider.js +98 -138
  11. package/dist/core/conversationMemoryFactory.d.ts +2 -2
  12. package/dist/core/conversationMemoryFactory.js +2 -2
  13. package/dist/core/conversationMemoryInitializer.d.ts +1 -2
  14. package/dist/core/conversationMemoryInitializer.js +2 -2
  15. package/dist/core/infrastructure/baseError.d.ts +21 -0
  16. package/dist/core/infrastructure/baseError.js +22 -0
  17. package/dist/core/infrastructure/baseFactory.d.ts +21 -0
  18. package/dist/core/infrastructure/baseFactory.js +54 -0
  19. package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
  20. package/dist/core/infrastructure/baseRegistry.js +49 -0
  21. package/dist/core/infrastructure/index.d.ts +5 -0
  22. package/dist/core/infrastructure/index.js +5 -0
  23. package/dist/core/infrastructure/retry.d.ts +7 -0
  24. package/dist/core/infrastructure/retry.js +20 -0
  25. package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
  26. package/dist/core/infrastructure/typedEventEmitter.js +23 -0
  27. package/dist/core/redisConversationMemoryManager.d.ts +1 -6
  28. package/dist/core/redisConversationMemoryManager.js +7 -19
  29. package/dist/factories/providerFactory.d.ts +5 -3
  30. package/dist/factories/providerFactory.js +31 -24
  31. package/dist/image-gen/ImageGenService.d.ts +143 -0
  32. package/dist/image-gen/ImageGenService.js +345 -0
  33. package/dist/image-gen/imageGenTools.d.ts +126 -0
  34. package/dist/image-gen/imageGenTools.js +304 -0
  35. package/dist/image-gen/index.d.ts +46 -0
  36. package/dist/image-gen/index.js +48 -0
  37. package/dist/image-gen/types.d.ts +237 -0
  38. package/dist/image-gen/types.js +24 -0
  39. package/dist/index.d.ts +46 -12
  40. package/dist/index.js +88 -36
  41. package/dist/lib/agent/directTools.d.ts +8 -8
  42. package/dist/lib/core/baseProvider.d.ts +43 -30
  43. package/dist/lib/core/baseProvider.js +98 -138
  44. package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
  45. package/dist/lib/core/conversationMemoryFactory.js +2 -2
  46. package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
  47. package/dist/lib/core/conversationMemoryInitializer.js +2 -2
  48. package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
  49. package/dist/lib/core/infrastructure/baseError.js +23 -0
  50. package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
  51. package/dist/lib/core/infrastructure/baseFactory.js +55 -0
  52. package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
  53. package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
  54. package/dist/lib/core/infrastructure/index.d.ts +5 -0
  55. package/dist/lib/core/infrastructure/index.js +6 -0
  56. package/dist/lib/core/infrastructure/retry.d.ts +7 -0
  57. package/dist/lib/core/infrastructure/retry.js +21 -0
  58. package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
  59. package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
  60. package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
  61. package/dist/lib/core/redisConversationMemoryManager.js +7 -19
  62. package/dist/lib/factories/providerFactory.d.ts +5 -3
  63. package/dist/lib/factories/providerFactory.js +31 -24
  64. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  65. package/dist/lib/image-gen/ImageGenService.js +346 -0
  66. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  67. package/dist/lib/image-gen/imageGenTools.js +305 -0
  68. package/dist/lib/image-gen/index.d.ts +46 -0
  69. package/dist/lib/image-gen/index.js +49 -0
  70. package/dist/lib/image-gen/types.d.ts +237 -0
  71. package/dist/lib/image-gen/types.js +25 -0
  72. package/dist/lib/index.d.ts +46 -12
  73. package/dist/lib/index.js +88 -36
  74. package/dist/lib/mcp/index.d.ts +6 -5
  75. package/dist/lib/mcp/index.js +7 -5
  76. package/dist/lib/neurolink.d.ts +11 -13
  77. package/dist/lib/neurolink.js +95 -29
  78. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  79. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  80. package/dist/lib/processors/base/index.d.ts +14 -0
  81. package/dist/lib/processors/base/index.js +20 -0
  82. package/dist/lib/processors/base/types.d.ts +593 -0
  83. package/dist/lib/processors/base/types.js +77 -0
  84. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  85. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  86. package/dist/lib/processors/cli/index.d.ts +37 -0
  87. package/dist/lib/processors/cli/index.js +50 -0
  88. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  89. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  90. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  91. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  92. package/dist/lib/processors/code/index.d.ts +44 -0
  93. package/dist/lib/processors/code/index.js +61 -0
  94. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  95. package/dist/lib/processors/config/fileTypes.js +521 -0
  96. package/dist/lib/processors/config/index.d.ts +32 -0
  97. package/dist/lib/processors/config/index.js +93 -0
  98. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  99. package/dist/lib/processors/config/languageMap.js +411 -0
  100. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  101. package/dist/lib/processors/config/mimeTypes.js +339 -0
  102. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  103. package/dist/lib/processors/config/sizeLimits.js +247 -0
  104. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  105. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  106. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  107. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  108. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  109. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  110. package/dist/lib/processors/data/index.d.ts +49 -0
  111. package/dist/lib/processors/data/index.js +77 -0
  112. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  113. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  114. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  115. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  116. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  117. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  118. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  119. package/dist/lib/processors/document/WordProcessor.js +354 -0
  120. package/dist/lib/processors/document/index.d.ts +54 -0
  121. package/dist/lib/processors/document/index.js +91 -0
  122. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  123. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  124. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  125. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  126. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  127. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  128. package/dist/lib/processors/errors/index.d.ts +46 -0
  129. package/dist/lib/processors/errors/index.js +50 -0
  130. package/dist/lib/processors/index.d.ts +76 -0
  131. package/dist/lib/processors/index.js +113 -0
  132. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  133. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  134. package/dist/lib/processors/integration/index.d.ts +42 -0
  135. package/dist/lib/processors/integration/index.js +45 -0
  136. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  137. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  138. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  139. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  140. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  141. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  142. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  143. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  144. package/dist/lib/processors/markup/index.d.ts +66 -0
  145. package/dist/lib/processors/markup/index.js +103 -0
  146. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  147. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  148. package/dist/lib/processors/registry/index.d.ts +12 -0
  149. package/dist/lib/processors/registry/index.js +17 -0
  150. package/dist/lib/processors/registry/types.d.ts +53 -0
  151. package/dist/lib/processors/registry/types.js +11 -0
  152. package/dist/lib/providers/amazonBedrock.d.ts +15 -2
  153. package/dist/lib/providers/amazonBedrock.js +65 -8
  154. package/dist/lib/providers/anthropic.d.ts +3 -3
  155. package/dist/lib/providers/anthropic.js +10 -7
  156. package/dist/lib/providers/googleAiStudio.d.ts +5 -5
  157. package/dist/lib/providers/googleAiStudio.js +10 -7
  158. package/dist/lib/providers/googleVertex.d.ts +16 -4
  159. package/dist/lib/providers/googleVertex.js +72 -16
  160. package/dist/lib/providers/litellm.d.ts +3 -3
  161. package/dist/lib/providers/litellm.js +10 -10
  162. package/dist/lib/providers/mistral.d.ts +3 -3
  163. package/dist/lib/providers/mistral.js +7 -6
  164. package/dist/lib/providers/ollama.d.ts +3 -4
  165. package/dist/lib/providers/ollama.js +7 -8
  166. package/dist/lib/providers/openAI.d.ts +14 -2
  167. package/dist/lib/providers/openAI.js +60 -6
  168. package/dist/lib/providers/openRouter.d.ts +2 -2
  169. package/dist/lib/providers/openRouter.js +10 -6
  170. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  171. package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
  172. package/dist/lib/rag/ChunkerFactory.js +321 -0
  173. package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
  174. package/dist/lib/rag/ChunkerRegistry.js +422 -0
  175. package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
  176. package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
  177. package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
  178. package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
  179. package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
  180. package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
  181. package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
  182. package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
  183. package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
  184. package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
  185. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
  186. package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
  187. package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
  188. package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
  189. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  190. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
  191. package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
  192. package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
  193. package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
  194. package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
  195. package/dist/lib/rag/chunkers/index.d.ts +15 -0
  196. package/dist/lib/rag/chunkers/index.js +16 -0
  197. package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
  198. package/dist/lib/rag/chunking/characterChunker.js +143 -0
  199. package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
  200. package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
  201. package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
  202. package/dist/lib/rag/chunking/htmlChunker.js +248 -0
  203. package/dist/lib/rag/chunking/index.d.ts +15 -0
  204. package/dist/lib/rag/chunking/index.js +18 -0
  205. package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
  206. package/dist/lib/rag/chunking/jsonChunker.js +282 -0
  207. package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
  208. package/dist/lib/rag/chunking/latexChunker.js +252 -0
  209. package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
  210. package/dist/lib/rag/chunking/markdownChunker.js +202 -0
  211. package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
  212. package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
  213. package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
  214. package/dist/lib/rag/chunking/semanticChunker.js +307 -0
  215. package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
  216. package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
  217. package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
  218. package/dist/lib/rag/chunking/tokenChunker.js +184 -0
  219. package/dist/lib/rag/document/MDocument.d.ts +198 -0
  220. package/dist/lib/rag/document/MDocument.js +393 -0
  221. package/dist/lib/rag/document/index.d.ts +5 -0
  222. package/dist/lib/rag/document/index.js +6 -0
  223. package/dist/lib/rag/document/loaders.d.ts +201 -0
  224. package/dist/lib/rag/document/loaders.js +501 -0
  225. package/dist/lib/rag/errors/RAGError.d.ts +244 -0
  226. package/dist/lib/rag/errors/RAGError.js +275 -0
  227. package/dist/lib/rag/errors/index.d.ts +6 -0
  228. package/dist/lib/rag/errors/index.js +7 -0
  229. package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
  230. package/dist/lib/rag/graphRag/graphRAG.js +385 -0
  231. package/dist/lib/rag/graphRag/index.d.ts +4 -0
  232. package/dist/lib/rag/graphRag/index.js +5 -0
  233. package/dist/lib/rag/index.d.ts +103 -0
  234. package/dist/lib/rag/index.js +142 -0
  235. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  236. package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
  237. package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  238. package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
  239. package/dist/lib/rag/metadata/index.d.ts +6 -0
  240. package/dist/lib/rag/metadata/index.js +10 -0
  241. package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
  242. package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
  243. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
  244. package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
  245. package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
  246. package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
  247. package/dist/lib/rag/pipeline/index.d.ts +5 -0
  248. package/dist/lib/rag/pipeline/index.js +6 -0
  249. package/dist/lib/rag/ragIntegration.d.ts +38 -0
  250. package/dist/lib/rag/ragIntegration.js +212 -0
  251. package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
  252. package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
  253. package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
  254. package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
  255. package/dist/lib/rag/reranker/index.d.ts +6 -0
  256. package/dist/lib/rag/reranker/index.js +10 -0
  257. package/dist/lib/rag/reranker/reranker.d.ts +71 -0
  258. package/dist/lib/rag/reranker/reranker.js +278 -0
  259. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
  260. package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
  261. package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
  262. package/dist/lib/rag/resilience/RetryHandler.js +301 -0
  263. package/dist/lib/rag/resilience/index.d.ts +7 -0
  264. package/dist/lib/rag/resilience/index.js +8 -0
  265. package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
  266. package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
  267. package/dist/lib/rag/retrieval/index.d.ts +5 -0
  268. package/dist/lib/rag/retrieval/index.js +6 -0
  269. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
  270. package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
  271. package/dist/lib/rag/types.d.ts +768 -0
  272. package/dist/lib/rag/types.js +9 -0
  273. package/dist/lib/server/index.d.ts +15 -11
  274. package/dist/lib/server/index.js +55 -51
  275. package/dist/lib/server/utils/validation.d.ts +2 -2
  276. package/dist/lib/types/common.d.ts +0 -1
  277. package/dist/lib/types/fileTypes.d.ts +1 -1
  278. package/dist/lib/types/generateTypes.d.ts +42 -8
  279. package/dist/lib/types/generateTypes.js +1 -1
  280. package/dist/lib/types/index.d.ts +25 -24
  281. package/dist/lib/types/index.js +21 -20
  282. package/dist/lib/types/modelTypes.d.ts +16 -16
  283. package/dist/lib/types/pptTypes.d.ts +14 -2
  284. package/dist/lib/types/pptTypes.js +16 -0
  285. package/dist/lib/types/streamTypes.d.ts +28 -8
  286. package/dist/lib/types/streamTypes.js +1 -1
  287. package/dist/lib/utils/async/delay.d.ts +40 -0
  288. package/dist/lib/utils/async/delay.js +43 -0
  289. package/dist/lib/utils/async/index.d.ts +23 -0
  290. package/dist/lib/utils/async/index.js +24 -0
  291. package/dist/lib/utils/async/retry.d.ts +141 -0
  292. package/dist/lib/utils/async/retry.js +172 -0
  293. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  294. package/dist/lib/utils/async/withTimeout.js +97 -0
  295. package/dist/lib/utils/fileDetector.d.ts +7 -1
  296. package/dist/lib/utils/fileDetector.js +91 -18
  297. package/dist/lib/utils/json/extract.d.ts +103 -0
  298. package/dist/lib/utils/json/extract.js +249 -0
  299. package/dist/lib/utils/json/index.d.ts +36 -0
  300. package/dist/lib/utils/json/index.js +37 -0
  301. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  302. package/dist/lib/utils/json/safeParse.js +191 -0
  303. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  304. package/dist/lib/utils/messageBuilder.js +15 -7
  305. package/dist/lib/utils/modelRouter.d.ts +4 -4
  306. package/dist/lib/utils/modelRouter.js +4 -4
  307. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  308. package/dist/lib/utils/sanitizers/filename.js +366 -0
  309. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  310. package/dist/lib/utils/sanitizers/html.js +326 -0
  311. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  312. package/dist/lib/utils/sanitizers/index.js +30 -0
  313. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  314. package/dist/lib/utils/sanitizers/svg.js +483 -0
  315. package/dist/mcp/index.d.ts +6 -5
  316. package/dist/mcp/index.js +7 -5
  317. package/dist/neurolink.d.ts +11 -13
  318. package/dist/neurolink.js +95 -29
  319. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  320. package/dist/processors/base/BaseFileProcessor.js +613 -0
  321. package/dist/processors/base/index.d.ts +14 -0
  322. package/dist/processors/base/index.js +19 -0
  323. package/dist/processors/base/types.d.ts +593 -0
  324. package/dist/processors/base/types.js +76 -0
  325. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  326. package/dist/processors/cli/fileProcessorCli.js +388 -0
  327. package/dist/processors/cli/index.d.ts +37 -0
  328. package/dist/processors/cli/index.js +49 -0
  329. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  330. package/dist/processors/code/ConfigProcessor.js +400 -0
  331. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  332. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  333. package/dist/processors/code/index.d.ts +44 -0
  334. package/dist/processors/code/index.js +60 -0
  335. package/dist/processors/config/fileTypes.d.ts +283 -0
  336. package/dist/processors/config/fileTypes.js +520 -0
  337. package/dist/processors/config/index.d.ts +32 -0
  338. package/dist/processors/config/index.js +92 -0
  339. package/dist/processors/config/languageMap.d.ts +66 -0
  340. package/dist/processors/config/languageMap.js +410 -0
  341. package/dist/processors/config/mimeTypes.d.ts +376 -0
  342. package/dist/processors/config/mimeTypes.js +338 -0
  343. package/dist/processors/config/sizeLimits.d.ts +194 -0
  344. package/dist/processors/config/sizeLimits.js +246 -0
  345. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  346. package/dist/processors/data/JsonProcessor.js +203 -0
  347. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  348. package/dist/processors/data/XmlProcessor.js +283 -0
  349. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  350. package/dist/processors/data/YamlProcessor.js +294 -0
  351. package/dist/processors/data/index.d.ts +49 -0
  352. package/dist/processors/data/index.js +76 -0
  353. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  354. package/dist/processors/document/ExcelProcessor.js +519 -0
  355. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  356. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  357. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  358. package/dist/processors/document/RtfProcessor.js +361 -0
  359. package/dist/processors/document/WordProcessor.d.ts +168 -0
  360. package/dist/processors/document/WordProcessor.js +353 -0
  361. package/dist/processors/document/index.d.ts +54 -0
  362. package/dist/processors/document/index.js +90 -0
  363. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  364. package/dist/processors/errors/FileErrorCode.js +255 -0
  365. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  366. package/dist/processors/errors/errorHelpers.js +378 -0
  367. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  368. package/dist/processors/errors/errorSerializer.js +507 -0
  369. package/dist/processors/errors/index.d.ts +46 -0
  370. package/dist/processors/errors/index.js +49 -0
  371. package/dist/processors/index.d.ts +76 -0
  372. package/dist/processors/index.js +112 -0
  373. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  374. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  375. package/dist/processors/integration/index.d.ts +42 -0
  376. package/dist/processors/integration/index.js +44 -0
  377. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  378. package/dist/processors/markup/HtmlProcessor.js +249 -0
  379. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  380. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  381. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  382. package/dist/processors/markup/SvgProcessor.js +240 -0
  383. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  384. package/dist/processors/markup/TextProcessor.js +188 -0
  385. package/dist/processors/markup/index.d.ts +66 -0
  386. package/dist/processors/markup/index.js +102 -0
  387. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  388. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  389. package/dist/processors/registry/index.d.ts +12 -0
  390. package/dist/processors/registry/index.js +16 -0
  391. package/dist/processors/registry/types.d.ts +53 -0
  392. package/dist/processors/registry/types.js +10 -0
  393. package/dist/providers/amazonBedrock.d.ts +15 -2
  394. package/dist/providers/amazonBedrock.js +65 -8
  395. package/dist/providers/anthropic.d.ts +3 -3
  396. package/dist/providers/anthropic.js +10 -7
  397. package/dist/providers/googleAiStudio.d.ts +5 -5
  398. package/dist/providers/googleAiStudio.js +10 -7
  399. package/dist/providers/googleVertex.d.ts +16 -4
  400. package/dist/providers/googleVertex.js +72 -16
  401. package/dist/providers/litellm.d.ts +3 -3
  402. package/dist/providers/litellm.js +10 -10
  403. package/dist/providers/mistral.d.ts +3 -3
  404. package/dist/providers/mistral.js +7 -6
  405. package/dist/providers/ollama.d.ts +3 -4
  406. package/dist/providers/ollama.js +7 -8
  407. package/dist/providers/openAI.d.ts +14 -2
  408. package/dist/providers/openAI.js +60 -6
  409. package/dist/providers/openRouter.d.ts +2 -2
  410. package/dist/providers/openRouter.js +10 -6
  411. package/dist/rag/ChunkerFactory.d.ts +91 -0
  412. package/dist/rag/ChunkerFactory.js +320 -0
  413. package/dist/rag/ChunkerRegistry.d.ts +91 -0
  414. package/dist/rag/ChunkerRegistry.js +421 -0
  415. package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
  416. package/dist/rag/chunkers/BaseChunker.js +143 -0
  417. package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
  418. package/dist/rag/chunkers/CharacterChunker.js +28 -0
  419. package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
  420. package/dist/rag/chunkers/HTMLChunker.js +38 -0
  421. package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
  422. package/dist/rag/chunkers/JSONChunker.js +68 -0
  423. package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
  424. package/dist/rag/chunkers/LaTeXChunker.js +63 -0
  425. package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
  426. package/dist/rag/chunkers/MarkdownChunker.js +102 -0
  427. package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
  428. package/dist/rag/chunkers/RecursiveChunker.js +139 -0
  429. package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  430. package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
  431. package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
  432. package/dist/rag/chunkers/SentenceChunker.js +66 -0
  433. package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
  434. package/dist/rag/chunkers/TokenChunker.js +61 -0
  435. package/dist/rag/chunkers/index.d.ts +15 -0
  436. package/dist/rag/chunkers/index.js +15 -0
  437. package/dist/rag/chunking/characterChunker.d.ts +16 -0
  438. package/dist/rag/chunking/characterChunker.js +142 -0
  439. package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
  440. package/dist/rag/chunking/chunkerRegistry.js +194 -0
  441. package/dist/rag/chunking/htmlChunker.d.ts +34 -0
  442. package/dist/rag/chunking/htmlChunker.js +247 -0
  443. package/dist/rag/chunking/index.d.ts +15 -0
  444. package/dist/rag/chunking/index.js +17 -0
  445. package/dist/rag/chunking/jsonChunker.d.ts +20 -0
  446. package/dist/rag/chunking/jsonChunker.js +281 -0
  447. package/dist/rag/chunking/latexChunker.d.ts +26 -0
  448. package/dist/rag/chunking/latexChunker.js +251 -0
  449. package/dist/rag/chunking/markdownChunker.d.ts +19 -0
  450. package/dist/rag/chunking/markdownChunker.js +201 -0
  451. package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
  452. package/dist/rag/chunking/recursiveChunker.js +148 -0
  453. package/dist/rag/chunking/semanticChunker.d.ts +41 -0
  454. package/dist/rag/chunking/semanticChunker.js +306 -0
  455. package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
  456. package/dist/rag/chunking/sentenceChunker.js +230 -0
  457. package/dist/rag/chunking/tokenChunker.d.ts +36 -0
  458. package/dist/rag/chunking/tokenChunker.js +183 -0
  459. package/dist/rag/document/MDocument.d.ts +198 -0
  460. package/dist/rag/document/MDocument.js +392 -0
  461. package/dist/rag/document/index.d.ts +5 -0
  462. package/dist/rag/document/index.js +5 -0
  463. package/dist/rag/document/loaders.d.ts +201 -0
  464. package/dist/rag/document/loaders.js +500 -0
  465. package/dist/rag/errors/RAGError.d.ts +244 -0
  466. package/dist/rag/errors/RAGError.js +274 -0
  467. package/dist/rag/errors/index.d.ts +6 -0
  468. package/dist/rag/errors/index.js +6 -0
  469. package/dist/rag/graphRag/graphRAG.d.ts +115 -0
  470. package/dist/rag/graphRag/graphRAG.js +384 -0
  471. package/dist/rag/graphRag/index.d.ts +4 -0
  472. package/dist/rag/graphRag/index.js +4 -0
  473. package/dist/rag/index.d.ts +103 -0
  474. package/dist/rag/index.js +141 -0
  475. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  476. package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
  477. package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  478. package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
  479. package/dist/rag/metadata/index.d.ts +6 -0
  480. package/dist/rag/metadata/index.js +9 -0
  481. package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
  482. package/dist/rag/metadata/metadataExtractor.js +277 -0
  483. package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
  484. package/dist/rag/pipeline/RAGPipeline.js +401 -0
  485. package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
  486. package/dist/rag/pipeline/contextAssembly.js +337 -0
  487. package/dist/rag/pipeline/index.d.ts +5 -0
  488. package/dist/rag/pipeline/index.js +5 -0
  489. package/dist/rag/ragIntegration.d.ts +38 -0
  490. package/dist/rag/ragIntegration.js +211 -0
  491. package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
  492. package/dist/rag/reranker/RerankerFactory.js +430 -0
  493. package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
  494. package/dist/rag/reranker/RerankerRegistry.js +402 -0
  495. package/dist/rag/reranker/index.d.ts +6 -0
  496. package/dist/rag/reranker/index.js +9 -0
  497. package/dist/rag/reranker/reranker.d.ts +71 -0
  498. package/dist/rag/reranker/reranker.js +277 -0
  499. package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
  500. package/dist/rag/resilience/CircuitBreaker.js +431 -0
  501. package/dist/rag/resilience/RetryHandler.d.ts +115 -0
  502. package/dist/rag/resilience/RetryHandler.js +300 -0
  503. package/dist/rag/resilience/index.d.ts +7 -0
  504. package/dist/rag/resilience/index.js +7 -0
  505. package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
  506. package/dist/rag/retrieval/hybridSearch.js +313 -0
  507. package/dist/rag/retrieval/index.d.ts +5 -0
  508. package/dist/rag/retrieval/index.js +5 -0
  509. package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
  510. package/dist/rag/retrieval/vectorQueryTool.js +289 -0
  511. package/dist/rag/types.d.ts +768 -0
  512. package/dist/rag/types.js +8 -0
  513. package/dist/server/index.d.ts +15 -11
  514. package/dist/server/index.js +55 -51
  515. package/dist/server/utils/validation.d.ts +8 -8
  516. package/dist/types/common.d.ts +0 -1
  517. package/dist/types/fileTypes.d.ts +1 -1
  518. package/dist/types/generateTypes.d.ts +42 -8
  519. package/dist/types/generateTypes.js +1 -1
  520. package/dist/types/index.d.ts +25 -24
  521. package/dist/types/index.js +21 -20
  522. package/dist/types/modelTypes.d.ts +10 -10
  523. package/dist/types/pptTypes.d.ts +14 -2
  524. package/dist/types/pptTypes.js +16 -0
  525. package/dist/types/streamTypes.d.ts +28 -8
  526. package/dist/types/streamTypes.js +1 -1
  527. package/dist/utils/async/delay.d.ts +40 -0
  528. package/dist/utils/async/delay.js +42 -0
  529. package/dist/utils/async/index.d.ts +23 -0
  530. package/dist/utils/async/index.js +23 -0
  531. package/dist/utils/async/retry.d.ts +141 -0
  532. package/dist/utils/async/retry.js +171 -0
  533. package/dist/utils/async/withTimeout.d.ts +73 -0
  534. package/dist/utils/async/withTimeout.js +96 -0
  535. package/dist/utils/fileDetector.d.ts +7 -1
  536. package/dist/utils/fileDetector.js +91 -18
  537. package/dist/utils/json/extract.d.ts +103 -0
  538. package/dist/utils/json/extract.js +248 -0
  539. package/dist/utils/json/index.d.ts +36 -0
  540. package/dist/utils/json/index.js +36 -0
  541. package/dist/utils/json/safeParse.d.ts +137 -0
  542. package/dist/utils/json/safeParse.js +190 -0
  543. package/dist/utils/messageBuilder.d.ts +2 -2
  544. package/dist/utils/messageBuilder.js +15 -7
  545. package/dist/utils/modelRouter.d.ts +4 -4
  546. package/dist/utils/modelRouter.js +4 -4
  547. package/dist/utils/sanitizers/filename.d.ts +137 -0
  548. package/dist/utils/sanitizers/filename.js +365 -0
  549. package/dist/utils/sanitizers/html.d.ts +170 -0
  550. package/dist/utils/sanitizers/html.js +325 -0
  551. package/dist/utils/sanitizers/index.d.ts +26 -0
  552. package/dist/utils/sanitizers/index.js +29 -0
  553. package/dist/utils/sanitizers/svg.d.ts +81 -0
  554. package/dist/utils/sanitizers/svg.js +482 -0
  555. package/package.json +2 -2
@@ -0,0 +1,338 @@
1
+ /**
2
+ * Context Assembly Utilities
3
+ *
4
+ * Provides utilities for assembling, formatting, and optimizing context
5
+ * from retrieved chunks for LLM consumption.
6
+ *
7
+ * Features:
8
+ * - Context window management (token-aware truncation)
9
+ * - Citation formatting
10
+ * - Context deduplication
11
+ * - Relevance-based ordering
12
+ * - Context summarization
13
+ */
14
+ import { logger } from "../../utils/logger.js";
15
/**
 * Build a single context string out of retrieved chunks.
 *
 * Each result is normalised to `{ id, text, score, metadata, index }`
 * (text falls back to `metadata.text`), optionally sorted by descending
 * score, optionally de-duplicated, and then appended chunk by chunk until
 * the character budget is used up.
 *
 * @param results - Retrieved chunks or query results
 * @param options - Assembly options (all optional):
 *   `maxChars`, `maxTokens` (default 4000; budget is `maxChars || maxTokens * 4`),
 *   `citationFormat` (default "none"), `separator`, `includeMetadata`,
 *   `deduplicate`, `dedupeThreshold` (default 0.8), `orderByRelevance`
 *   (default true), `includeSectionHeaders`, `headerTemplate`
 * @returns Assembled context string ("" when `results` is empty)
 *
 * @example
 * ```typescript
 * const context = assembleContext(results, {
 *   maxTokens: 4000,
 *   citationFormat: 'numbered',
 *   deduplicate: true
 * });
 * ```
 */
export function assembleContext(results, options) {
    const {
        maxChars,
        maxTokens = 4000,
        citationFormat = "none",
        separator = "\n\n---\n\n",
        includeMetadata = false,
        deduplicate = false,
        dedupeThreshold = 0.8,
        orderByRelevance = true,
        includeSectionHeaders = false,
        headerTemplate = "[{index}] Source: {source}",
    } = options || {};
    if (results.length === 0) {
        return "";
    }
    // Normalise every result; text falls back to metadata.text when absent.
    let entries = results.map((result, position) => {
        const id = "id" in result ? result.id : `chunk-${position}`;
        const directText = "text" in result ? result.text || "" : "";
        const score = "score" in result ? result.score || 0 : 0;
        const metadata = "metadata" in result ? result.metadata : {};
        return {
            id,
            text: directText || metadata?.text || "",
            score,
            metadata,
            index: position,
        };
    });
    // Highest score first when requested.
    if (orderByRelevance) {
        entries.sort((left, right) => right.score - left.score);
    }
    // Near-duplicate removal works on entries that always carry a metadata object.
    if (deduplicate) {
        const withMetadata = entries.map((entry) => ({
            ...entry,
            metadata: entry.metadata || {},
        }));
        entries = deduplicateChunks(withMetadata, dedupeThreshold);
    }
    // Character budget: explicit maxChars wins, otherwise ~4 chars per token.
    const budget = maxChars || maxTokens * 4;
    const assembled = [];
    let used = 0;
    for (const entry of entries) {
        // Numbering follows the position in the output, not in the input.
        const position = assembled.length + 1;
        let header = "";
        if (includeSectionHeaders) {
            header = formatHeader(headerTemplate, {
                index: position,
                source: entry.metadata?.source || entry.id,
                score: entry.score,
            });
        }
        const metadataLine = includeMetadata ? formatMetadata(entry.metadata) : "";
        const citation = formatCitation(citationFormat, position, entry.metadata);
        const citationLine = citation ? `${citation}\n` : "";
        const block = [header, citationLine, entry.text, metadataLine]
            .filter(Boolean)
            .join("\n");
        const projected = used + block.length + separator.length;
        if (projected > budget) {
            // Out of room: include a shortened version if enough space is left
            // (50 chars are held back as a buffer), then stop.
            const room = budget - used - separator.length - 50;
            if (room > 200) {
                const shortened = truncateText(entry.text, room);
                const partial = [header, citationLine, shortened, "[truncated]"]
                    .filter(Boolean)
                    .join("\n");
                assembled.push(partial);
            }
            break;
        }
        assembled.push(block);
        used = projected;
    }
    return assembled.join(separator);
}
113
/**
 * Format context with numbered citations and return the matching citation list.
 *
 * The context is produced by `assembleContext`, which numbers chunks by their
 * position in the *output*. To keep the returned citation list aligned with
 * those markers, relevance ordering and de-duplication are applied here first
 * (using the same rules: score descending, `deduplicateChunks` on the chunk
 * text) and then switched off for the `assembleContext` call.
 *
 * `citationFormat`, `includeSectionHeaders` and `headerTemplate` are always
 * forced to "numbered" / true / "[{index}]"; other options are passed through.
 * Chunks dropped from the context because of the size budget are still listed
 * in `citations` (they can only be trailing entries).
 *
 * @param results - Retrieved results
 * @param options - Formatting options (same shape as for `assembleContext`)
 * @returns `{ context, citations }` where `citations[i]` is `"[i+1] <source or id>"`
 */
export function formatContextWithCitations(results, options) {
    const { orderByRelevance = true, deduplicate = false, dedupeThreshold = 0.8, } = options || {};
    // Normalise just enough to sort, de-duplicate and label each result.
    let ranked = results.map((r, index) => {
        const metadata = "metadata" in r ? r.metadata : {};
        return {
            result: r,
            id: "id" in r ? r.id : `chunk-${index}`,
            metadata,
            text: ("text" in r ? r.text : "") || metadata?.text || "",
            score: ("score" in r ? r.score : 0) || 0,
        };
    });
    if (orderByRelevance) {
        ranked.sort((a, b) => b.score - a.score);
    }
    if (deduplicate) {
        ranked = deduplicateChunks(ranked, dedupeThreshold);
    }
    // Number citations in the final order so they match the markers in the context.
    const citations = [];
    const items = ranked.map(({ result, id, metadata }, position) => {
        const marker = `[${position + 1}]`;
        citations.push(`${marker} ${metadata?.source || id}`);
        return {
            ...result,
            citationMarker: marker,
        };
    });
    const context = assembleContext(items, {
        ...options,
        // Already applied above; re-applying could renumber the chunks.
        orderByRelevance: false,
        deduplicate: false,
        citationFormat: "numbered",
        includeSectionHeaders: true,
        headerTemplate: "[{index}]",
    });
    return { context, citations };
}
140
/**
 * Build a context window and report what went into it.
 *
 * Chunks are appended in input order, separated by a blank line. A chunk that
 * does not fit the character budget (`maxChars`, or `maxTokens * 4`, default
 * 4000 tokens) is shortened when more than 100 characters remain and skipped
 * otherwise; either way it is counted in `truncatedChunks` and processing
 * continues with the next chunk.
 *
 * @param results - Retrieved results
 * @param options - Assembly options (`maxTokens`, `maxChars`)
 * @returns `{ text, chunkCount, charCount, tokenCount, truncatedChunks, citations }`
 *          where `citations` is a Map from chunk id to its citation label and
 *          `tokenCount` is estimated as `ceil(charCount / 4)`
 */
export function createContextWindow(results, options) {
    const tokenBudget = options?.maxTokens || 4000;
    const charBudget = options?.maxChars || tokenBudget * 4;
    const citations = new Map();
    let text = "";
    let chunkCount = 0;
    let truncatedChunks = 0;
    // Appends a piece, inserting a blank line between pieces.
    const append = (piece) => {
        text += (text ? "\n\n" : "") + piece;
    };
    const normalized = results.map((result, position) => ({
        id: "id" in result ? result.id : `chunk-${position}`,
        text: ("text" in result ? result.text : "") ||
            result.metadata?.text ||
            "",
        metadata: "metadata" in result ? result.metadata : {},
    }));
    for (const { id, text: body, metadata } of normalized) {
        const label = metadata?.source || id;
        // 10 extra characters are reserved for separators.
        const projected = text.length + body.length + 10;
        if (projected > charBudget) {
            truncatedChunks++;
            const room = charBudget - text.length - 20;
            if (room > 100) {
                // Partial inclusion, marked with an ellipsis and in the citation.
                append(truncateText(body, room) + "...");
                citations.set(id, `[${chunkCount + 1}] ${label} (truncated)`);
                chunkCount++;
            }
            continue;
        }
        append(body);
        citations.set(id, `[${chunkCount + 1}] ${label}`);
        chunkCount++;
    }
    return {
        text,
        chunkCount,
        charCount: text.length,
        tokenCount: Math.ceil(text.length / 4),
        truncatedChunks,
        citations,
    };
}
192
/**
 * Condense a context string, using an LLM provider when one is supplied.
 *
 * Without a provider, when the provider returns no usable content, or when
 * the provider call throws (a warning is logged), the context is truncated
 * to `maxLength * 4` characters instead.
 *
 * @param context - Context to summarize
 * @param maxLength - Word limit stated in the prompt (default 500); also
 *                    drives `maxTokens` (x1.5) and the truncation fallback (x4 chars)
 * @param provider - Optional object exposing `generate({ prompt, maxTokens, temperature })`
 * @returns The trimmed summary, or the truncated context as a fallback
 */
export async function summarizeContext(context, maxLength = 500, provider) {
    // Shared fallback: plain truncation of the original context.
    const fallback = () => truncateText(context, maxLength * 4);
    if (!provider) {
        return fallback();
    }
    try {
        const request = {
            prompt: `Summarize the following context in no more than ${maxLength} words, preserving the key information:\n\n${context}\n\nSummary:`,
            maxTokens: Math.ceil(maxLength * 1.5),
            temperature: 0.3,
        };
        const response = await provider.generate(request);
        const summary = response?.content?.trim();
        return summary || fallback();
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        logger.warn("[ContextAssembly] Summarization failed, using truncation", {
            error: reason,
        });
        return fallback();
    }
}
220
+ // ============================================================================
221
+ // Helper Functions
222
+ // ============================================================================
223
/**
 * Format a section header from a template.
 *
 * Recognised placeholders are `{index}`, `{source}` and `{score}` (score is
 * rendered with four decimals). All occurrences are substituted in a single
 * pass, and values are inserted literally: `$` sequences in a source name are
 * not interpreted as replacement patterns, and placeholder-looking text inside
 * a value is not substituted again.
 *
 * @param template - Header template containing zero or more placeholders
 * @param vars - `{ index, source, score }` values to insert
 * @returns The formatted header
 */
function formatHeader(template, vars) {
    const values = {
        index: String(vars.index),
        source: String(vars.source),
        score: Number(vars.score ?? 0).toFixed(4),
    };
    // A replacer function bypasses the special meaning of "$&", "$1", "$$", ...
    return template.replace(/\{(index|source|score)\}/g, (_match, key) => values[key]);
}
232
/**
 * Render the citation marker for one chunk.
 *
 * - "inline"   -> `(Source: <metadata.source or #index>)`
 * - "footnote" -> `[^index]`
 * - "numbered" -> `[index]`
 * - anything else (including "none") -> empty string
 *
 * @param format - Citation style
 * @param index - 1-based position of the chunk in the output
 * @param metadata - Chunk metadata (only `source` is read)
 * @returns The citation marker, possibly empty
 */
function formatCitation(format, index, metadata) {
    if (format === "inline") {
        const label = metadata?.source || `#${index}`;
        return `(Source: ${label})`;
    }
    if (format === "footnote") {
        return `[^${index}]`;
    }
    if (format === "numbered") {
        return `[${index}]`;
    }
    return "";
}
248
/**
 * Format selected metadata fields for display.
 *
 * Only `source`, `title`, `author`, `date` and `page` are shown, in that
 * order, as `\n[key: value | key: value]`. A field is skipped when it is
 * null/undefined, an empty string, `false` or NaN; a numeric `0` (for example
 * a zero-based page number) is kept.
 *
 * @param metadata - Chunk metadata, may be null/undefined
 * @returns The formatted line (with a leading newline), or "" when nothing is shown
 */
function formatMetadata(metadata) {
    if (!metadata) {
        return "";
    }
    const relevant = ["source", "title", "author", "date", "page"];
    const parts = [];
    for (const key of relevant) {
        const value = metadata[key];
        // Explicit absence checks instead of truthiness so that 0 survives.
        const isMissing = value == null ||
            value === "" ||
            value === false ||
            Number.isNaN(value);
        if (!isMissing) {
            parts.push(`${key}: ${value}`);
        }
    }
    return parts.length > 0 ? `\n[${parts.join(" | ")}]` : "";
}
264
/**
 * Truncate text to at most `maxLength` characters, preferring a word boundary.
 *
 * The cut is made at the last space at or before `maxLength`, unless that
 * would discard more than 30% of the allowed length, in which case the text
 * is cut hard at `maxLength`. A hard cut never splits a UTF-16 surrogate
 * pair. The result is trimmed.
 *
 * @param text - Text to shorten
 * @param maxLength - Maximum number of UTF-16 code units to keep
 * @returns The original text when it already fits, "" when `maxLength <= 0`,
 *          otherwise the shortened text
 */
function truncateText(text, maxLength) {
    if (text.length <= maxLength) {
        return text;
    }
    // A non-positive limit would otherwise turn into slice(0, -n), which
    // keeps almost the whole text instead of none of it.
    if (maxLength <= 0) {
        return "";
    }
    // Find last space before maxLength
    let truncateAt = text.lastIndexOf(" ", maxLength);
    if (truncateAt === -1 || truncateAt < maxLength * 0.7) {
        truncateAt = Math.floor(maxLength);
        // Do not leave a dangling high surrogate at the end of the result.
        const lastUnit = text.charCodeAt(truncateAt - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
            truncateAt -= 1;
        }
    }
    return text.slice(0, truncateAt).trim();
}
278
/**
 * Drop chunks whose text is too similar to a chunk that was already kept.
 *
 * Items are visited in order; an item is kept unless `textSimilarity` against
 * some previously kept item is strictly greater than `threshold`. Earlier
 * items therefore win over later near-duplicates.
 *
 * @param items - Chunks exposing a `text` property
 * @param threshold - Similarity above which two chunks count as duplicates
 * @returns A new array with the retained items in their original order
 */
function deduplicateChunks(items, threshold) {
    const kept = [];
    for (const candidate of items) {
        const isDistinct = kept.every((previous) => !(textSimilarity(candidate.text, previous.text) > threshold));
        if (isDistinct) {
            kept.push(candidate);
        }
    }
    return kept;
}
292
/**
 * Word-level Jaccard similarity between two texts.
 *
 * Both texts are lower-cased and split on whitespace; empty tokens produced
 * by leading or trailing whitespace are ignored so that padding does not
 * lower the score of otherwise identical texts.
 *
 * @param a - First text
 * @param b - Second text
 * @returns |A ∩ B| / |A ∪ B| in [0, 1]; 1 when neither text contains a word
 */
function textSimilarity(a, b) {
    const toWordSet = (text) => new Set(text.toLowerCase().split(/\s+/).filter(Boolean));
    const wordsA = toWordSet(a);
    const wordsB = toWordSet(b);
    // Two texts without any words are treated as identical.
    if (wordsA.size === 0 && wordsB.size === 0) {
        return 1;
    }
    let shared = 0;
    for (const word of wordsA) {
        if (wordsB.has(word)) {
            shared++;
        }
    }
    // |A ∪ B| = |A| + |B| - |A ∩ B|, no need to materialise the union.
    return shared / (wordsA.size + wordsB.size - shared);
}
302
/**
 * Order chunks by document structure (if available).
 *
 * Chunks are grouped by `metadata.documentId` (groups appear in the order
 * their first chunk was seen) and each group is sorted by
 * `metadata.chunkIndex`, with a missing index treated as 0. Chunks without
 * metadata are grouped together under an undefined document id instead of
 * causing an exception. The input array is not modified.
 *
 * @param chunks - Chunks, ideally carrying `metadata.documentId` / `metadata.chunkIndex`
 * @returns A new array with each document's chunks kept together and in order
 */
export function orderByDocumentStructure(chunks) {
    // Group by document
    const byDocument = new Map();
    for (const chunk of chunks) {
        const docId = chunk.metadata?.documentId;
        const group = byDocument.get(docId);
        if (group) {
            group.push(chunk);
        }
        else {
            byDocument.set(docId, [chunk]);
        }
    }
    // Sort each document's chunks by position
    const positionOf = (chunk) => chunk.metadata?.chunkIndex ?? 0;
    for (const docChunks of byDocument.values()) {
        docChunks.sort((a, b) => positionOf(a) - positionOf(b));
    }
    // Flatten, keeping documents together
    return [...byDocument.values()].flat();
}
322
/**
 * Extract key sentences from text for a summary.
 *
 * The text is split on runs of `.`, `!` and `?` (the punctuation itself is
 * dropped), fragments of 20 characters or fewer are discarded, and the rest
 * are ranked by `length * number of distinct lower-cased words`.
 *
 * @param text - Text to pick sentences from
 * @param count - Maximum number of sentences to return (default 3)
 * @returns Up to `count` sentences, highest score first; an empty array when
 *          `text` is not a non-empty string or `count` is not positive
 */
export function extractKeySentences(text, count = 3) {
    const limit = Math.floor(count);
    // A negative count would make slice() drop sentences from the end instead
    // of limiting the result, so anything that is not > 0 yields no sentences.
    if (typeof text !== "string" || text.length === 0 || !(limit > 0)) {
        return [];
    }
    const sentences = text
        .split(/[.!?]+/)
        .map((s) => s.trim())
        .filter((s) => s.length > 20);
    // Simple scoring: longer sentences with more unique words
    const scored = sentences.map((s) => ({
        text: s,
        score: s.length * new Set(s.toLowerCase().split(/\s+/)).size,
    }));
    scored.sort((a, b) => b.score - a.score);
    return scored.slice(0, limit).map((s) => s.text);
}
338
+ //# sourceMappingURL=contextAssembly.js.map
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Pipeline Module Exports
3
+ */
4
+ export { assembleContext, type CitationFormat, type ContextAssemblyOptions, type ContextWindow, createContextWindow, extractKeySentences, formatContextWithCitations, orderByDocumentStructure, summarizeContext, } from "./contextAssembly.js";
5
+ export { createRAGPipeline, type EmbeddingModelConfig, type GenerationModelConfig, type IngestOptions, type PipelineStats, type QueryOptions, RAGPipeline, type RAGPipelineConfig, type RAGResponse, } from "./RAGPipeline.js";
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Pipeline Module Exports
3
+ */
4
+ export { assembleContext, createContextWindow, extractKeySentences, formatContextWithCitations, orderByDocumentStructure, summarizeContext, } from "./contextAssembly.js";
5
+ export { createRAGPipeline, RAGPipeline, } from "./RAGPipeline.js";
6
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,38 @@
1
+ /**
2
+ * RAG Integration for generate() and stream()
3
+ *
4
+ * Provides automatic RAG pipeline setup when `rag` config is provided
5
+ * in GenerateOptions or StreamOptions. Handles file loading, chunking,
6
+ * embedding generation, vector storage, and tool creation internally
7
+ * so developers only need to pass `rag: { files: [...] }`.
8
+ */
9
+ import type { Tool } from "ai";
10
+ import type { RAGConfig } from "./types.js";
11
+ /**
12
+ * Result of preparing RAG for a generate/stream call
13
+ */
14
+ export type RAGPreparedTool = {
15
+ /** The tool to inject into the tools Record */
16
+ tool: Tool;
17
+ /** Tool name (key for the tools Record) */
18
+ toolName: string;
19
+ /** Number of chunks indexed */
20
+ chunksIndexed: number;
21
+ /** Number of files loaded */
22
+ filesLoaded: number;
23
+ };
24
+ /**
25
+ * Prepare RAG tools from the provided configuration.
26
+ *
27
+ * This function:
28
+ * 1. Loads and reads all specified files
29
+ * 2. Chunks them using the configured (or auto-detected) strategy
30
+ * 3. Generates embeddings for each chunk
31
+ * 4. Stores them in an in-memory vector store
32
+ * 5. Creates a tool the AI model can use to search the documents
33
+ *
34
+ * @param ragConfig - RAG configuration from generate/stream options
35
+ * @param fallbackProvider - Provider to use for embeddings if not specified in ragConfig
36
+ * @returns Prepared RAG tool to inject into the tools record
37
+ */
38
+ export declare function prepareRAGTool(ragConfig: RAGConfig, fallbackProvider?: string): Promise<RAGPreparedTool>;
@@ -0,0 +1,212 @@
1
+ /**
2
+ * RAG Integration for generate() and stream()
3
+ *
4
+ * Provides automatic RAG pipeline setup when `rag` config is provided
5
+ * in GenerateOptions or StreamOptions. Handles file loading, chunking,
6
+ * embedding generation, vector storage, and tool creation internally
7
+ * so developers only need to pass `rag: { files: [...] }`.
8
+ */
9
+ import { existsSync, readFileSync } from "fs";
10
+ import { extname, resolve } from "path";
11
+ import { z } from "zod";
12
+ import { logger } from "../utils/logger.js";
13
+ import { ChunkerRegistry } from "./chunking/index.js";
14
+ import { createVectorQueryTool, InMemoryVectorStore, } from "./retrieval/vectorQueryTool.js";
15
/**
 * Extensions (plain text, data and source-code files) that are chunked with
 * the generic "recursive" strategy.
 */
const RECURSIVE_EXTENSIONS = [
    ".txt", ".csv", ".xml", ".yaml", ".yml",
    ".ts", ".js", ".py", ".java", ".go", ".rs",
    ".c", ".cpp", ".rb", ".php", ".swift", ".kt",
];
/**
 * Lookup table from lower-case file extension (including the dot) to the
 * recommended chunking strategy name.
 */
const EXTENSION_TO_STRATEGY = {
    ".md": "markdown",
    ".mdx": "markdown",
    ".html": "html",
    ".htm": "html",
    ".json": "json",
    ".tex": "latex",
    ".latex": "latex",
    ...Object.fromEntries(RECURSIVE_EXTENSIONS.map((extension) => [extension, "recursive"])),
};
/**
 * Pick a chunking strategy from a file path's extension.
 *
 * The extension is compared case-insensitively; unknown or missing
 * extensions fall back to "recursive".
 *
 * @param filePath - Path of the file to be chunked
 * @returns The strategy name for that file
 */
function detectStrategy(filePath) {
    const extension = extname(filePath).toLowerCase();
    const strategy = EXTENSION_TO_STRATEGY[extension];
    return strategy ? strategy : "recursive";
}
51
/**
 * Build a deterministic, character-frequency based embedding.
 *
 * Every UTF-16 code unit of `text` increments the bucket
 * `charCode % dimension`; the resulting histogram is then scaled to unit
 * length (left as all zeros when the text is empty).
 *
 * @param text - Text to embed
 * @param dimension - Number of buckets / length of the returned vector
 * @returns Array of `dimension` numbers
 */
function generateSimpleEmbedding(text, dimension) {
    const vector = new Array(dimension).fill(0);
    // Histogram of code units, folded into `dimension` buckets.
    let position = 0;
    while (position < text.length) {
        const bucket = text.charCodeAt(position) % dimension;
        vector[bucket] += 1;
        position += 1;
    }
    // Euclidean norm of the histogram.
    let sumOfSquares = 0;
    for (const component of vector) {
        sumOfSquares = sumOfSquares + component * component;
    }
    const magnitude = Math.sqrt(sumOfSquares);
    // Scale to a unit vector; skipped for the zero vector to avoid 0/0.
    if (magnitude > 0) {
        for (let bucket = 0; bucket < dimension; bucket++) {
            vector[bucket] = vector[bucket] / magnitude;
        }
    }
    return vector;
}
73
/**
 * Split raw content into fixed-size windows.
 *
 * Used only when the configured chunker fails, so that the whole file is
 * still indexed. Invalid sizes fall back to 1000 characters; an overlap that
 * is not smaller than the window size is ignored.
 *
 * @param content - Full file content
 * @param chunkSize - Requested window size in characters
 * @param chunkOverlap - Requested overlap between consecutive windows
 * @returns Window texts covering the entire content (empty for empty content)
 */
function splitIntoFallbackChunks(content, chunkSize, chunkOverlap) {
    const size = Number.isFinite(chunkSize) && chunkSize >= 1 ? Math.floor(chunkSize) : 1000;
    const overlap = Number.isFinite(chunkOverlap) && chunkOverlap > 0 ? Math.floor(chunkOverlap) : 0;
    // The step is always >= 1, so the loop below terminates.
    const step = overlap < size ? size - overlap : size;
    const pieces = [];
    for (let start = 0; start < content.length; start += step) {
        pieces.push(content.slice(start, start + size));
        if (start + size >= content.length) {
            break;
        }
    }
    return pieces;
}
/**
 * Prepare RAG tools from the provided configuration.
 *
 * This function:
 * 1. Loads and reads all specified files (missing or unreadable files are
 *    skipped with a warning)
 * 2. Chunks them using the configured (or auto-detected) strategy; if a
 *    chunker fails, the file is split into fixed-size windows instead so
 *    that none of its content is lost
 * 3. Generates a hash-based embedding for each chunk
 * 4. Stores them in an in-memory vector store
 * 5. Creates a tool the AI model can use to search the documents
 *
 * @param ragConfig - RAG configuration from generate/stream options
 * @param fallbackProvider - Provider name passed to the query-tool config if
 *                           `ragConfig.embeddingProvider` is not set
 * @returns `{ tool, toolName, chunksIndexed, filesLoaded }`
 * @throws Error when `files` is empty or none of the files could be loaded
 */
export async function prepareRAGTool(ragConfig, fallbackProvider) {
    const { files, strategy: userStrategy, chunkSize = 1000, chunkOverlap = 200, topK = 5, toolName = "search_knowledge_base", toolDescription = "REQUIRED: Search through pre-loaded local documents to find relevant information. Use this tool FIRST before any web search or other tools. This searches an indexed knowledge base of documents the user has provided.", embeddingProvider, embeddingModel, } = ragConfig;
    if (!files || files.length === 0) {
        throw new Error("RAG config requires at least one file path in 'files'");
    }
    // 1. Load files
    const fileContents = [];
    for (const filePath of files) {
        const resolvedPath = resolve(filePath);
        if (!existsSync(resolvedPath)) {
            logger.warn(`[RAG] File not found, skipping: ${resolvedPath}`);
            continue;
        }
        try {
            const content = readFileSync(resolvedPath, "utf-8");
            const strategy = userStrategy || detectStrategy(resolvedPath);
            fileContents.push({ path: resolvedPath, content, strategy });
        }
        catch (error) {
            logger.warn(`[RAG] Failed to read file: ${resolvedPath}: ${error instanceof Error ? error.message : String(error)}`);
        }
    }
    if (fileContents.length === 0) {
        throw new Error("RAG: No files could be loaded. Check that file paths exist and are readable.");
    }
    logger.info(`[RAG] Loaded ${fileContents.length} files for indexing`);
    // 2. Chunk all files
    const allChunks = [];
    for (const { path, content, strategy } of fileContents) {
        try {
            const chunker = ChunkerRegistry.get(strategy);
            const chunks = await chunker.chunk(content, {
                maxSize: chunkSize,
                overlap: chunkOverlap,
                metadata: { source: path },
            });
            for (const chunk of chunks) {
                allChunks.push({
                    text: chunk.text,
                    metadata: { ...chunk.metadata, source: path },
                });
            }
        }
        catch (error) {
            logger.warn(`[RAG] Chunking failed for ${path}, using fallback: ${error instanceof Error ? error.message : String(error)}`);
            // Fallback: index the whole file as fixed-size windows rather than
            // only its first `chunkSize` characters.
            for (const text of splitIntoFallbackChunks(content, chunkSize, chunkOverlap)) {
                allChunks.push({
                    text,
                    metadata: { source: path, fallback: true },
                });
            }
        }
    }
    logger.info(`[RAG] Created ${allChunks.length} chunks from ${fileContents.length} files`);
    // 3. Generate embeddings and store in vector store
    const EMBEDDING_DIMENSION = 128;
    const vectorStore = new InMemoryVectorStore();
    const indexName = "rag-index";
    const items = allChunks.map((chunk, i) => ({
        id: `rag-chunk-${i}`,
        vector: generateSimpleEmbedding(chunk.text, EMBEDDING_DIMENSION),
        metadata: {
            text: chunk.text,
            ...chunk.metadata,
        },
    }));
    await vectorStore.upsert(indexName, items);
    logger.info(`[RAG] Indexed ${items.length} chunks in vector store`);
    // 4. Create the search tool
    // Determine embedding provider/model for the query tool
    const provider = embeddingProvider || fallbackProvider || "vertex";
    const model = embeddingModel || "gemini-2.5-flash";
    // Only the description of this tool is reused below; searching itself is
    // done directly against the in-memory store.
    const queryTool = createVectorQueryTool({
        id: toolName,
        description: toolDescription,
        indexName,
        embeddingModel: { provider, modelName: model },
        topK,
        includeSources: true,
    }, vectorStore);
    // Convert to Vercel AI SDK Tool format
    const aiTool = {
        description: queryTool.description,
        parameters: z.object({
            query: z
                .string()
                .describe("The search query to find relevant information"),
        }),
        execute: async ({ query }) => {
            // The query must be embedded with the same method as the chunks,
            // otherwise the vectors are not comparable.
            const queryEmbedding = generateSimpleEmbedding(query, EMBEDDING_DIMENSION);
            const results = await vectorStore.query({
                indexName,
                queryVector: queryEmbedding,
                topK,
            });
            if (results.length === 0) {
                return {
                    relevantContext: "No relevant documents found for the query.",
                    sources: [],
                    totalResults: 0,
                };
            }
            const relevantContext = results
                .map((r, i) => `[${i + 1}] ${r.metadata?.text || r.text || ""}`)
                .join("\n\n");
            return {
                relevantContext,
                sources: results.map((r) => ({
                    id: r.id,
                    score: r.score,
                    source: r.metadata?.source,
                    // Short preview only; the full text is in relevantContext.
                    text: (r.metadata?.text || r.text || "").slice(0, 200),
                })),
                totalResults: results.length,
            };
        },
    };
    return {
        tool: aiTool,
        toolName,
        chunksIndexed: allChunks.length,
        filesLoaded: fileContents.length,
    };
}
212
+ //# sourceMappingURL=ragIntegration.js.map