@juspay/neurolink 9.1.1 → 9.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (555) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +106 -37
  3. package/dist/agent/directTools.d.ts +11 -11
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/cli/commands/rag.d.ts +19 -0
  6. package/dist/cli/commands/rag.js +756 -0
  7. package/dist/cli/factories/commandFactory.js +146 -83
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/core/baseProvider.d.ts +43 -30
  10. package/dist/core/baseProvider.js +98 -138
  11. package/dist/core/conversationMemoryFactory.d.ts +2 -2
  12. package/dist/core/conversationMemoryFactory.js +2 -2
  13. package/dist/core/conversationMemoryInitializer.d.ts +1 -2
  14. package/dist/core/conversationMemoryInitializer.js +2 -2
  15. package/dist/core/infrastructure/baseError.d.ts +21 -0
  16. package/dist/core/infrastructure/baseError.js +22 -0
  17. package/dist/core/infrastructure/baseFactory.d.ts +21 -0
  18. package/dist/core/infrastructure/baseFactory.js +54 -0
  19. package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
  20. package/dist/core/infrastructure/baseRegistry.js +49 -0
  21. package/dist/core/infrastructure/index.d.ts +5 -0
  22. package/dist/core/infrastructure/index.js +5 -0
  23. package/dist/core/infrastructure/retry.d.ts +7 -0
  24. package/dist/core/infrastructure/retry.js +20 -0
  25. package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
  26. package/dist/core/infrastructure/typedEventEmitter.js +23 -0
  27. package/dist/core/redisConversationMemoryManager.d.ts +1 -6
  28. package/dist/core/redisConversationMemoryManager.js +7 -19
  29. package/dist/factories/providerFactory.d.ts +5 -3
  30. package/dist/factories/providerFactory.js +31 -24
  31. package/dist/image-gen/ImageGenService.d.ts +143 -0
  32. package/dist/image-gen/ImageGenService.js +345 -0
  33. package/dist/image-gen/imageGenTools.d.ts +126 -0
  34. package/dist/image-gen/imageGenTools.js +304 -0
  35. package/dist/image-gen/index.d.ts +46 -0
  36. package/dist/image-gen/index.js +48 -0
  37. package/dist/image-gen/types.d.ts +237 -0
  38. package/dist/image-gen/types.js +24 -0
  39. package/dist/index.d.ts +46 -12
  40. package/dist/index.js +88 -36
  41. package/dist/lib/agent/directTools.d.ts +8 -8
  42. package/dist/lib/core/baseProvider.d.ts +43 -30
  43. package/dist/lib/core/baseProvider.js +98 -138
  44. package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
  45. package/dist/lib/core/conversationMemoryFactory.js +2 -2
  46. package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
  47. package/dist/lib/core/conversationMemoryInitializer.js +2 -2
  48. package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
  49. package/dist/lib/core/infrastructure/baseError.js +23 -0
  50. package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
  51. package/dist/lib/core/infrastructure/baseFactory.js +55 -0
  52. package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
  53. package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
  54. package/dist/lib/core/infrastructure/index.d.ts +5 -0
  55. package/dist/lib/core/infrastructure/index.js +6 -0
  56. package/dist/lib/core/infrastructure/retry.d.ts +7 -0
  57. package/dist/lib/core/infrastructure/retry.js +21 -0
  58. package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
  59. package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
  60. package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
  61. package/dist/lib/core/redisConversationMemoryManager.js +7 -19
  62. package/dist/lib/factories/providerFactory.d.ts +5 -3
  63. package/dist/lib/factories/providerFactory.js +31 -24
  64. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  65. package/dist/lib/image-gen/ImageGenService.js +346 -0
  66. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  67. package/dist/lib/image-gen/imageGenTools.js +305 -0
  68. package/dist/lib/image-gen/index.d.ts +46 -0
  69. package/dist/lib/image-gen/index.js +49 -0
  70. package/dist/lib/image-gen/types.d.ts +237 -0
  71. package/dist/lib/image-gen/types.js +25 -0
  72. package/dist/lib/index.d.ts +46 -12
  73. package/dist/lib/index.js +88 -36
  74. package/dist/lib/mcp/index.d.ts +6 -5
  75. package/dist/lib/mcp/index.js +7 -5
  76. package/dist/lib/neurolink.d.ts +11 -13
  77. package/dist/lib/neurolink.js +95 -29
  78. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  79. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  80. package/dist/lib/processors/base/index.d.ts +14 -0
  81. package/dist/lib/processors/base/index.js +20 -0
  82. package/dist/lib/processors/base/types.d.ts +593 -0
  83. package/dist/lib/processors/base/types.js +77 -0
  84. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  85. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  86. package/dist/lib/processors/cli/index.d.ts +37 -0
  87. package/dist/lib/processors/cli/index.js +50 -0
  88. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  89. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  90. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  91. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  92. package/dist/lib/processors/code/index.d.ts +44 -0
  93. package/dist/lib/processors/code/index.js +61 -0
  94. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  95. package/dist/lib/processors/config/fileTypes.js +521 -0
  96. package/dist/lib/processors/config/index.d.ts +32 -0
  97. package/dist/lib/processors/config/index.js +93 -0
  98. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  99. package/dist/lib/processors/config/languageMap.js +411 -0
  100. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  101. package/dist/lib/processors/config/mimeTypes.js +339 -0
  102. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  103. package/dist/lib/processors/config/sizeLimits.js +247 -0
  104. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  105. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  106. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  107. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  108. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  109. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  110. package/dist/lib/processors/data/index.d.ts +49 -0
  111. package/dist/lib/processors/data/index.js +77 -0
  112. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  113. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  114. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  115. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  116. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  117. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  118. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  119. package/dist/lib/processors/document/WordProcessor.js +354 -0
  120. package/dist/lib/processors/document/index.d.ts +54 -0
  121. package/dist/lib/processors/document/index.js +91 -0
  122. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  123. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  124. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  125. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  126. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  127. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  128. package/dist/lib/processors/errors/index.d.ts +46 -0
  129. package/dist/lib/processors/errors/index.js +50 -0
  130. package/dist/lib/processors/index.d.ts +76 -0
  131. package/dist/lib/processors/index.js +113 -0
  132. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  133. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  134. package/dist/lib/processors/integration/index.d.ts +42 -0
  135. package/dist/lib/processors/integration/index.js +45 -0
  136. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  137. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  138. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  139. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  140. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  141. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  142. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  143. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  144. package/dist/lib/processors/markup/index.d.ts +66 -0
  145. package/dist/lib/processors/markup/index.js +103 -0
  146. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  147. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  148. package/dist/lib/processors/registry/index.d.ts +12 -0
  149. package/dist/lib/processors/registry/index.js +17 -0
  150. package/dist/lib/processors/registry/types.d.ts +53 -0
  151. package/dist/lib/processors/registry/types.js +11 -0
  152. package/dist/lib/providers/amazonBedrock.d.ts +15 -2
  153. package/dist/lib/providers/amazonBedrock.js +65 -8
  154. package/dist/lib/providers/anthropic.d.ts +3 -3
  155. package/dist/lib/providers/anthropic.js +10 -7
  156. package/dist/lib/providers/googleAiStudio.d.ts +5 -5
  157. package/dist/lib/providers/googleAiStudio.js +10 -7
  158. package/dist/lib/providers/googleVertex.d.ts +16 -4
  159. package/dist/lib/providers/googleVertex.js +72 -16
  160. package/dist/lib/providers/litellm.d.ts +3 -3
  161. package/dist/lib/providers/litellm.js +10 -10
  162. package/dist/lib/providers/mistral.d.ts +3 -3
  163. package/dist/lib/providers/mistral.js +7 -6
  164. package/dist/lib/providers/ollama.d.ts +3 -4
  165. package/dist/lib/providers/ollama.js +7 -8
  166. package/dist/lib/providers/openAI.d.ts +14 -2
  167. package/dist/lib/providers/openAI.js +60 -6
  168. package/dist/lib/providers/openRouter.d.ts +2 -2
  169. package/dist/lib/providers/openRouter.js +10 -6
  170. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  171. package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
  172. package/dist/lib/rag/ChunkerFactory.js +321 -0
  173. package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
  174. package/dist/lib/rag/ChunkerRegistry.js +422 -0
  175. package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
  176. package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
  177. package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
  178. package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
  179. package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
  180. package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
  181. package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
  182. package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
  183. package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
  184. package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
  185. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
  186. package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
  187. package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
  188. package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
  189. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  190. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
  191. package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
  192. package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
  193. package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
  194. package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
  195. package/dist/lib/rag/chunkers/index.d.ts +15 -0
  196. package/dist/lib/rag/chunkers/index.js +16 -0
  197. package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
  198. package/dist/lib/rag/chunking/characterChunker.js +143 -0
  199. package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
  200. package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
  201. package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
  202. package/dist/lib/rag/chunking/htmlChunker.js +248 -0
  203. package/dist/lib/rag/chunking/index.d.ts +15 -0
  204. package/dist/lib/rag/chunking/index.js +18 -0
  205. package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
  206. package/dist/lib/rag/chunking/jsonChunker.js +282 -0
  207. package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
  208. package/dist/lib/rag/chunking/latexChunker.js +252 -0
  209. package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
  210. package/dist/lib/rag/chunking/markdownChunker.js +202 -0
  211. package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
  212. package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
  213. package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
  214. package/dist/lib/rag/chunking/semanticChunker.js +307 -0
  215. package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
  216. package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
  217. package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
  218. package/dist/lib/rag/chunking/tokenChunker.js +184 -0
  219. package/dist/lib/rag/document/MDocument.d.ts +198 -0
  220. package/dist/lib/rag/document/MDocument.js +393 -0
  221. package/dist/lib/rag/document/index.d.ts +5 -0
  222. package/dist/lib/rag/document/index.js +6 -0
  223. package/dist/lib/rag/document/loaders.d.ts +201 -0
  224. package/dist/lib/rag/document/loaders.js +501 -0
  225. package/dist/lib/rag/errors/RAGError.d.ts +244 -0
  226. package/dist/lib/rag/errors/RAGError.js +275 -0
  227. package/dist/lib/rag/errors/index.d.ts +6 -0
  228. package/dist/lib/rag/errors/index.js +7 -0
  229. package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
  230. package/dist/lib/rag/graphRag/graphRAG.js +385 -0
  231. package/dist/lib/rag/graphRag/index.d.ts +4 -0
  232. package/dist/lib/rag/graphRag/index.js +5 -0
  233. package/dist/lib/rag/index.d.ts +103 -0
  234. package/dist/lib/rag/index.js +142 -0
  235. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  236. package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
  237. package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  238. package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
  239. package/dist/lib/rag/metadata/index.d.ts +6 -0
  240. package/dist/lib/rag/metadata/index.js +10 -0
  241. package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
  242. package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
  243. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
  244. package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
  245. package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
  246. package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
  247. package/dist/lib/rag/pipeline/index.d.ts +5 -0
  248. package/dist/lib/rag/pipeline/index.js +6 -0
  249. package/dist/lib/rag/ragIntegration.d.ts +38 -0
  250. package/dist/lib/rag/ragIntegration.js +212 -0
  251. package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
  252. package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
  253. package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
  254. package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
  255. package/dist/lib/rag/reranker/index.d.ts +6 -0
  256. package/dist/lib/rag/reranker/index.js +10 -0
  257. package/dist/lib/rag/reranker/reranker.d.ts +71 -0
  258. package/dist/lib/rag/reranker/reranker.js +278 -0
  259. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
  260. package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
  261. package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
  262. package/dist/lib/rag/resilience/RetryHandler.js +301 -0
  263. package/dist/lib/rag/resilience/index.d.ts +7 -0
  264. package/dist/lib/rag/resilience/index.js +8 -0
  265. package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
  266. package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
  267. package/dist/lib/rag/retrieval/index.d.ts +5 -0
  268. package/dist/lib/rag/retrieval/index.js +6 -0
  269. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
  270. package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
  271. package/dist/lib/rag/types.d.ts +768 -0
  272. package/dist/lib/rag/types.js +9 -0
  273. package/dist/lib/server/index.d.ts +15 -11
  274. package/dist/lib/server/index.js +55 -51
  275. package/dist/lib/server/utils/validation.d.ts +2 -2
  276. package/dist/lib/types/common.d.ts +0 -1
  277. package/dist/lib/types/fileTypes.d.ts +1 -1
  278. package/dist/lib/types/generateTypes.d.ts +42 -8
  279. package/dist/lib/types/generateTypes.js +1 -1
  280. package/dist/lib/types/index.d.ts +25 -24
  281. package/dist/lib/types/index.js +21 -20
  282. package/dist/lib/types/modelTypes.d.ts +16 -16
  283. package/dist/lib/types/pptTypes.d.ts +14 -2
  284. package/dist/lib/types/pptTypes.js +16 -0
  285. package/dist/lib/types/streamTypes.d.ts +28 -8
  286. package/dist/lib/types/streamTypes.js +1 -1
  287. package/dist/lib/utils/async/delay.d.ts +40 -0
  288. package/dist/lib/utils/async/delay.js +43 -0
  289. package/dist/lib/utils/async/index.d.ts +23 -0
  290. package/dist/lib/utils/async/index.js +24 -0
  291. package/dist/lib/utils/async/retry.d.ts +141 -0
  292. package/dist/lib/utils/async/retry.js +172 -0
  293. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  294. package/dist/lib/utils/async/withTimeout.js +97 -0
  295. package/dist/lib/utils/fileDetector.d.ts +7 -1
  296. package/dist/lib/utils/fileDetector.js +91 -18
  297. package/dist/lib/utils/json/extract.d.ts +103 -0
  298. package/dist/lib/utils/json/extract.js +249 -0
  299. package/dist/lib/utils/json/index.d.ts +36 -0
  300. package/dist/lib/utils/json/index.js +37 -0
  301. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  302. package/dist/lib/utils/json/safeParse.js +191 -0
  303. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  304. package/dist/lib/utils/messageBuilder.js +15 -7
  305. package/dist/lib/utils/modelRouter.d.ts +4 -4
  306. package/dist/lib/utils/modelRouter.js +4 -4
  307. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  308. package/dist/lib/utils/sanitizers/filename.js +366 -0
  309. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  310. package/dist/lib/utils/sanitizers/html.js +326 -0
  311. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  312. package/dist/lib/utils/sanitizers/index.js +30 -0
  313. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  314. package/dist/lib/utils/sanitizers/svg.js +483 -0
  315. package/dist/mcp/index.d.ts +6 -5
  316. package/dist/mcp/index.js +7 -5
  317. package/dist/neurolink.d.ts +11 -13
  318. package/dist/neurolink.js +95 -29
  319. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  320. package/dist/processors/base/BaseFileProcessor.js +613 -0
  321. package/dist/processors/base/index.d.ts +14 -0
  322. package/dist/processors/base/index.js +19 -0
  323. package/dist/processors/base/types.d.ts +593 -0
  324. package/dist/processors/base/types.js +76 -0
  325. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  326. package/dist/processors/cli/fileProcessorCli.js +388 -0
  327. package/dist/processors/cli/index.d.ts +37 -0
  328. package/dist/processors/cli/index.js +49 -0
  329. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  330. package/dist/processors/code/ConfigProcessor.js +400 -0
  331. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  332. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  333. package/dist/processors/code/index.d.ts +44 -0
  334. package/dist/processors/code/index.js +60 -0
  335. package/dist/processors/config/fileTypes.d.ts +283 -0
  336. package/dist/processors/config/fileTypes.js +520 -0
  337. package/dist/processors/config/index.d.ts +32 -0
  338. package/dist/processors/config/index.js +92 -0
  339. package/dist/processors/config/languageMap.d.ts +66 -0
  340. package/dist/processors/config/languageMap.js +410 -0
  341. package/dist/processors/config/mimeTypes.d.ts +376 -0
  342. package/dist/processors/config/mimeTypes.js +338 -0
  343. package/dist/processors/config/sizeLimits.d.ts +194 -0
  344. package/dist/processors/config/sizeLimits.js +246 -0
  345. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  346. package/dist/processors/data/JsonProcessor.js +203 -0
  347. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  348. package/dist/processors/data/XmlProcessor.js +283 -0
  349. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  350. package/dist/processors/data/YamlProcessor.js +294 -0
  351. package/dist/processors/data/index.d.ts +49 -0
  352. package/dist/processors/data/index.js +76 -0
  353. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  354. package/dist/processors/document/ExcelProcessor.js +519 -0
  355. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  356. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  357. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  358. package/dist/processors/document/RtfProcessor.js +361 -0
  359. package/dist/processors/document/WordProcessor.d.ts +168 -0
  360. package/dist/processors/document/WordProcessor.js +353 -0
  361. package/dist/processors/document/index.d.ts +54 -0
  362. package/dist/processors/document/index.js +90 -0
  363. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  364. package/dist/processors/errors/FileErrorCode.js +255 -0
  365. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  366. package/dist/processors/errors/errorHelpers.js +378 -0
  367. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  368. package/dist/processors/errors/errorSerializer.js +507 -0
  369. package/dist/processors/errors/index.d.ts +46 -0
  370. package/dist/processors/errors/index.js +49 -0
  371. package/dist/processors/index.d.ts +76 -0
  372. package/dist/processors/index.js +112 -0
  373. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  374. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  375. package/dist/processors/integration/index.d.ts +42 -0
  376. package/dist/processors/integration/index.js +44 -0
  377. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  378. package/dist/processors/markup/HtmlProcessor.js +249 -0
  379. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  380. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  381. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  382. package/dist/processors/markup/SvgProcessor.js +240 -0
  383. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  384. package/dist/processors/markup/TextProcessor.js +188 -0
  385. package/dist/processors/markup/index.d.ts +66 -0
  386. package/dist/processors/markup/index.js +102 -0
  387. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  388. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  389. package/dist/processors/registry/index.d.ts +12 -0
  390. package/dist/processors/registry/index.js +16 -0
  391. package/dist/processors/registry/types.d.ts +53 -0
  392. package/dist/processors/registry/types.js +10 -0
  393. package/dist/providers/amazonBedrock.d.ts +15 -2
  394. package/dist/providers/amazonBedrock.js +65 -8
  395. package/dist/providers/anthropic.d.ts +3 -3
  396. package/dist/providers/anthropic.js +10 -7
  397. package/dist/providers/googleAiStudio.d.ts +5 -5
  398. package/dist/providers/googleAiStudio.js +10 -7
  399. package/dist/providers/googleVertex.d.ts +16 -4
  400. package/dist/providers/googleVertex.js +72 -16
  401. package/dist/providers/litellm.d.ts +3 -3
  402. package/dist/providers/litellm.js +10 -10
  403. package/dist/providers/mistral.d.ts +3 -3
  404. package/dist/providers/mistral.js +7 -6
  405. package/dist/providers/ollama.d.ts +3 -4
  406. package/dist/providers/ollama.js +7 -8
  407. package/dist/providers/openAI.d.ts +14 -2
  408. package/dist/providers/openAI.js +60 -6
  409. package/dist/providers/openRouter.d.ts +2 -2
  410. package/dist/providers/openRouter.js +10 -6
  411. package/dist/rag/ChunkerFactory.d.ts +91 -0
  412. package/dist/rag/ChunkerFactory.js +320 -0
  413. package/dist/rag/ChunkerRegistry.d.ts +91 -0
  414. package/dist/rag/ChunkerRegistry.js +421 -0
  415. package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
  416. package/dist/rag/chunkers/BaseChunker.js +143 -0
  417. package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
  418. package/dist/rag/chunkers/CharacterChunker.js +28 -0
  419. package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
  420. package/dist/rag/chunkers/HTMLChunker.js +38 -0
  421. package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
  422. package/dist/rag/chunkers/JSONChunker.js +68 -0
  423. package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
  424. package/dist/rag/chunkers/LaTeXChunker.js +63 -0
  425. package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
  426. package/dist/rag/chunkers/MarkdownChunker.js +102 -0
  427. package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
  428. package/dist/rag/chunkers/RecursiveChunker.js +139 -0
  429. package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  430. package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
  431. package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
  432. package/dist/rag/chunkers/SentenceChunker.js +66 -0
  433. package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
  434. package/dist/rag/chunkers/TokenChunker.js +61 -0
  435. package/dist/rag/chunkers/index.d.ts +15 -0
  436. package/dist/rag/chunkers/index.js +15 -0
  437. package/dist/rag/chunking/characterChunker.d.ts +16 -0
  438. package/dist/rag/chunking/characterChunker.js +142 -0
  439. package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
  440. package/dist/rag/chunking/chunkerRegistry.js +194 -0
  441. package/dist/rag/chunking/htmlChunker.d.ts +34 -0
  442. package/dist/rag/chunking/htmlChunker.js +247 -0
  443. package/dist/rag/chunking/index.d.ts +15 -0
  444. package/dist/rag/chunking/index.js +17 -0
  445. package/dist/rag/chunking/jsonChunker.d.ts +20 -0
  446. package/dist/rag/chunking/jsonChunker.js +281 -0
  447. package/dist/rag/chunking/latexChunker.d.ts +26 -0
  448. package/dist/rag/chunking/latexChunker.js +251 -0
  449. package/dist/rag/chunking/markdownChunker.d.ts +19 -0
  450. package/dist/rag/chunking/markdownChunker.js +201 -0
  451. package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
  452. package/dist/rag/chunking/recursiveChunker.js +148 -0
  453. package/dist/rag/chunking/semanticChunker.d.ts +41 -0
  454. package/dist/rag/chunking/semanticChunker.js +306 -0
  455. package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
  456. package/dist/rag/chunking/sentenceChunker.js +230 -0
  457. package/dist/rag/chunking/tokenChunker.d.ts +36 -0
  458. package/dist/rag/chunking/tokenChunker.js +183 -0
  459. package/dist/rag/document/MDocument.d.ts +198 -0
  460. package/dist/rag/document/MDocument.js +392 -0
  461. package/dist/rag/document/index.d.ts +5 -0
  462. package/dist/rag/document/index.js +5 -0
  463. package/dist/rag/document/loaders.d.ts +201 -0
  464. package/dist/rag/document/loaders.js +500 -0
  465. package/dist/rag/errors/RAGError.d.ts +244 -0
  466. package/dist/rag/errors/RAGError.js +274 -0
  467. package/dist/rag/errors/index.d.ts +6 -0
  468. package/dist/rag/errors/index.js +6 -0
  469. package/dist/rag/graphRag/graphRAG.d.ts +115 -0
  470. package/dist/rag/graphRag/graphRAG.js +384 -0
  471. package/dist/rag/graphRag/index.d.ts +4 -0
  472. package/dist/rag/graphRag/index.js +4 -0
  473. package/dist/rag/index.d.ts +103 -0
  474. package/dist/rag/index.js +141 -0
  475. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  476. package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
  477. package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  478. package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
  479. package/dist/rag/metadata/index.d.ts +6 -0
  480. package/dist/rag/metadata/index.js +9 -0
  481. package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
  482. package/dist/rag/metadata/metadataExtractor.js +277 -0
  483. package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
  484. package/dist/rag/pipeline/RAGPipeline.js +401 -0
  485. package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
  486. package/dist/rag/pipeline/contextAssembly.js +337 -0
  487. package/dist/rag/pipeline/index.d.ts +5 -0
  488. package/dist/rag/pipeline/index.js +5 -0
  489. package/dist/rag/ragIntegration.d.ts +38 -0
  490. package/dist/rag/ragIntegration.js +211 -0
  491. package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
  492. package/dist/rag/reranker/RerankerFactory.js +430 -0
  493. package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
  494. package/dist/rag/reranker/RerankerRegistry.js +402 -0
  495. package/dist/rag/reranker/index.d.ts +6 -0
  496. package/dist/rag/reranker/index.js +9 -0
  497. package/dist/rag/reranker/reranker.d.ts +71 -0
  498. package/dist/rag/reranker/reranker.js +277 -0
  499. package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
  500. package/dist/rag/resilience/CircuitBreaker.js +431 -0
  501. package/dist/rag/resilience/RetryHandler.d.ts +115 -0
  502. package/dist/rag/resilience/RetryHandler.js +300 -0
  503. package/dist/rag/resilience/index.d.ts +7 -0
  504. package/dist/rag/resilience/index.js +7 -0
  505. package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
  506. package/dist/rag/retrieval/hybridSearch.js +313 -0
  507. package/dist/rag/retrieval/index.d.ts +5 -0
  508. package/dist/rag/retrieval/index.js +5 -0
  509. package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
  510. package/dist/rag/retrieval/vectorQueryTool.js +289 -0
  511. package/dist/rag/types.d.ts +768 -0
  512. package/dist/rag/types.js +8 -0
  513. package/dist/server/index.d.ts +15 -11
  514. package/dist/server/index.js +55 -51
  515. package/dist/server/utils/validation.d.ts +8 -8
  516. package/dist/types/common.d.ts +0 -1
  517. package/dist/types/fileTypes.d.ts +1 -1
  518. package/dist/types/generateTypes.d.ts +42 -8
  519. package/dist/types/generateTypes.js +1 -1
  520. package/dist/types/index.d.ts +25 -24
  521. package/dist/types/index.js +21 -20
  522. package/dist/types/modelTypes.d.ts +10 -10
  523. package/dist/types/pptTypes.d.ts +14 -2
  524. package/dist/types/pptTypes.js +16 -0
  525. package/dist/types/streamTypes.d.ts +28 -8
  526. package/dist/types/streamTypes.js +1 -1
  527. package/dist/utils/async/delay.d.ts +40 -0
  528. package/dist/utils/async/delay.js +42 -0
  529. package/dist/utils/async/index.d.ts +23 -0
  530. package/dist/utils/async/index.js +23 -0
  531. package/dist/utils/async/retry.d.ts +141 -0
  532. package/dist/utils/async/retry.js +171 -0
  533. package/dist/utils/async/withTimeout.d.ts +73 -0
  534. package/dist/utils/async/withTimeout.js +96 -0
  535. package/dist/utils/fileDetector.d.ts +7 -1
  536. package/dist/utils/fileDetector.js +91 -18
  537. package/dist/utils/json/extract.d.ts +103 -0
  538. package/dist/utils/json/extract.js +248 -0
  539. package/dist/utils/json/index.d.ts +36 -0
  540. package/dist/utils/json/index.js +36 -0
  541. package/dist/utils/json/safeParse.d.ts +137 -0
  542. package/dist/utils/json/safeParse.js +190 -0
  543. package/dist/utils/messageBuilder.d.ts +2 -2
  544. package/dist/utils/messageBuilder.js +15 -7
  545. package/dist/utils/modelRouter.d.ts +4 -4
  546. package/dist/utils/modelRouter.js +4 -4
  547. package/dist/utils/sanitizers/filename.d.ts +137 -0
  548. package/dist/utils/sanitizers/filename.js +365 -0
  549. package/dist/utils/sanitizers/html.d.ts +170 -0
  550. package/dist/utils/sanitizers/html.js +325 -0
  551. package/dist/utils/sanitizers/index.d.ts +26 -0
  552. package/dist/utils/sanitizers/index.js +29 -0
  553. package/dist/utils/sanitizers/svg.d.ts +81 -0
  554. package/dist/utils/sanitizers/svg.js +482 -0
  555. package/package.json +2 -2
@@ -0,0 +1,157 @@
1
+ /**
2
+ * Metadata Extractor Factory
3
+ *
4
+ * Factory for creating metadata extractor instances with configuration.
5
+ * Follows the BaseFactory pattern for consistent lifecycle management.
6
+ */
7
+ import { BaseFactory } from "../../core/infrastructure/index.js";
8
+ import type { Chunk, ExtractionResult, ExtractParams } from "../types.js";
9
+ /**
10
+ * Canonical extractor type identifiers accepted by the factory (as opposed to the alternative names listed in MetadataExtractorMetadata.aliases)
11
+ */
12
+ export type MetadataExtractorType = "llm" | "title" | "summary" | "keywords" | "questions" | "custom" | "composite";
13
+ /**
14
+ * Metadata Extractor interface - the common shape of every extractor instance produced by the factory
15
+ */
16
+ export interface MetadataExtractor {
17
+ /** Extractor type identifier (the canonical type the instance was created for) */
18
+ readonly type: MetadataExtractorType;
19
+ /**
20
+ * Extract metadata from chunks
21
+ * @param chunks - Array of chunks to extract metadata from
22
+ * @param params - Optional extraction parameters; the factory's wrappers fall back to an empty object or to type-specific defaults when omitted
23
+ * @returns Promise resolving to the array of extraction results
24
+ */
25
+ extract(chunks: Chunk[], params?: ExtractParams): Promise<ExtractionResult[]>;
26
+ }
27
+ /**
28
+ * Metadata extractor configuration passed to an extractor factory function when an instance is created
29
+ */
30
+ export interface MetadataExtractorConfig {
31
+ /** Extractor type */
32
+ type: MetadataExtractorType;
33
+ /** Language model provider (the built-in default configs use "openai") */
34
+ provider?: string;
35
+ /** Model name for LLM-based extraction (the built-in default configs use "gpt-4o-mini") */
36
+ modelName?: string;
37
+ /** Custom prompt template. NOTE(review): the built-in factories forward only provider and modelName - confirm where this is consumed */
38
+ promptTemplate?: string;
39
+ /** Maximum tokens for LLM response. NOTE(review): not forwarded by the built-in factories - confirm where this is consumed */
40
+ maxTokens?: number;
41
+ /** Temperature for LLM generation. NOTE(review): not forwarded by the built-in factories - confirm where this is consumed */
42
+ temperature?: number;
43
+ }
44
+ /**
45
+ * Descriptive information about an extractor type, used for discovery and documentation
46
+ */
47
+ export interface MetadataExtractorMetadata {
48
+ /** Human-readable description */
49
+ description: string;
50
+ /** Default configuration (returned by getDefaultConfig) */
51
+ defaultConfig: Partial<MetadataExtractorConfig>;
52
+ /** Names of the supported configuration options */
53
+ supportedOptions: string[];
54
+ /** Recommended use cases (searched as case-insensitive substrings by getExtractorsForUseCase) */
55
+ useCases: string[];
56
+ /** Alternative names for this extractor, registered together with the type */
57
+ aliases: string[];
58
+ /** Whether this extractor requires an AI model */
59
+ requiresModel: boolean;
60
+ /** Extraction types this extractor can produce (compared by exact match in getExtractorsForExtractionType) */
61
+ extractionTypes: string[];
62
+ }
63
+ /**
64
+ * Metadata Extractor Factory
65
+ *
66
+ * Creates metadata extractor instances by type or alias and keeps descriptive metadata for every registered type.
67
+ * Uses lazy loading via dynamic imports to avoid circular dependencies.
68
+ */
69
+ export declare class MetadataExtractorFactory extends BaseFactory<MetadataExtractor, MetadataExtractorConfig> {
70
+ private static instance;
71
+ private metadataMap;
72
+ private constructor();
73
+ /**
74
+ * Get singleton instance, creating it on the first call
75
+ */
76
+ static getInstance(): MetadataExtractorFactory;
77
+ /**
78
+ * Reset singleton (for testing): clears the current instance, then drops it so the next getInstance() creates a new one
79
+ */
80
+ static resetInstance(): void;
81
+ /**
82
+ * Register all default extractors (one per MetadataExtractorType), each backed by LLMMetadataExtractor
83
+ */
84
+ protected registerAll(): Promise<void>;
85
+ /**
86
+ * Wrap LLMMetadataExtractor to conform to MetadataExtractor interface (passes an empty params object when none is given)
87
+ */
88
+ private wrapExtractor;
89
+ /**
90
+ * Create specialized extractor that applies type-specific default params; caller-supplied params override those defaults
91
+ */
92
+ private createSpecializedExtractor;
93
+ /**
94
+ * Register an extractor factory under its type and metadata.aliases, and record its metadata
95
+ */
96
+ registerExtractor(type: MetadataExtractorType, factory: (config?: MetadataExtractorConfig) => Promise<MetadataExtractor>, metadata: MetadataExtractorMetadata): void;
97
+ /**
98
+ * Create an extractor by type or alias; rejects with MetadataExtractionError for unknown names or when creation fails
99
+ */
100
+ createExtractor(typeOrAlias: string, config?: MetadataExtractorConfig): Promise<MetadataExtractor>;
101
+ /**
102
+ * Get metadata for an extractor by type or alias; undefined when no metadata is recorded for the resolved name
103
+ */
104
+ getExtractorMetadata(typeOrAlias: string): MetadataExtractorMetadata | undefined;
105
+ /**
106
+ * Get default configuration for an extractor (the defaultConfig of its metadata); undefined when no metadata is recorded
107
+ */
108
+ getDefaultConfig(typeOrAlias: string): Partial<MetadataExtractorConfig> | undefined;
109
+ /**
110
+ * Get available extractor types (not including aliases)
111
+ */
112
+ getAvailableTypes(): MetadataExtractorType[];
113
+ /**
114
+ * Get all aliases mapped to their types
115
+ */
116
+ getTypeAliases(): Map<string, string>;
117
+ /**
118
+ * Check if a type exists; accepts either a type name or an alias
119
+ */
120
+ hasType(typeOrAlias: string): boolean;
121
+ /**
122
+ * Get extractors whose use cases contain the given text (case-insensitive substring match)
123
+ */
124
+ getExtractorsForUseCase(useCase: string): MetadataExtractorType[];
125
+ /**
126
+ * Get extractors that can produce a specific extraction type (exact match)
127
+ */
128
+ getExtractorsForExtractionType(extractionType: string): MetadataExtractorType[];
129
+ /**
130
+ * Get all extractor metadata as a new Map (the metadata objects themselves are not copied)
131
+ */
132
+ getAllMetadata(): Map<MetadataExtractorType, MetadataExtractorMetadata>;
133
+ /**
134
+ * Clear factory registrations and recorded metadata
135
+ */
136
+ clear(): void;
137
+ }
138
+ /**
139
+ * Global metadata extractor factory singleton (the instance returned by MetadataExtractorFactory.getInstance() at module load)
140
+ */
141
+ export declare const metadataExtractorFactory: MetadataExtractorFactory;
142
+ /**
143
+ * Convenience function to create a metadata extractor; delegates to metadataExtractorFactory.createExtractor
144
+ */
145
+ export declare function createMetadataExtractor(typeOrAlias: string, config?: MetadataExtractorConfig): Promise<MetadataExtractor>;
146
+ /**
147
+ * Convenience function to get available extractor types; delegates to metadataExtractorFactory.getAvailableTypes
148
+ */
149
+ export declare function getAvailableExtractorTypes(): MetadataExtractorType[];
150
+ /**
151
+ * Convenience function to get extractor metadata by type or alias; delegates to metadataExtractorFactory.getExtractorMetadata
152
+ */
153
+ export declare function getExtractorMetadata(typeOrAlias: string): MetadataExtractorMetadata | undefined;
154
+ /**
155
+ * Convenience function to get default config by type or alias; delegates to metadataExtractorFactory.getDefaultConfig
156
+ */
157
+ export declare function getExtractorDefaultConfig(typeOrAlias: string): Partial<MetadataExtractorConfig> | undefined;
@@ -0,0 +1,418 @@
1
+ /**
2
+ * Metadata Extractor Factory
3
+ *
4
+ * Factory for creating metadata extractor instances with configuration.
5
+ * Follows the BaseFactory pattern for consistent lifecycle management.
6
+ */
7
+ import { BaseFactory } from "../../core/infrastructure/index.js";
8
+ import { logger } from "../../utils/logger.js";
9
+ import { MetadataExtractionError, RAGErrorCodes } from "../errors/RAGError.js";
10
+ /**
11
+ * Built-in descriptive metadata for each default extractor type, keyed by type; each entry is handed to registerExtractor() in registerAll()
12
+ */
13
+ const DEFAULT_EXTRACTOR_METADATA = {
14
+ llm: {
15
+ description: "Full LLM-powered metadata extraction supporting all extraction types",
16
+ defaultConfig: {
17
+ provider: "openai",
18
+ modelName: "gpt-4o-mini",
19
+ temperature: 0.3,
20
+ },
21
+ supportedOptions: [
22
+ "provider",
23
+ "modelName",
24
+ "promptTemplate",
25
+ "maxTokens",
26
+ "temperature",
27
+ ],
28
+ useCases: [
29
+ "Comprehensive metadata extraction",
30
+ "Multi-type extraction in single pass",
31
+ "Custom schema extraction",
32
+ ],
33
+ aliases: ["full", "comprehensive", "all"],
34
+ requiresModel: true,
35
+ extractionTypes: ["title", "summary", "keywords", "questions", "custom"],
36
+ },
37
+ title: {
38
+ description: "Extracts concise, descriptive titles from document content",
39
+ defaultConfig: {
40
+ provider: "openai",
41
+ modelName: "gpt-4o-mini",
42
+ maxTokens: 100,
43
+ },
44
+ supportedOptions: ["provider", "modelName", "promptTemplate", "maxTokens"],
45
+ useCases: [
46
+ "Document indexing",
47
+ "Content organization",
48
+ "Navigation systems",
49
+ ],
50
+ aliases: ["header", "heading"],
51
+ requiresModel: true,
52
+ extractionTypes: ["title"],
53
+ },
54
+ summary: {
55
+ description: "Generates concise summaries of document chunks",
56
+ defaultConfig: {
57
+ provider: "openai",
58
+ modelName: "gpt-4o-mini",
59
+ maxTokens: 200,
60
+ },
61
+ supportedOptions: [
62
+ "provider",
63
+ "modelName",
64
+ "promptTemplate",
65
+ "maxTokens",
66
+ "maxWords", // NOTE(review): not a MetadataExtractorConfig field in the accompanying .d.ts - confirm where it is read
67
+ ],
68
+ useCases: [
69
+ "Document previews",
70
+ "Search result snippets",
71
+ "Content condensation",
72
+ ],
73
+ aliases: ["summarize", "abstract"],
74
+ requiresModel: true,
75
+ extractionTypes: ["summary"],
76
+ },
77
+ keywords: {
78
+ description: "Extracts key terms and phrases from content",
79
+ defaultConfig: {
80
+ provider: "openai",
81
+ modelName: "gpt-4o-mini",
82
+ maxTokens: 100,
83
+ },
84
+ supportedOptions: [
85
+ "provider",
86
+ "modelName",
87
+ "promptTemplate",
88
+ "maxKeywords", // NOTE(review): not a MetadataExtractorConfig field - confirm where it is read
89
+ ],
90
+ useCases: ["Tag generation", "Topic modeling", "Search optimization"],
91
+ aliases: ["tags", "terms", "keyphrase"],
92
+ requiresModel: true,
93
+ extractionTypes: ["keywords"],
94
+ },
95
+ questions: {
96
+ description: "Generates Q&A pairs from content for training or FAQs",
97
+ defaultConfig: {
98
+ provider: "openai",
99
+ modelName: "gpt-4o-mini",
100
+ maxTokens: 500,
101
+ },
102
+ supportedOptions: [
103
+ "provider",
104
+ "modelName",
105
+ "promptTemplate",
106
+ "numQuestions", // NOTE(review): this and "includeAnswers" are not MetadataExtractorConfig fields - confirm where they are read
107
+ "includeAnswers",
108
+ ],
109
+ useCases: [
110
+ "FAQ generation",
111
+ "Training data creation",
112
+ "Knowledge base building",
113
+ ],
114
+ aliases: ["qa", "faq", "questions-answers"],
115
+ requiresModel: true,
116
+ extractionTypes: ["questions"],
117
+ },
118
+ custom: {
119
+ description: "Extracts structured data according to custom schema",
120
+ defaultConfig: {
121
+ provider: "openai",
122
+ modelName: "gpt-4o-mini",
123
+ maxTokens: 500,
124
+ },
125
+ supportedOptions: [
126
+ "provider",
127
+ "modelName",
128
+ "promptTemplate",
129
+ "schema", // NOTE(review): this and "description" are not MetadataExtractorConfig fields - confirm where they are read
130
+ "description",
131
+ ],
132
+ useCases: [
133
+ "Structured data extraction",
134
+ "Entity extraction",
135
+ "Custom field extraction",
136
+ ],
137
+ aliases: ["schema", "structured", "entity"],
138
+ requiresModel: true,
139
+ extractionTypes: ["custom"],
140
+ },
141
+ composite: {
142
+ description: "Combines multiple extraction types in a single pass",
143
+ defaultConfig: {
144
+ provider: "openai",
145
+ modelName: "gpt-4o-mini",
146
+ },
147
+ supportedOptions: ["provider", "modelName", "extractors"], // NOTE(review): "extractors" is not a MetadataExtractorConfig field - confirm where it is read
148
+ useCases: [
149
+ "Multi-field extraction",
150
+ "Complete document processing",
151
+ "Pipeline integration",
152
+ ],
153
+ aliases: ["multi", "combined", "batch"],
154
+ requiresModel: true,
155
+ extractionTypes: ["title", "summary", "keywords", "questions", "custom"],
156
+ },
157
+ };
158
+ /**
159
+ * Metadata Extractor Factory
160
+ *
161
+ * Creates metadata extractor instances by type or alias and keeps descriptive metadata for every registered type.
162
+ * The extractor implementation is loaded lazily via dynamic import() inside each registered factory function to avoid circular dependencies.
163
+ */
164
+ export class MetadataExtractorFactory extends BaseFactory {
165
+ static instance = null;
166
+ metadataMap = new Map(); // extractor type -> metadata object, filled by registerExtractor()
167
+ constructor() {
168
+ super();
169
+ }
170
+ /**
171
+ * Get singleton instance, creating it on the first call
172
+ */
173
+ static getInstance() {
174
+ if (!MetadataExtractorFactory.instance) {
175
+ MetadataExtractorFactory.instance = new MetadataExtractorFactory();
176
+ }
177
+ return MetadataExtractorFactory.instance;
178
+ }
179
+ /**
180
+ * Reset singleton (for testing). NOTE(review): the module-level metadataExtractorFactory export keeps referencing the old, cleared instance
181
+ */
182
+ static resetInstance() {
183
+ if (MetadataExtractorFactory.instance) {
184
+ MetadataExtractorFactory.instance.clear();
185
+ MetadataExtractorFactory.instance = null;
186
+ }
187
+ }
188
+ /**
189
+ * Register all default extractors. NOTE(review): every factory below forwards only provider and modelName from the config to LLMMetadataExtractor
190
+ */
191
+ async registerAll() {
192
+ // Register full LLM extractor (no default extraction params)
193
+ this.registerExtractor("llm", async (config) => {
194
+ const { LLMMetadataExtractor } = await import("./metadataExtractor.js");
195
+ return this.wrapExtractor(new LLMMetadataExtractor({
196
+ provider: config?.provider,
197
+ modelName: config?.modelName,
198
+ }), "llm");
199
+ }, DEFAULT_EXTRACTOR_METADATA.llm);
200
+ // Register title extractor (default params: { title: true })
201
+ this.registerExtractor("title", async (config) => {
202
+ const { LLMMetadataExtractor } = await import("./metadataExtractor.js");
203
+ return this.createSpecializedExtractor(new LLMMetadataExtractor({
204
+ provider: config?.provider,
205
+ modelName: config?.modelName,
206
+ }), "title", { title: true });
207
+ }, DEFAULT_EXTRACTOR_METADATA.title);
208
+ // Register summary extractor (default params: { summary: true })
209
+ this.registerExtractor("summary", async (config) => {
210
+ const { LLMMetadataExtractor } = await import("./metadataExtractor.js");
211
+ return this.createSpecializedExtractor(new LLMMetadataExtractor({
212
+ provider: config?.provider,
213
+ modelName: config?.modelName,
214
+ }), "summary", { summary: true });
215
+ }, DEFAULT_EXTRACTOR_METADATA.summary);
216
+ // Register keywords extractor (default params: { keywords: true })
217
+ this.registerExtractor("keywords", async (config) => {
218
+ const { LLMMetadataExtractor } = await import("./metadataExtractor.js");
219
+ return this.createSpecializedExtractor(new LLMMetadataExtractor({
220
+ provider: config?.provider,
221
+ modelName: config?.modelName,
222
+ }), "keywords", { keywords: true });
223
+ }, DEFAULT_EXTRACTOR_METADATA.keywords);
224
+ // Register questions extractor (default params: { questions: true })
225
+ this.registerExtractor("questions", async (config) => {
226
+ const { LLMMetadataExtractor } = await import("./metadataExtractor.js");
227
+ return this.createSpecializedExtractor(new LLMMetadataExtractor({
228
+ provider: config?.provider,
229
+ modelName: config?.modelName,
230
+ }), "questions", { questions: true });
231
+ }, DEFAULT_EXTRACTOR_METADATA.questions);
232
+ // Register custom extractor (wrapped like "llm": no default params, only the type tag differs)
233
+ this.registerExtractor("custom", async (config) => {
234
+ const { LLMMetadataExtractor } = await import("./metadataExtractor.js");
235
+ return this.wrapExtractor(new LLMMetadataExtractor({
236
+ provider: config?.provider,
237
+ modelName: config?.modelName,
238
+ }), "custom");
239
+ }, DEFAULT_EXTRACTOR_METADATA.custom);
240
+ // Register composite extractor (wrapped like "llm": no default params, only the type tag differs)
241
+ this.registerExtractor("composite", async (config) => {
242
+ const { LLMMetadataExtractor } = await import("./metadataExtractor.js");
243
+ return this.wrapExtractor(new LLMMetadataExtractor({
244
+ provider: config?.provider,
245
+ modelName: config?.modelName,
246
+ }), "composite");
247
+ }, DEFAULT_EXTRACTOR_METADATA.composite);
248
+ logger.debug(`[MetadataExtractorFactory] Registered ${this.items.size} extractor types`);
249
+ }
250
+ /**
251
+ * Wrap LLMMetadataExtractor to conform to MetadataExtractor interface: tags it with the given type and passes an empty params object when none is given
252
+ */
253
+ wrapExtractor(extractor, type) {
254
+ return {
255
+ type,
256
+ async extract(chunks, params) {
257
+ return extractor.extract(chunks, params ?? {});
258
+ },
259
+ };
260
+ }
261
+ /**
262
+ * Create specialized extractor whose extract() applies defaultParams; caller-supplied params are merged on top and can override them
263
+ */
264
+ createSpecializedExtractor(extractor, type, defaultParams) {
265
+ return {
266
+ type,
267
+ async extract(chunks, params) {
268
+ // Merge default params with any provided params (provided params win on conflicting keys)
269
+ const mergedParams = { ...defaultParams, ...params };
270
+ return extractor.extract(chunks, mergedParams);
271
+ },
272
+ };
273
+ }
274
+ /**
275
+ * Register an extractor factory under its type and metadata.aliases, and record its metadata in metadataMap
276
+ */
277
+ registerExtractor(type, factory, metadata) {
278
+ // Store metadata
279
+ this.metadataMap.set(type, metadata);
280
+ // Register with aliases
281
+ this.register(type, factory, metadata.aliases, { metadata });
282
+ logger.debug(`[MetadataExtractorFactory] Registered extractor '${type}' with aliases: ${metadata.aliases.join(", ")}`);
283
+ }
284
+ /**
285
+ * Create an extractor by type or alias. Awaits initialization first; throws MetadataExtractionError (code METADATA_EXTRACTOR_NOT_FOUND) for unknown names and wraps any other creation failure in a MetadataExtractionError
286
+ */
287
+ async createExtractor(typeOrAlias, config) {
288
+ await this.ensureInitialized();
289
+ const resolvedName = this.resolveName(typeOrAlias);
290
+ if (!this.has(resolvedName)) {
291
+ const available = this.getAvailable();
292
+ throw new MetadataExtractionError(`Unknown metadata extractor type: '${typeOrAlias}'. Available types: ${available.join(", ")}`, {
293
+ code: RAGErrorCodes.METADATA_EXTRACTOR_NOT_FOUND,
294
+ extractorType: typeOrAlias,
295
+ details: {
296
+ requestedType: typeOrAlias,
297
+ availableTypes: available,
298
+ },
299
+ });
300
+ }
301
+ try {
302
+ const extractor = await this.create(resolvedName, config);
303
+ logger.debug(`[MetadataExtractorFactory] Created extractor '${resolvedName}' with config:`, config);
304
+ return extractor;
305
+ }
306
+ catch (error) {
307
+ // Re-throw if already a MetadataExtractionError (avoids double wrapping)
308
+ if (error instanceof MetadataExtractionError) {
309
+ throw error;
310
+ }
311
+ throw new MetadataExtractionError(`Failed to create extractor '${resolvedName}': ${error instanceof Error ? error.message : String(error)}`, {
312
+ extractorType: resolvedName,
313
+ cause: error instanceof Error ? error : undefined,
314
+ details: { type: resolvedName, config },
315
+ });
316
+ }
317
+ }
318
+ /**
319
+ * Get metadata for an extractor by type or alias; undefined when none is recorded. Synchronous: does not call ensureInitialized()
320
+ */
321
+ getExtractorMetadata(typeOrAlias) {
322
+ const resolvedName = this.resolveName(typeOrAlias);
323
+ return this.metadataMap.get(resolvedName);
324
+ }
325
+ /**
326
+ * Get default configuration for an extractor (the defaultConfig of its metadata); undefined when no metadata is recorded
327
+ */
328
+ getDefaultConfig(typeOrAlias) {
329
+ const metadata = this.getExtractorMetadata(typeOrAlias);
330
+ return metadata?.defaultConfig;
331
+ }
332
+ /**
333
+ * Get available extractor types (not including aliases); delegates to the inherited getAvailable()
334
+ */
335
+ getAvailableTypes() {
336
+ return this.getAvailable();
337
+ }
338
+ /**
339
+ * Get all aliases mapped to their types; delegates to the inherited getAliases()
340
+ */
341
+ getTypeAliases() {
342
+ return this.getAliases();
343
+ }
344
+ /**
345
+ * Check if a type exists; the name is resolved first, so aliases are accepted too
346
+ */
347
+ hasType(typeOrAlias) {
348
+ const resolved = this.resolveName(typeOrAlias);
349
+ return this.has(resolved);
350
+ }
351
+ /**
352
+ * Get extractors whose use cases contain the given text (case-insensitive substring match)
353
+ */
354
+ getExtractorsForUseCase(useCase) {
355
+ const matches = [];
356
+ const useCaseLower = useCase.toLowerCase();
357
+ for (const [type, metadata] of this.metadataMap) {
358
+ const hasMatch = metadata.useCases.some((uc) => uc.toLowerCase().includes(useCaseLower));
359
+ if (hasMatch) {
360
+ matches.push(type);
361
+ }
362
+ }
363
+ return matches;
364
+ }
365
+ /**
366
+ * Get extractors that can produce a specific extraction type (exact match against metadata.extractionTypes)
367
+ */
368
+ getExtractorsForExtractionType(extractionType) {
369
+ const matches = [];
370
+ for (const [type, metadata] of this.metadataMap) {
371
+ if (metadata.extractionTypes.includes(extractionType)) {
372
+ matches.push(type);
373
+ }
374
+ }
375
+ return matches;
376
+ }
377
+ /**
378
+ * Get all extractor metadata as a new Map (the metadata objects themselves are not copied)
379
+ */
380
+ getAllMetadata() {
381
+ return new Map(this.metadataMap);
382
+ }
383
+ /**
384
+ * Clear factory registrations (via super.clear()) and the recorded metadata
385
+ */
386
+ clear() {
387
+ super.clear();
388
+ this.metadataMap.clear();
389
+ }
390
+ }
391
+ /**
392
+ * Global metadata extractor factory singleton, obtained once at module load; it is not refreshed by MetadataExtractorFactory.resetInstance()
393
+ */
394
+ export const metadataExtractorFactory = MetadataExtractorFactory.getInstance();
395
+ /**
396
+ * Convenience function to create a metadata extractor by type or alias using the global factory
397
+ */
398
+ export async function createMetadataExtractor(typeOrAlias, config) {
399
+ return metadataExtractorFactory.createExtractor(typeOrAlias, config);
400
+ }
401
+ /**
402
+ * Convenience function to get available extractor types from the global factory
403
+ */
404
+ export function getAvailableExtractorTypes() {
405
+ return metadataExtractorFactory.getAvailableTypes();
406
+ }
407
+ /**
408
+ * Convenience function to get extractor metadata by type or alias from the global factory; undefined when none is recorded
409
+ */
410
+ export function getExtractorMetadata(typeOrAlias) {
411
+ return metadataExtractorFactory.getExtractorMetadata(typeOrAlias);
412
+ }
413
+ /**
414
+ * Convenience function to get default config by type or alias from the global factory; undefined when no metadata is recorded
415
+ */
416
+ export function getExtractorDefaultConfig(typeOrAlias) {
417
+ return metadataExtractorFactory.getDefaultConfig(typeOrAlias);
418
+ }
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Metadata Extractor Registry
3
+ *
4
+ * Centralized registry for all metadata extractor implementations with metadata
5
+ * and discovery capabilities. Follows the BaseRegistry pattern.
6
+ */
7
+ import { BaseRegistry } from "../../core/infrastructure/index.js";
8
+ import type { MetadataExtractor, MetadataExtractorConfig, MetadataExtractorMetadata, MetadataExtractorType } from "./MetadataExtractorFactory.js";
9
+ /**
10
+ * Metadata Extractor Registry
11
+ *
12
+ * Manages registration and discovery of all metadata extractor implementations.
13
+ * Extends BaseRegistry for consistent lifecycle management.
14
+ */
15
+ export declare class MetadataExtractorRegistry extends BaseRegistry<MetadataExtractor, MetadataExtractorMetadata> {
16
+ private static instance;
17
+ private aliasMap;
18
+ private constructor();
19
+ /**
20
+ * Get singleton instance
21
+ */
22
+ static getInstance(): MetadataExtractorRegistry;
23
+ /**
24
+ * Reset singleton (for testing)
25
+ */
26
+ static resetInstance(): void;
27
+ /**
28
+ * Register all built-in extractors
29
+ */
30
+ protected registerAll(): Promise<void>;
31
+ /**
32
+ * Create extractor instance wrapper
33
+ */
34
+ private createExtractorInstance;
35
+ /**
36
+ * Register an extractor with aliases; the factory callback takes no arguments, so no per-call configuration is passed to it
37
+ */
38
+ registerExtractor(type: MetadataExtractorType, factory: () => Promise<MetadataExtractor>, metadata: MetadataExtractorMetadata): void;
39
+ /**
40
+ * Resolve type from alias. NOTE(review): declared to always return a MetadataExtractorType - confirm what happens for unrecognised names
41
+ */
42
+ resolveType(nameOrAlias: string): MetadataExtractorType;
43
+ /**
44
+ * Get an extractor by type or alias (asynchronous)
45
+ */
46
+ getExtractor(typeOrAlias: string): Promise<MetadataExtractor>;
47
+ /**
48
+ * Get list of available extractor types
49
+ */
50
+ getAvailableExtractors(): MetadataExtractorType[];
51
+ /**
52
+ * Get metadata for a specific extractor by type or alias; the result may be undefined
53
+ */
54
+ getExtractorMetadata(typeOrAlias: string): MetadataExtractorMetadata | undefined;
55
+ /**
56
+ * Get all aliases for a type
57
+ */
58
+ getAliasesForType(type: MetadataExtractorType): string[];
59
+ /**
60
+ * Get all registered aliases, each mapped to its extractor type
61
+ */
62
+ getAllAliases(): Map<string, MetadataExtractorType>;
63
+ /**
64
+ * Check if a type or alias exists
65
+ */
66
+ hasExtractor(typeOrAlias: string): boolean;
67
+ /**
68
+ * Get extractors by use case
69
+ */
70
+ getExtractorsByUseCase(useCase: string): MetadataExtractorType[];
71
+ /**
72
+ * Get extractors that can produce a specific extraction type
73
+ */
74
+ getExtractorsByExtractionType(extractionType: string): MetadataExtractorType[];
75
+ /**
76
+ * Get default configuration for an extractor by type or alias; the result may be undefined
77
+ */
78
+ getDefaultConfig(typeOrAlias: string): Partial<MetadataExtractorConfig> | undefined;
79
+ /**
80
+ * Clear the registry (also clears aliases)
81
+ */
82
+ clear(): void;
83
+ }
84
+ /**
85
+ * Global metadata extractor registry singleton (presumably the instance from MetadataExtractorRegistry.getInstance() - verify against the implementation)
86
+ */
87
+ export declare const metadataExtractorRegistry: MetadataExtractorRegistry;
88
+ /**
89
+ * Convenience function to get available extractor types (presumably via the global metadataExtractorRegistry - verify against the implementation)
90
+ */
91
+ export declare function getAvailableExtractors(): MetadataExtractorType[];
92
+ /**
93
+ * Convenience function to get an extractor by type or alias (asynchronous)
94
+ */
95
+ export declare function getExtractor(typeOrAlias: string): Promise<MetadataExtractor>;
96
+ /**
97
+ * Convenience function to get extractor metadata by type or alias; named differently from the getExtractorMetadata function exported by MetadataExtractorFactory
98
+ */
99
+ export declare function getRegisteredExtractorMetadata(typeOrAlias: string): MetadataExtractorMetadata | undefined;