@juspay/neurolink 9.2.0 → 9.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328) hide show
  1. package/CHANGELOG.md +21 -0
  2. package/README.md +52 -30
  3. package/dist/agent/directTools.d.ts +8 -8
  4. package/dist/cli/commands/config.d.ts +3 -3
  5. package/dist/cli/commands/rag.d.ts +19 -0
  6. package/dist/cli/commands/rag.js +756 -0
  7. package/dist/cli/factories/commandFactory.js +146 -83
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/core/baseProvider.d.ts +43 -30
  10. package/dist/core/baseProvider.js +98 -138
  11. package/dist/core/conversationMemoryFactory.d.ts +2 -2
  12. package/dist/core/conversationMemoryFactory.js +2 -2
  13. package/dist/core/conversationMemoryInitializer.d.ts +1 -2
  14. package/dist/core/conversationMemoryInitializer.js +2 -2
  15. package/dist/core/infrastructure/baseError.d.ts +21 -0
  16. package/dist/core/infrastructure/baseError.js +22 -0
  17. package/dist/core/infrastructure/baseFactory.d.ts +21 -0
  18. package/dist/core/infrastructure/baseFactory.js +54 -0
  19. package/dist/core/infrastructure/baseRegistry.d.ts +21 -0
  20. package/dist/core/infrastructure/baseRegistry.js +49 -0
  21. package/dist/core/infrastructure/index.d.ts +5 -0
  22. package/dist/core/infrastructure/index.js +5 -0
  23. package/dist/core/infrastructure/retry.d.ts +7 -0
  24. package/dist/core/infrastructure/retry.js +20 -0
  25. package/dist/core/infrastructure/typedEventEmitter.d.ts +8 -0
  26. package/dist/core/infrastructure/typedEventEmitter.js +23 -0
  27. package/dist/core/redisConversationMemoryManager.d.ts +1 -6
  28. package/dist/core/redisConversationMemoryManager.js +7 -19
  29. package/dist/factories/providerFactory.d.ts +5 -3
  30. package/dist/factories/providerFactory.js +31 -24
  31. package/dist/index.d.ts +46 -12
  32. package/dist/index.js +88 -36
  33. package/dist/lib/agent/directTools.d.ts +5 -5
  34. package/dist/lib/core/baseProvider.d.ts +43 -30
  35. package/dist/lib/core/baseProvider.js +98 -138
  36. package/dist/lib/core/conversationMemoryFactory.d.ts +2 -2
  37. package/dist/lib/core/conversationMemoryFactory.js +2 -2
  38. package/dist/lib/core/conversationMemoryInitializer.d.ts +1 -2
  39. package/dist/lib/core/conversationMemoryInitializer.js +2 -2
  40. package/dist/lib/core/infrastructure/baseError.d.ts +21 -0
  41. package/dist/lib/core/infrastructure/baseError.js +23 -0
  42. package/dist/lib/core/infrastructure/baseFactory.d.ts +21 -0
  43. package/dist/lib/core/infrastructure/baseFactory.js +55 -0
  44. package/dist/lib/core/infrastructure/baseRegistry.d.ts +21 -0
  45. package/dist/lib/core/infrastructure/baseRegistry.js +50 -0
  46. package/dist/lib/core/infrastructure/index.d.ts +5 -0
  47. package/dist/lib/core/infrastructure/index.js +6 -0
  48. package/dist/lib/core/infrastructure/retry.d.ts +7 -0
  49. package/dist/lib/core/infrastructure/retry.js +21 -0
  50. package/dist/lib/core/infrastructure/typedEventEmitter.d.ts +8 -0
  51. package/dist/lib/core/infrastructure/typedEventEmitter.js +24 -0
  52. package/dist/lib/core/redisConversationMemoryManager.d.ts +1 -6
  53. package/dist/lib/core/redisConversationMemoryManager.js +7 -19
  54. package/dist/lib/factories/providerFactory.d.ts +5 -3
  55. package/dist/lib/factories/providerFactory.js +31 -24
  56. package/dist/lib/index.d.ts +46 -12
  57. package/dist/lib/index.js +88 -36
  58. package/dist/lib/mcp/index.d.ts +6 -5
  59. package/dist/lib/mcp/index.js +7 -5
  60. package/dist/lib/neurolink.d.ts +11 -13
  61. package/dist/lib/neurolink.js +95 -29
  62. package/dist/lib/providers/amazonBedrock.d.ts +15 -2
  63. package/dist/lib/providers/amazonBedrock.js +65 -8
  64. package/dist/lib/providers/anthropic.d.ts +3 -3
  65. package/dist/lib/providers/anthropic.js +10 -7
  66. package/dist/lib/providers/googleAiStudio.d.ts +5 -5
  67. package/dist/lib/providers/googleAiStudio.js +10 -7
  68. package/dist/lib/providers/googleVertex.d.ts +16 -4
  69. package/dist/lib/providers/googleVertex.js +72 -16
  70. package/dist/lib/providers/litellm.d.ts +3 -3
  71. package/dist/lib/providers/litellm.js +10 -10
  72. package/dist/lib/providers/mistral.d.ts +3 -3
  73. package/dist/lib/providers/mistral.js +7 -6
  74. package/dist/lib/providers/ollama.d.ts +3 -4
  75. package/dist/lib/providers/ollama.js +7 -8
  76. package/dist/lib/providers/openAI.d.ts +14 -2
  77. package/dist/lib/providers/openAI.js +60 -6
  78. package/dist/lib/providers/openRouter.d.ts +2 -2
  79. package/dist/lib/providers/openRouter.js +10 -6
  80. package/dist/lib/rag/ChunkerFactory.d.ts +91 -0
  81. package/dist/lib/rag/ChunkerFactory.js +321 -0
  82. package/dist/lib/rag/ChunkerRegistry.d.ts +91 -0
  83. package/dist/lib/rag/ChunkerRegistry.js +422 -0
  84. package/dist/lib/rag/chunkers/BaseChunker.d.ts +53 -0
  85. package/dist/lib/rag/chunkers/BaseChunker.js +144 -0
  86. package/dist/lib/rag/chunkers/CharacterChunker.d.ts +18 -0
  87. package/dist/lib/rag/chunkers/CharacterChunker.js +29 -0
  88. package/dist/lib/rag/chunkers/HTMLChunker.d.ts +19 -0
  89. package/dist/lib/rag/chunkers/HTMLChunker.js +39 -0
  90. package/dist/lib/rag/chunkers/JSONChunker.d.ts +19 -0
  91. package/dist/lib/rag/chunkers/JSONChunker.js +69 -0
  92. package/dist/lib/rag/chunkers/LaTeXChunker.d.ts +15 -0
  93. package/dist/lib/rag/chunkers/LaTeXChunker.js +64 -0
  94. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +15 -0
  95. package/dist/lib/rag/chunkers/MarkdownChunker.js +103 -0
  96. package/dist/lib/rag/chunkers/RecursiveChunker.d.ts +27 -0
  97. package/dist/lib/rag/chunkers/RecursiveChunker.js +140 -0
  98. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  99. package/dist/lib/rag/chunkers/SemanticMarkdownChunker.js +139 -0
  100. package/dist/lib/rag/chunkers/SentenceChunker.d.ts +19 -0
  101. package/dist/lib/rag/chunkers/SentenceChunker.js +67 -0
  102. package/dist/lib/rag/chunkers/TokenChunker.d.ts +19 -0
  103. package/dist/lib/rag/chunkers/TokenChunker.js +62 -0
  104. package/dist/lib/rag/chunkers/index.d.ts +15 -0
  105. package/dist/lib/rag/chunkers/index.js +16 -0
  106. package/dist/lib/rag/chunking/characterChunker.d.ts +16 -0
  107. package/dist/lib/rag/chunking/characterChunker.js +143 -0
  108. package/dist/lib/rag/chunking/chunkerRegistry.d.ts +67 -0
  109. package/dist/lib/rag/chunking/chunkerRegistry.js +195 -0
  110. package/dist/lib/rag/chunking/htmlChunker.d.ts +34 -0
  111. package/dist/lib/rag/chunking/htmlChunker.js +248 -0
  112. package/dist/lib/rag/chunking/index.d.ts +15 -0
  113. package/dist/lib/rag/chunking/index.js +18 -0
  114. package/dist/lib/rag/chunking/jsonChunker.d.ts +20 -0
  115. package/dist/lib/rag/chunking/jsonChunker.js +282 -0
  116. package/dist/lib/rag/chunking/latexChunker.d.ts +26 -0
  117. package/dist/lib/rag/chunking/latexChunker.js +252 -0
  118. package/dist/lib/rag/chunking/markdownChunker.d.ts +19 -0
  119. package/dist/lib/rag/chunking/markdownChunker.js +202 -0
  120. package/dist/lib/rag/chunking/recursiveChunker.d.ts +19 -0
  121. package/dist/lib/rag/chunking/recursiveChunker.js +149 -0
  122. package/dist/lib/rag/chunking/semanticChunker.d.ts +41 -0
  123. package/dist/lib/rag/chunking/semanticChunker.js +307 -0
  124. package/dist/lib/rag/chunking/sentenceChunker.d.ts +25 -0
  125. package/dist/lib/rag/chunking/sentenceChunker.js +231 -0
  126. package/dist/lib/rag/chunking/tokenChunker.d.ts +36 -0
  127. package/dist/lib/rag/chunking/tokenChunker.js +184 -0
  128. package/dist/lib/rag/document/MDocument.d.ts +198 -0
  129. package/dist/lib/rag/document/MDocument.js +393 -0
  130. package/dist/lib/rag/document/index.d.ts +5 -0
  131. package/dist/lib/rag/document/index.js +6 -0
  132. package/dist/lib/rag/document/loaders.d.ts +201 -0
  133. package/dist/lib/rag/document/loaders.js +501 -0
  134. package/dist/lib/rag/errors/RAGError.d.ts +244 -0
  135. package/dist/lib/rag/errors/RAGError.js +275 -0
  136. package/dist/lib/rag/errors/index.d.ts +6 -0
  137. package/dist/lib/rag/errors/index.js +7 -0
  138. package/dist/lib/rag/graphRag/graphRAG.d.ts +115 -0
  139. package/dist/lib/rag/graphRag/graphRAG.js +385 -0
  140. package/dist/lib/rag/graphRag/index.d.ts +4 -0
  141. package/dist/lib/rag/graphRag/index.js +5 -0
  142. package/dist/lib/rag/index.d.ts +103 -0
  143. package/dist/lib/rag/index.js +142 -0
  144. package/dist/lib/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  145. package/dist/lib/rag/metadata/MetadataExtractorFactory.js +419 -0
  146. package/dist/lib/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  147. package/dist/lib/rag/metadata/MetadataExtractorRegistry.js +363 -0
  148. package/dist/lib/rag/metadata/index.d.ts +6 -0
  149. package/dist/lib/rag/metadata/index.js +10 -0
  150. package/dist/lib/rag/metadata/metadataExtractor.d.ts +69 -0
  151. package/dist/lib/rag/metadata/metadataExtractor.js +278 -0
  152. package/dist/lib/rag/pipeline/RAGPipeline.d.ts +235 -0
  153. package/dist/lib/rag/pipeline/RAGPipeline.js +402 -0
  154. package/dist/lib/rag/pipeline/contextAssembly.d.ts +126 -0
  155. package/dist/lib/rag/pipeline/contextAssembly.js +338 -0
  156. package/dist/lib/rag/pipeline/index.d.ts +5 -0
  157. package/dist/lib/rag/pipeline/index.js +6 -0
  158. package/dist/lib/rag/ragIntegration.d.ts +38 -0
  159. package/dist/lib/rag/ragIntegration.js +212 -0
  160. package/dist/lib/rag/reranker/RerankerFactory.d.ts +184 -0
  161. package/dist/lib/rag/reranker/RerankerFactory.js +431 -0
  162. package/dist/lib/rag/reranker/RerankerRegistry.d.ts +119 -0
  163. package/dist/lib/rag/reranker/RerankerRegistry.js +403 -0
  164. package/dist/lib/rag/reranker/index.d.ts +6 -0
  165. package/dist/lib/rag/reranker/index.js +10 -0
  166. package/dist/lib/rag/reranker/reranker.d.ts +71 -0
  167. package/dist/lib/rag/reranker/reranker.js +278 -0
  168. package/dist/lib/rag/resilience/CircuitBreaker.d.ts +215 -0
  169. package/dist/lib/rag/resilience/CircuitBreaker.js +432 -0
  170. package/dist/lib/rag/resilience/RetryHandler.d.ts +115 -0
  171. package/dist/lib/rag/resilience/RetryHandler.js +301 -0
  172. package/dist/lib/rag/resilience/index.d.ts +7 -0
  173. package/dist/lib/rag/resilience/index.js +8 -0
  174. package/dist/lib/rag/retrieval/hybridSearch.d.ts +94 -0
  175. package/dist/lib/rag/retrieval/hybridSearch.js +314 -0
  176. package/dist/lib/rag/retrieval/index.d.ts +5 -0
  177. package/dist/lib/rag/retrieval/index.js +6 -0
  178. package/dist/lib/rag/retrieval/vectorQueryTool.d.ts +93 -0
  179. package/dist/lib/rag/retrieval/vectorQueryTool.js +290 -0
  180. package/dist/lib/rag/types.d.ts +768 -0
  181. package/dist/lib/rag/types.js +9 -0
  182. package/dist/lib/server/index.d.ts +15 -11
  183. package/dist/lib/server/index.js +55 -51
  184. package/dist/lib/server/utils/validation.d.ts +8 -8
  185. package/dist/lib/types/common.d.ts +0 -1
  186. package/dist/lib/types/generateTypes.d.ts +42 -8
  187. package/dist/lib/types/generateTypes.js +1 -1
  188. package/dist/lib/types/modelTypes.d.ts +2 -2
  189. package/dist/lib/types/streamTypes.d.ts +28 -8
  190. package/dist/lib/types/streamTypes.js +1 -1
  191. package/dist/lib/utils/modelRouter.d.ts +4 -4
  192. package/dist/lib/utils/modelRouter.js +4 -4
  193. package/dist/mcp/index.d.ts +6 -5
  194. package/dist/mcp/index.js +7 -5
  195. package/dist/neurolink.d.ts +11 -13
  196. package/dist/neurolink.js +95 -29
  197. package/dist/providers/amazonBedrock.d.ts +15 -2
  198. package/dist/providers/amazonBedrock.js +65 -8
  199. package/dist/providers/anthropic.d.ts +3 -3
  200. package/dist/providers/anthropic.js +10 -7
  201. package/dist/providers/googleAiStudio.d.ts +5 -5
  202. package/dist/providers/googleAiStudio.js +10 -7
  203. package/dist/providers/googleVertex.d.ts +16 -4
  204. package/dist/providers/googleVertex.js +72 -16
  205. package/dist/providers/litellm.d.ts +3 -3
  206. package/dist/providers/litellm.js +10 -10
  207. package/dist/providers/mistral.d.ts +3 -3
  208. package/dist/providers/mistral.js +7 -6
  209. package/dist/providers/ollama.d.ts +3 -4
  210. package/dist/providers/ollama.js +7 -8
  211. package/dist/providers/openAI.d.ts +14 -2
  212. package/dist/providers/openAI.js +60 -6
  213. package/dist/providers/openRouter.d.ts +2 -2
  214. package/dist/providers/openRouter.js +10 -6
  215. package/dist/rag/ChunkerFactory.d.ts +91 -0
  216. package/dist/rag/ChunkerFactory.js +320 -0
  217. package/dist/rag/ChunkerRegistry.d.ts +91 -0
  218. package/dist/rag/ChunkerRegistry.js +421 -0
  219. package/dist/rag/chunkers/BaseChunker.d.ts +53 -0
  220. package/dist/rag/chunkers/BaseChunker.js +143 -0
  221. package/dist/rag/chunkers/CharacterChunker.d.ts +18 -0
  222. package/dist/rag/chunkers/CharacterChunker.js +28 -0
  223. package/dist/rag/chunkers/HTMLChunker.d.ts +19 -0
  224. package/dist/rag/chunkers/HTMLChunker.js +38 -0
  225. package/dist/rag/chunkers/JSONChunker.d.ts +19 -0
  226. package/dist/rag/chunkers/JSONChunker.js +68 -0
  227. package/dist/rag/chunkers/LaTeXChunker.d.ts +15 -0
  228. package/dist/rag/chunkers/LaTeXChunker.js +63 -0
  229. package/dist/rag/chunkers/MarkdownChunker.d.ts +15 -0
  230. package/dist/rag/chunkers/MarkdownChunker.js +102 -0
  231. package/dist/rag/chunkers/RecursiveChunker.d.ts +27 -0
  232. package/dist/rag/chunkers/RecursiveChunker.js +139 -0
  233. package/dist/rag/chunkers/SemanticMarkdownChunker.d.ts +22 -0
  234. package/dist/rag/chunkers/SemanticMarkdownChunker.js +138 -0
  235. package/dist/rag/chunkers/SentenceChunker.d.ts +19 -0
  236. package/dist/rag/chunkers/SentenceChunker.js +66 -0
  237. package/dist/rag/chunkers/TokenChunker.d.ts +19 -0
  238. package/dist/rag/chunkers/TokenChunker.js +61 -0
  239. package/dist/rag/chunkers/index.d.ts +15 -0
  240. package/dist/rag/chunkers/index.js +15 -0
  241. package/dist/rag/chunking/characterChunker.d.ts +16 -0
  242. package/dist/rag/chunking/characterChunker.js +142 -0
  243. package/dist/rag/chunking/chunkerRegistry.d.ts +67 -0
  244. package/dist/rag/chunking/chunkerRegistry.js +194 -0
  245. package/dist/rag/chunking/htmlChunker.d.ts +34 -0
  246. package/dist/rag/chunking/htmlChunker.js +247 -0
  247. package/dist/rag/chunking/index.d.ts +15 -0
  248. package/dist/rag/chunking/index.js +17 -0
  249. package/dist/rag/chunking/jsonChunker.d.ts +20 -0
  250. package/dist/rag/chunking/jsonChunker.js +281 -0
  251. package/dist/rag/chunking/latexChunker.d.ts +26 -0
  252. package/dist/rag/chunking/latexChunker.js +251 -0
  253. package/dist/rag/chunking/markdownChunker.d.ts +19 -0
  254. package/dist/rag/chunking/markdownChunker.js +201 -0
  255. package/dist/rag/chunking/recursiveChunker.d.ts +19 -0
  256. package/dist/rag/chunking/recursiveChunker.js +148 -0
  257. package/dist/rag/chunking/semanticChunker.d.ts +41 -0
  258. package/dist/rag/chunking/semanticChunker.js +306 -0
  259. package/dist/rag/chunking/sentenceChunker.d.ts +25 -0
  260. package/dist/rag/chunking/sentenceChunker.js +230 -0
  261. package/dist/rag/chunking/tokenChunker.d.ts +36 -0
  262. package/dist/rag/chunking/tokenChunker.js +183 -0
  263. package/dist/rag/document/MDocument.d.ts +198 -0
  264. package/dist/rag/document/MDocument.js +392 -0
  265. package/dist/rag/document/index.d.ts +5 -0
  266. package/dist/rag/document/index.js +5 -0
  267. package/dist/rag/document/loaders.d.ts +201 -0
  268. package/dist/rag/document/loaders.js +500 -0
  269. package/dist/rag/errors/RAGError.d.ts +244 -0
  270. package/dist/rag/errors/RAGError.js +274 -0
  271. package/dist/rag/errors/index.d.ts +6 -0
  272. package/dist/rag/errors/index.js +6 -0
  273. package/dist/rag/graphRag/graphRAG.d.ts +115 -0
  274. package/dist/rag/graphRag/graphRAG.js +384 -0
  275. package/dist/rag/graphRag/index.d.ts +4 -0
  276. package/dist/rag/graphRag/index.js +4 -0
  277. package/dist/rag/index.d.ts +103 -0
  278. package/dist/rag/index.js +141 -0
  279. package/dist/rag/metadata/MetadataExtractorFactory.d.ts +157 -0
  280. package/dist/rag/metadata/MetadataExtractorFactory.js +418 -0
  281. package/dist/rag/metadata/MetadataExtractorRegistry.d.ts +99 -0
  282. package/dist/rag/metadata/MetadataExtractorRegistry.js +362 -0
  283. package/dist/rag/metadata/index.d.ts +6 -0
  284. package/dist/rag/metadata/index.js +9 -0
  285. package/dist/rag/metadata/metadataExtractor.d.ts +69 -0
  286. package/dist/rag/metadata/metadataExtractor.js +277 -0
  287. package/dist/rag/pipeline/RAGPipeline.d.ts +235 -0
  288. package/dist/rag/pipeline/RAGPipeline.js +401 -0
  289. package/dist/rag/pipeline/contextAssembly.d.ts +126 -0
  290. package/dist/rag/pipeline/contextAssembly.js +337 -0
  291. package/dist/rag/pipeline/index.d.ts +5 -0
  292. package/dist/rag/pipeline/index.js +5 -0
  293. package/dist/rag/ragIntegration.d.ts +38 -0
  294. package/dist/rag/ragIntegration.js +211 -0
  295. package/dist/rag/reranker/RerankerFactory.d.ts +184 -0
  296. package/dist/rag/reranker/RerankerFactory.js +430 -0
  297. package/dist/rag/reranker/RerankerRegistry.d.ts +119 -0
  298. package/dist/rag/reranker/RerankerRegistry.js +402 -0
  299. package/dist/rag/reranker/index.d.ts +6 -0
  300. package/dist/rag/reranker/index.js +9 -0
  301. package/dist/rag/reranker/reranker.d.ts +71 -0
  302. package/dist/rag/reranker/reranker.js +277 -0
  303. package/dist/rag/resilience/CircuitBreaker.d.ts +215 -0
  304. package/dist/rag/resilience/CircuitBreaker.js +431 -0
  305. package/dist/rag/resilience/RetryHandler.d.ts +115 -0
  306. package/dist/rag/resilience/RetryHandler.js +300 -0
  307. package/dist/rag/resilience/index.d.ts +7 -0
  308. package/dist/rag/resilience/index.js +7 -0
  309. package/dist/rag/retrieval/hybridSearch.d.ts +94 -0
  310. package/dist/rag/retrieval/hybridSearch.js +313 -0
  311. package/dist/rag/retrieval/index.d.ts +5 -0
  312. package/dist/rag/retrieval/index.js +5 -0
  313. package/dist/rag/retrieval/vectorQueryTool.d.ts +93 -0
  314. package/dist/rag/retrieval/vectorQueryTool.js +289 -0
  315. package/dist/rag/types.d.ts +768 -0
  316. package/dist/rag/types.js +8 -0
  317. package/dist/server/index.d.ts +15 -11
  318. package/dist/server/index.js +55 -51
  319. package/dist/server/utils/validation.d.ts +2 -2
  320. package/dist/types/common.d.ts +0 -1
  321. package/dist/types/generateTypes.d.ts +42 -8
  322. package/dist/types/generateTypes.js +1 -1
  323. package/dist/types/modelTypes.d.ts +20 -20
  324. package/dist/types/streamTypes.d.ts +28 -8
  325. package/dist/types/streamTypes.js +1 -1
  326. package/dist/utils/modelRouter.d.ts +4 -4
  327. package/dist/utils/modelRouter.js +4 -4
  328. package/package.json +1 -1
@@ -0,0 +1,362 @@
1
+ /**
2
+ * Metadata Extractor Registry
3
+ *
4
+ * Centralized registry for all metadata extractor implementations with metadata
5
+ * and discovery capabilities. Follows the BaseRegistry pattern.
6
+ */
7
+ import { BaseRegistry } from "../../core/infrastructure/index.js";
8
+ import { logger } from "../../utils/logger.js";
9
+ import { MetadataExtractionError, RAGErrorCodes } from "../errors/RAGError.js";
10
/**
 * Provider/model pair shared by every built-in extractor's default config.
 * Always spread into a fresh object so no two entries share a reference.
 */
const OPENAI_MINI_MODEL = { provider: "openai", modelName: "gpt-4o-mini" };
/**
 * Option names accepted by every prompt-driven built-in extractor.
 */
const PROMPTABLE_OPTIONS = ["provider", "modelName", "promptTemplate"];
/**
 * Every extraction type a multi-purpose extractor can produce.
 */
const EVERY_EXTRACTION_TYPE = ["title", "summary", "keywords", "questions", "custom"];
/**
 * Catalogue of the built-in metadata extractors, keyed by extractor type.
 *
 * Each entry carries a human-readable description, the default
 * configuration, the option names it supports, example use cases, the
 * aliases it can be looked up by, whether it needs a model, and the
 * extraction types it produces.
 */
const DEFAULT_EXTRACTOR_METADATA = {
    llm: {
        description: "Full LLM-powered metadata extraction supporting all extraction types",
        defaultConfig: { ...OPENAI_MINI_MODEL, temperature: 0.3 },
        supportedOptions: [...PROMPTABLE_OPTIONS, "maxTokens", "temperature"],
        useCases: [
            "Comprehensive metadata extraction",
            "Multi-type extraction in single pass",
            "Custom schema extraction",
        ],
        aliases: ["full", "comprehensive", "all"],
        requiresModel: true,
        extractionTypes: [...EVERY_EXTRACTION_TYPE],
    },
    title: {
        description: "Extracts concise, descriptive titles from document content",
        defaultConfig: { ...OPENAI_MINI_MODEL, maxTokens: 100 },
        supportedOptions: [...PROMPTABLE_OPTIONS, "maxTokens"],
        useCases: [
            "Document indexing",
            "Content organization",
            "Navigation systems",
        ],
        aliases: ["header", "heading"],
        requiresModel: true,
        extractionTypes: ["title"],
    },
    summary: {
        description: "Generates concise summaries of document chunks",
        defaultConfig: { ...OPENAI_MINI_MODEL, maxTokens: 200 },
        supportedOptions: [...PROMPTABLE_OPTIONS, "maxTokens", "maxWords"],
        useCases: [
            "Document previews",
            "Search result snippets",
            "Content condensation",
        ],
        aliases: ["summarize", "abstract"],
        requiresModel: true,
        extractionTypes: ["summary"],
    },
    keywords: {
        description: "Extracts key terms and phrases from content",
        defaultConfig: { ...OPENAI_MINI_MODEL, maxTokens: 100 },
        supportedOptions: [...PROMPTABLE_OPTIONS, "maxKeywords"],
        useCases: ["Tag generation", "Topic modeling", "Search optimization"],
        aliases: ["tags", "terms", "keyphrase"],
        requiresModel: true,
        extractionTypes: ["keywords"],
    },
    questions: {
        description: "Generates Q&A pairs from content for training or FAQs",
        defaultConfig: { ...OPENAI_MINI_MODEL, maxTokens: 500 },
        supportedOptions: [...PROMPTABLE_OPTIONS, "numQuestions", "includeAnswers"],
        useCases: [
            "FAQ generation",
            "Training data creation",
            "Knowledge base building",
        ],
        aliases: ["qa", "faq", "questions-answers"],
        requiresModel: true,
        extractionTypes: ["questions"],
    },
    custom: {
        description: "Extracts structured data according to custom schema",
        defaultConfig: { ...OPENAI_MINI_MODEL, maxTokens: 500 },
        supportedOptions: [...PROMPTABLE_OPTIONS, "schema", "description"],
        useCases: [
            "Structured data extraction",
            "Entity extraction",
            "Custom field extraction",
        ],
        aliases: ["schema", "structured", "entity"],
        requiresModel: true,
        extractionTypes: ["custom"],
    },
    composite: {
        description: "Combines multiple extraction types in a single pass",
        defaultConfig: { ...OPENAI_MINI_MODEL },
        // Unlike the other entries, composite does not list "promptTemplate".
        supportedOptions: ["provider", "modelName", "extractors"],
        useCases: [
            "Multi-field extraction",
            "Complete document processing",
            "Pipeline integration",
        ],
        aliases: ["multi", "combined", "batch"],
        requiresModel: true,
        extractionTypes: [...EVERY_EXTRACTION_TYPE],
    },
};
158
/**
 * Metadata Extractor Registry
 *
 * Manages registration and discovery of all metadata extractor implementations.
 * Extends BaseRegistry for consistent lifecycle management.
 *
 * Extractors can be looked up by their registered type or by any alias.
 * Alias lookup is case-insensitive; type lookup tries the exact name first
 * and then its lower-cased form.
 */
export class MetadataExtractorRegistry extends BaseRegistry {
    /** Lazily created singleton; null until getInstance() is first called. */
    static instance = null;
    /** Maps each lower-cased alias to the extractor type it was registered for. */
    aliasMap = new Map();
    constructor() {
        super();
    }
    /**
     * Get singleton instance, creating it on first use.
     *
     * @returns The shared MetadataExtractorRegistry
     */
    static getInstance() {
        if (!MetadataExtractorRegistry.instance) {
            MetadataExtractorRegistry.instance = new MetadataExtractorRegistry();
        }
        return MetadataExtractorRegistry.instance;
    }
    /**
     * Reset singleton (for testing).
     *
     * Clears the current instance and drops the static reference so the next
     * getInstance() call builds a fresh registry. Anything that captured the
     * old instance (such as a module-level constant initialised from
     * getInstance()) keeps pointing at the old, now cleared, object.
     */
    static resetInstance() {
        if (MetadataExtractorRegistry.instance) {
            MetadataExtractorRegistry.instance.clear();
            MetadataExtractorRegistry.instance = null;
        }
    }
    /**
     * Register all built-in extractors.
     *
     * Every type in DEFAULT_EXTRACTOR_METADATA is backed by the same
     * LLMMetadataExtractor class, which is imported dynamically here.
     */
    async registerAll() {
        const { LLMMetadataExtractor } = await import("./metadataExtractor.js");
        // Register all extractor types
        for (const [type, metadata] of Object.entries(DEFAULT_EXTRACTOR_METADATA)) {
            this.registerExtractor(type, async () => this.createExtractorInstance(LLMMetadataExtractor, type), metadata);
        }
        logger.debug(`[MetadataExtractorRegistry] Registered ${this.items.size} extractor types`);
    }
    /**
     * Create extractor instance wrapper.
     *
     * @param ExtractorClass - Constructor invoked with no arguments
     * @param type - Extractor type exposed on the returned wrapper
     * @returns An object with the `type` and an `extract(chunks, params)`
     *   method that forwards to the instance, substituting `{}` when
     *   `params` is null or undefined
     */
    createExtractorInstance(ExtractorClass, type) {
        const extractor = new ExtractorClass();
        return {
            type,
            async extract(chunks, params) {
                return extractor.extract(chunks, params ?? {});
            },
        };
    }
    /**
     * Register an extractor with aliases.
     *
     * @param type - Type name the extractor is stored under (kept verbatim)
     * @param factory - Factory handed to the base registry
     * @param metadata - Descriptive metadata; `aliases` is optional
     */
    registerExtractor(type, factory, metadata) {
        this.register(type, factory, metadata);
        // Tolerate metadata without an alias list so a custom registration
        // cannot fail half-way (type stored, then a TypeError on iteration).
        for (const alias of metadata?.aliases ?? []) {
            this.aliasMap.set(alias.toLowerCase(), type);
            logger.debug(`[MetadataExtractorRegistry] Registered alias '${alias}' -> '${type}'`);
        }
    }
    /**
     * Resolve type from a type name or alias.
     *
     * @param nameOrAlias - Registered type name or alias
     * @returns The registered type name
     * @throws MetadataExtractionError with code METADATA_EXTRACTOR_NOT_FOUND
     *   when nothing matches
     */
    resolveType(nameOrAlias) {
        // Exact match first: registerExtractor() stores the type verbatim, so
        // a type registered with upper-case characters is only reachable here.
        if (this.items.has(nameOrAlias)) {
            return nameOrAlias;
        }
        const lower = nameOrAlias.toLowerCase();
        // Check if it's a direct type
        if (this.items.has(lower)) {
            return lower;
        }
        // Check aliases
        const resolved = this.aliasMap.get(lower);
        if (resolved !== undefined) {
            return resolved;
        }
        // Computed once and reused for both the message and the details.
        const availableTypes = this.getAvailableExtractors();
        throw new MetadataExtractionError(`Unknown metadata extractor type: '${nameOrAlias}'. Available types: ${availableTypes.join(", ")}`, {
            code: RAGErrorCodes.METADATA_EXTRACTOR_NOT_FOUND,
            extractorType: nameOrAlias,
            details: {
                requestedType: nameOrAlias,
                availableTypes,
            },
        });
    }
    /**
     * Get an extractor by type or alias.
     *
     * Awaits the inherited ensureInitialized() before resolving
     * (presumably this triggers registerAll(); confirm against BaseRegistry).
     *
     * @param typeOrAlias - Registered type name or alias
     * @returns The extractor produced by the base registry's get()
     * @throws MetadataExtractionError when the name cannot be resolved or
     *   the base registry yields no extractor for the resolved type
     */
    async getExtractor(typeOrAlias) {
        await this.ensureInitialized();
        const type = this.resolveType(typeOrAlias);
        const extractor = await this.get(type);
        if (!extractor) {
            throw new MetadataExtractionError(`Metadata extractor not found: ${type}`, {
                code: RAGErrorCodes.METADATA_EXTRACTOR_NOT_FOUND,
                extractorType: type,
                details: { type },
            });
        }
        return extractor;
    }
    /**
     * Get list of available extractor types.
     *
     * @returns The ids of all entries currently in the registry
     */
    getAvailableExtractors() {
        return this.list().map((item) => item.id);
    }
    /**
     * Get metadata for a specific extractor.
     *
     * @param typeOrAlias - Registered type name or alias
     * @returns The metadata stored at registration, or undefined
     * @throws MetadataExtractionError when the name cannot be resolved
     */
    getExtractorMetadata(typeOrAlias) {
        const type = this.resolveType(typeOrAlias);
        const entry = this.list().find((item) => item.id === type);
        return entry?.metadata;
    }
    /**
     * Get all aliases for a type.
     *
     * Built-in metadata is consulted first; for any other type the metadata
     * supplied at registration is used.
     *
     * @param type - Extractor type name (not an alias)
     * @returns A fresh array of aliases; empty when the type is unknown
     */
    getAliasesForType(type) {
        const isBuiltIn = Object.prototype.hasOwnProperty.call(DEFAULT_EXTRACTOR_METADATA, type);
        const metadata = isBuiltIn
            ? DEFAULT_EXTRACTOR_METADATA[type]
            : this.list().find((item) => item.id === type)?.metadata;
        // Copy so callers cannot mutate the shared metadata tables.
        return [...(metadata?.aliases ?? [])];
    }
    /**
     * Get all registered aliases.
     *
     * @returns A copy of the alias -> type map
     */
    getAllAliases() {
        return new Map(this.aliasMap);
    }
    /**
     * Check if a type or alias exists.
     *
     * @param typeOrAlias - Registered type name or alias
     * @returns true when resolveType() succeeds, false when it throws
     */
    hasExtractor(typeOrAlias) {
        try {
            this.resolveType(typeOrAlias);
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Collect the [type, metadata] pairs used by the discovery queries.
     *
     * Built-in defaults come first (so they are discoverable even before
     * anything is registered), followed by registered types that are not
     * among the defaults.
     *
     * @returns Array of [type, metadata] tuples
     */
    getDiscoverableMetadata() {
        const entries = Object.entries(DEFAULT_EXTRACTOR_METADATA);
        for (const item of this.list()) {
            const isBuiltIn = Object.prototype.hasOwnProperty.call(DEFAULT_EXTRACTOR_METADATA, item.id);
            if (!isBuiltIn && item.metadata) {
                entries.push([item.id, item.metadata]);
            }
        }
        return entries;
    }
    /**
     * Get extractors by use case.
     *
     * @param useCase - Case-insensitive substring to look for in each
     *   extractor's use-case descriptions
     * @returns Types with at least one matching use case
     */
    getExtractorsByUseCase(useCase) {
        const useCaseLower = useCase.toLowerCase();
        return this.getDiscoverableMetadata()
            .filter(([, metadata]) => (metadata.useCases ?? []).some((uc) => uc.toLowerCase().includes(useCaseLower)))
            .map(([type]) => type);
    }
    /**
     * Get extractors that can produce a specific extraction type.
     *
     * @param extractionType - Extraction type to match exactly
     * @returns Types whose metadata lists the extraction type
     */
    getExtractorsByExtractionType(extractionType) {
        return this.getDiscoverableMetadata()
            .filter(([, metadata]) => (metadata.extractionTypes ?? []).includes(extractionType))
            .map(([type]) => type);
    }
    /**
     * Get default configuration for an extractor.
     *
     * @param typeOrAlias - Registered type name or alias
     * @returns The `defaultConfig` of the extractor's metadata, or undefined
     * @throws MetadataExtractionError when the name cannot be resolved
     */
    getDefaultConfig(typeOrAlias) {
        const metadata = this.getExtractorMetadata(typeOrAlias);
        return metadata?.defaultConfig;
    }
    /**
     * Clear the registry (also clears aliases).
     */
    clear() {
        super.clear();
        this.aliasMap.clear();
    }
}
341
/**
 * Shared, module-wide metadata extractor registry.
 *
 * Obtained from MetadataExtractorRegistry.getInstance() when this module is
 * first evaluated. The binding is never reassigned, so it keeps referring to
 * that instance even after MetadataExtractorRegistry.resetInstance() runs.
 */
export const metadataExtractorRegistry = MetadataExtractorRegistry.getInstance();
345
/**
 * List the extractor types currently held by the shared registry.
 *
 * @returns Whatever `metadataExtractorRegistry.getAvailableExtractors()` returns
 */
export function getAvailableExtractors() {
    const registeredTypes = metadataExtractorRegistry.getAvailableExtractors();
    return registeredTypes;
}
351
/**
 * Look up an extractor on the shared registry.
 *
 * @param typeOrAlias - Registered type name or alias
 * @returns Promise for the extractor; rejects as
 *   `metadataExtractorRegistry.getExtractor()` does when the lookup fails
 */
export async function getExtractor(typeOrAlias) {
    const pendingExtractor = metadataExtractorRegistry.getExtractor(typeOrAlias);
    return pendingExtractor;
}
357
/**
 * Fetch the metadata stored for an extractor on the shared registry.
 *
 * @param typeOrAlias - Registered type name or alias
 * @returns Whatever `metadataExtractorRegistry.getExtractorMetadata()` returns
 */
export function getRegisteredExtractorMetadata(typeOrAlias) {
    const registeredMetadata = metadataExtractorRegistry.getExtractorMetadata(typeOrAlias);
    return registeredMetadata;
}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Metadata Extraction Module Exports
3
+ */
4
+ export { createMetadataExtractor, getAvailableExtractorTypes, getExtractorDefaultConfig, getExtractorMetadata, type MetadataExtractor, type MetadataExtractorConfig, MetadataExtractorFactory, type MetadataExtractorMetadata, type MetadataExtractorType, metadataExtractorFactory, } from "./MetadataExtractorFactory.js";
5
+ export { getAvailableExtractors, getExtractor, getRegisteredExtractorMetadata, MetadataExtractorRegistry, metadataExtractorRegistry, } from "./MetadataExtractorRegistry.js";
6
+ export { extractMetadata, LLMMetadataExtractor } from "./metadataExtractor.js";
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Metadata Extraction Module Exports
3
+ */
4
+ // Factory pattern exports
5
+ export { createMetadataExtractor, getAvailableExtractorTypes, getExtractorDefaultConfig, getExtractorMetadata, MetadataExtractorFactory, metadataExtractorFactory, } from "./MetadataExtractorFactory.js";
6
+ // Registry pattern exports
7
+ export { getAvailableExtractors, getExtractor, getRegisteredExtractorMetadata, MetadataExtractorRegistry, metadataExtractorRegistry, } from "./MetadataExtractorRegistry.js";
8
+ // Core metadata extractor
9
+ export { extractMetadata, LLMMetadataExtractor } from "./metadataExtractor.js";
@@ -0,0 +1,69 @@
1
+ /**
2
+ * LLM-powered Metadata Extractor
3
+ *
4
+ * Extracts structured metadata from document chunks using language models.
5
+ * Supports title, summary, keywords, Q&A pairs, and custom schema extraction.
6
+ */
7
+ import type { Chunk, ExtractParams, ExtractionResult } from "../types.js";
8
/**
 * Metadata extractor backed by a language model.
 *
 * Produces titles, summaries, keywords, Q&A pairs, and custom-schema data
 * for document chunks.
 */
export declare class LLMMetadataExtractor {
    private provider;
    private modelName;
    /**
     * @param options - Optional provider and model name overrides
     */
    constructor(options?: {
        provider?: string;
        modelName?: string;
    });
    /**
     * Run the extractions requested in `params` over the given chunks.
     *
     * @param chunks - Chunks to extract metadata from
     * @param params - Extraction parameters
     * @returns One extraction result per chunk
     */
    extract(chunks: Chunk[], params: ExtractParams): Promise<ExtractionResult[]>;
    /** Groups chunks by document ID. */
    private groupByDocument;
    /** Extracts a title from document chunks. */
    private extractTitle;
    /** Extracts a summary from a chunk. */
    private extractSummary;
    /** Extracts keywords from a chunk. */
    private extractKeywords;
    /** Extracts Q&A pairs from a chunk. */
    private extractQuestions;
    /** Extracts custom-schema data from a chunk. */
    private extractCustom;
    /** Parses Q&A pairs out of an LLM response. */
    private parseQAPairs;
    /** Sends a prompt to the LLM. */
    private callLLM;
}
59
/**
 * Convenience function to extract metadata from chunks.
 *
 * @param chunks - Chunks to process
 * @param params - Extraction parameters
 * @param options - Optional extractor options (provider and model name)
 * @returns Extraction results
 */
export declare function extractMetadata(
    chunks: Chunk[],
    params: ExtractParams,
    options?: {
        provider?: string;
        modelName?: string;
    },
): Promise<ExtractionResult[]>;