@hazeljs/rag 0.2.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (379) hide show
  1. package/LICENSE +192 -0
  2. package/README.md +504 -0
  3. package/dist/__tests__/graph/community-detector.test.d.ts +2 -0
  4. package/dist/__tests__/graph/community-detector.test.d.ts.map +1 -0
  5. package/dist/__tests__/graph/community-detector.test.js +87 -0
  6. package/dist/__tests__/graph/community-detector.test.js.map +1 -0
  7. package/dist/__tests__/graph/community-summarizer.test.d.ts +2 -0
  8. package/dist/__tests__/graph/community-summarizer.test.d.ts.map +1 -0
  9. package/dist/__tests__/graph/community-summarizer.test.js +131 -0
  10. package/dist/__tests__/graph/community-summarizer.test.js.map +1 -0
  11. package/dist/__tests__/graph/entity-extractor.test.d.ts +2 -0
  12. package/dist/__tests__/graph/entity-extractor.test.d.ts.map +1 -0
  13. package/dist/__tests__/graph/entity-extractor.test.js +129 -0
  14. package/dist/__tests__/graph/entity-extractor.test.js.map +1 -0
  15. package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts +2 -0
  16. package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts.map +1 -0
  17. package/dist/__tests__/graph/graph-rag-pipeline.test.js +158 -0
  18. package/dist/__tests__/graph/graph-rag-pipeline.test.js.map +1 -0
  19. package/dist/__tests__/graph/knowledge-graph.test.d.ts +2 -0
  20. package/dist/__tests__/graph/knowledge-graph.test.d.ts.map +1 -0
  21. package/dist/__tests__/graph/knowledge-graph.test.js +208 -0
  22. package/dist/__tests__/graph/knowledge-graph.test.js.map +1 -0
  23. package/dist/__tests__/loaders/base.loader.test.d.ts +2 -0
  24. package/dist/__tests__/loaders/base.loader.test.d.ts.map +1 -0
  25. package/dist/__tests__/loaders/base.loader.test.js +114 -0
  26. package/dist/__tests__/loaders/base.loader.test.js.map +1 -0
  27. package/dist/__tests__/loaders/csv-file.loader.test.d.ts +2 -0
  28. package/dist/__tests__/loaders/csv-file.loader.test.d.ts.map +1 -0
  29. package/dist/__tests__/loaders/csv-file.loader.test.js +98 -0
  30. package/dist/__tests__/loaders/csv-file.loader.test.js.map +1 -0
  31. package/dist/__tests__/loaders/directory.loader.test.d.ts +2 -0
  32. package/dist/__tests__/loaders/directory.loader.test.d.ts.map +1 -0
  33. package/dist/__tests__/loaders/directory.loader.test.js +154 -0
  34. package/dist/__tests__/loaders/directory.loader.test.js.map +1 -0
  35. package/dist/__tests__/loaders/html-file.loader.test.d.ts +2 -0
  36. package/dist/__tests__/loaders/html-file.loader.test.d.ts.map +1 -0
  37. package/dist/__tests__/loaders/html-file.loader.test.js +93 -0
  38. package/dist/__tests__/loaders/html-file.loader.test.js.map +1 -0
  39. package/dist/__tests__/loaders/json-file.loader.test.d.ts +2 -0
  40. package/dist/__tests__/loaders/json-file.loader.test.d.ts.map +1 -0
  41. package/dist/__tests__/loaders/json-file.loader.test.js +84 -0
  42. package/dist/__tests__/loaders/json-file.loader.test.js.map +1 -0
  43. package/dist/__tests__/loaders/markdown-file.loader.test.d.ts +2 -0
  44. package/dist/__tests__/loaders/markdown-file.loader.test.d.ts.map +1 -0
  45. package/dist/__tests__/loaders/markdown-file.loader.test.js +83 -0
  46. package/dist/__tests__/loaders/markdown-file.loader.test.js.map +1 -0
  47. package/dist/__tests__/loaders/text-file.loader.test.d.ts +2 -0
  48. package/dist/__tests__/loaders/text-file.loader.test.d.ts.map +1 -0
  49. package/dist/__tests__/loaders/text-file.loader.test.js +50 -0
  50. package/dist/__tests__/loaders/text-file.loader.test.js.map +1 -0
  51. package/dist/__tests__/rag-pipeline.test.d.ts +2 -0
  52. package/dist/__tests__/rag-pipeline.test.d.ts.map +1 -0
  53. package/dist/__tests__/rag-pipeline.test.js +210 -0
  54. package/dist/__tests__/rag-pipeline.test.js.map +1 -0
  55. package/dist/__tests__/retrieval/bm25.test.d.ts +2 -0
  56. package/dist/__tests__/retrieval/bm25.test.d.ts.map +1 -0
  57. package/dist/__tests__/retrieval/bm25.test.js +86 -0
  58. package/dist/__tests__/retrieval/bm25.test.js.map +1 -0
  59. package/dist/__tests__/retrieval/hybrid-search.test.d.ts +2 -0
  60. package/dist/__tests__/retrieval/hybrid-search.test.d.ts.map +1 -0
  61. package/dist/__tests__/retrieval/hybrid-search.test.js +85 -0
  62. package/dist/__tests__/retrieval/hybrid-search.test.js.map +1 -0
  63. package/dist/__tests__/retrieval/multi-query.test.d.ts +2 -0
  64. package/dist/__tests__/retrieval/multi-query.test.d.ts.map +1 -0
  65. package/dist/__tests__/retrieval/multi-query.test.js +90 -0
  66. package/dist/__tests__/retrieval/multi-query.test.js.map +1 -0
  67. package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts +2 -0
  68. package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts.map +1 -0
  69. package/dist/__tests__/text-splitters/recursive-text-splitter.test.js +97 -0
  70. package/dist/__tests__/text-splitters/recursive-text-splitter.test.js.map +1 -0
  71. package/dist/__tests__/utils/similarity.test.d.ts +2 -0
  72. package/dist/__tests__/utils/similarity.test.d.ts.map +1 -0
  73. package/dist/__tests__/utils/similarity.test.js +47 -0
  74. package/dist/__tests__/utils/similarity.test.js.map +1 -0
  75. package/dist/agentic/agentic-rag.service.d.ts +49 -0
  76. package/dist/agentic/agentic-rag.service.d.ts.map +1 -0
  77. package/dist/agentic/agentic-rag.service.js +149 -0
  78. package/dist/agentic/agentic-rag.service.js.map +1 -0
  79. package/dist/agentic/decorators/active-learning.decorator.d.ts +19 -0
  80. package/dist/agentic/decorators/active-learning.decorator.d.ts.map +1 -0
  81. package/dist/agentic/decorators/active-learning.decorator.js +98 -0
  82. package/dist/agentic/decorators/active-learning.decorator.js.map +1 -0
  83. package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts +17 -0
  84. package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts.map +1 -0
  85. package/dist/agentic/decorators/adaptive-retrieval.decorator.js +103 -0
  86. package/dist/agentic/decorators/adaptive-retrieval.decorator.js.map +1 -0
  87. package/dist/agentic/decorators/cached.decorator.d.ts +18 -0
  88. package/dist/agentic/decorators/cached.decorator.d.ts.map +1 -0
  89. package/dist/agentic/decorators/cached.decorator.js +93 -0
  90. package/dist/agentic/decorators/cached.decorator.js.map +1 -0
  91. package/dist/agentic/decorators/context-aware.decorator.d.ts +16 -0
  92. package/dist/agentic/decorators/context-aware.decorator.d.ts.map +1 -0
  93. package/dist/agentic/decorators/context-aware.decorator.js +169 -0
  94. package/dist/agentic/decorators/context-aware.decorator.js.map +1 -0
  95. package/dist/agentic/decorators/corrective-rag.decorator.d.ts +16 -0
  96. package/dist/agentic/decorators/corrective-rag.decorator.d.ts.map +1 -0
  97. package/dist/agentic/decorators/corrective-rag.decorator.js +142 -0
  98. package/dist/agentic/decorators/corrective-rag.decorator.js.map +1 -0
  99. package/dist/agentic/decorators/hyde.decorator.d.ts +15 -0
  100. package/dist/agentic/decorators/hyde.decorator.d.ts.map +1 -0
  101. package/dist/agentic/decorators/hyde.decorator.js +91 -0
  102. package/dist/agentic/decorators/hyde.decorator.js.map +1 -0
  103. package/dist/agentic/decorators/index.d.ts +16 -0
  104. package/dist/agentic/decorators/index.d.ts.map +1 -0
  105. package/dist/agentic/decorators/index.js +32 -0
  106. package/dist/agentic/decorators/index.js.map +1 -0
  107. package/dist/agentic/decorators/multi-hop.decorator.d.ts +15 -0
  108. package/dist/agentic/decorators/multi-hop.decorator.d.ts.map +1 -0
  109. package/dist/agentic/decorators/multi-hop.decorator.js +109 -0
  110. package/dist/agentic/decorators/multi-hop.decorator.js.map +1 -0
  111. package/dist/agentic/decorators/query-planner.decorator.d.ts +20 -0
  112. package/dist/agentic/decorators/query-planner.decorator.d.ts.map +1 -0
  113. package/dist/agentic/decorators/query-planner.decorator.js +213 -0
  114. package/dist/agentic/decorators/query-planner.decorator.js.map +1 -0
  115. package/dist/agentic/decorators/query-rewriter.decorator.d.ts +16 -0
  116. package/dist/agentic/decorators/query-rewriter.decorator.d.ts.map +1 -0
  117. package/dist/agentic/decorators/query-rewriter.decorator.js +143 -0
  118. package/dist/agentic/decorators/query-rewriter.decorator.js.map +1 -0
  119. package/dist/agentic/decorators/self-reflective.decorator.d.ts +20 -0
  120. package/dist/agentic/decorators/self-reflective.decorator.d.ts.map +1 -0
  121. package/dist/agentic/decorators/self-reflective.decorator.js +189 -0
  122. package/dist/agentic/decorators/self-reflective.decorator.js.map +1 -0
  123. package/dist/agentic/decorators/source-verification.decorator.d.ts +15 -0
  124. package/dist/agentic/decorators/source-verification.decorator.d.ts.map +1 -0
  125. package/dist/agentic/decorators/source-verification.decorator.js +121 -0
  126. package/dist/agentic/decorators/source-verification.decorator.js.map +1 -0
  127. package/dist/agentic/index.d.ts +9 -0
  128. package/dist/agentic/index.d.ts.map +1 -0
  129. package/dist/agentic/index.js +25 -0
  130. package/dist/agentic/index.js.map +1 -0
  131. package/dist/agentic/types.d.ts +210 -0
  132. package/dist/agentic/types.d.ts.map +1 -0
  133. package/dist/agentic/types.js +7 -0
  134. package/dist/agentic/types.js.map +1 -0
  135. package/dist/decorators/embeddable.decorator.d.ts +31 -0
  136. package/dist/decorators/embeddable.decorator.d.ts.map +1 -0
  137. package/dist/decorators/embeddable.decorator.js +44 -0
  138. package/dist/decorators/embeddable.decorator.js.map +1 -0
  139. package/dist/decorators/rag.decorator.d.ts +58 -0
  140. package/dist/decorators/rag.decorator.d.ts.map +1 -0
  141. package/dist/decorators/rag.decorator.js +78 -0
  142. package/dist/decorators/rag.decorator.js.map +1 -0
  143. package/dist/decorators/semantic-search.decorator.d.ts +69 -0
  144. package/dist/decorators/semantic-search.decorator.d.ts.map +1 -0
  145. package/dist/decorators/semantic-search.decorator.js +116 -0
  146. package/dist/decorators/semantic-search.decorator.js.map +1 -0
  147. package/dist/embeddings/cohere-embeddings.d.ts +33 -0
  148. package/dist/embeddings/cohere-embeddings.d.ts.map +1 -0
  149. package/dist/embeddings/cohere-embeddings.js +91 -0
  150. package/dist/embeddings/cohere-embeddings.js.map +1 -0
  151. package/dist/embeddings/openai-embeddings.d.ts +21 -0
  152. package/dist/embeddings/openai-embeddings.d.ts.map +1 -0
  153. package/dist/embeddings/openai-embeddings.js +53 -0
  154. package/dist/embeddings/openai-embeddings.js.map +1 -0
  155. package/dist/graph/community-detector.d.ts +45 -0
  156. package/dist/graph/community-detector.d.ts.map +1 -0
  157. package/dist/graph/community-detector.js +153 -0
  158. package/dist/graph/community-detector.js.map +1 -0
  159. package/dist/graph/community-summarizer.d.ts +41 -0
  160. package/dist/graph/community-summarizer.d.ts.map +1 -0
  161. package/dist/graph/community-summarizer.js +119 -0
  162. package/dist/graph/community-summarizer.js.map +1 -0
  163. package/dist/graph/entity-extractor.d.ts +47 -0
  164. package/dist/graph/entity-extractor.d.ts.map +1 -0
  165. package/dist/graph/entity-extractor.js +224 -0
  166. package/dist/graph/entity-extractor.js.map +1 -0
  167. package/dist/graph/graph-rag-pipeline.d.ts +83 -0
  168. package/dist/graph/graph-rag-pipeline.d.ts.map +1 -0
  169. package/dist/graph/graph-rag-pipeline.js +390 -0
  170. package/dist/graph/graph-rag-pipeline.js.map +1 -0
  171. package/dist/graph/graph.types.d.ts +186 -0
  172. package/dist/graph/graph.types.d.ts.map +1 -0
  173. package/dist/graph/graph.types.js +20 -0
  174. package/dist/graph/graph.types.js.map +1 -0
  175. package/dist/graph/index.d.ts +15 -0
  176. package/dist/graph/index.d.ts.map +1 -0
  177. package/dist/graph/index.js +31 -0
  178. package/dist/graph/index.js.map +1 -0
  179. package/dist/graph/knowledge-graph.d.ts +57 -0
  180. package/dist/graph/knowledge-graph.d.ts.map +1 -0
  181. package/dist/graph/knowledge-graph.js +198 -0
  182. package/dist/graph/knowledge-graph.js.map +1 -0
  183. package/dist/index.d.ts +29 -0
  184. package/dist/index.d.ts.map +1 -0
  185. package/dist/index.js +58 -0
  186. package/dist/index.js.map +1 -0
  187. package/dist/loaders/base.loader.d.ts +108 -0
  188. package/dist/loaders/base.loader.d.ts.map +1 -0
  189. package/dist/loaders/base.loader.js +123 -0
  190. package/dist/loaders/base.loader.js.map +1 -0
  191. package/dist/loaders/csv-file.loader.d.ts +61 -0
  192. package/dist/loaders/csv-file.loader.d.ts.map +1 -0
  193. package/dist/loaders/csv-file.loader.js +162 -0
  194. package/dist/loaders/csv-file.loader.js.map +1 -0
  195. package/dist/loaders/directory.loader.d.ts +67 -0
  196. package/dist/loaders/directory.loader.d.ts.map +1 -0
  197. package/dist/loaders/directory.loader.js +163 -0
  198. package/dist/loaders/directory.loader.js.map +1 -0
  199. package/dist/loaders/docx.loader.d.ts +52 -0
  200. package/dist/loaders/docx.loader.d.ts.map +1 -0
  201. package/dist/loaders/docx.loader.js +110 -0
  202. package/dist/loaders/docx.loader.js.map +1 -0
  203. package/dist/loaders/github.loader.d.ts +114 -0
  204. package/dist/loaders/github.loader.d.ts.map +1 -0
  205. package/dist/loaders/github.loader.js +217 -0
  206. package/dist/loaders/github.loader.js.map +1 -0
  207. package/dist/loaders/html-file.loader.d.ts +55 -0
  208. package/dist/loaders/html-file.loader.d.ts.map +1 -0
  209. package/dist/loaders/html-file.loader.js +170 -0
  210. package/dist/loaders/html-file.loader.js.map +1 -0
  211. package/dist/loaders/index.d.ts +52 -0
  212. package/dist/loaders/index.d.ts.map +1 -0
  213. package/dist/loaders/index.js +61 -0
  214. package/dist/loaders/index.js.map +1 -0
  215. package/dist/loaders/json-file.loader.d.ts +51 -0
  216. package/dist/loaders/json-file.loader.d.ts.map +1 -0
  217. package/dist/loaders/json-file.loader.js +100 -0
  218. package/dist/loaders/json-file.loader.js.map +1 -0
  219. package/dist/loaders/markdown-file.loader.d.ts +61 -0
  220. package/dist/loaders/markdown-file.loader.d.ts.map +1 -0
  221. package/dist/loaders/markdown-file.loader.js +148 -0
  222. package/dist/loaders/markdown-file.loader.js.map +1 -0
  223. package/dist/loaders/pdf.loader.d.ts +64 -0
  224. package/dist/loaders/pdf.loader.d.ts.map +1 -0
  225. package/dist/loaders/pdf.loader.js +163 -0
  226. package/dist/loaders/pdf.loader.js.map +1 -0
  227. package/dist/loaders/text-file.loader.d.ts +39 -0
  228. package/dist/loaders/text-file.loader.d.ts.map +1 -0
  229. package/dist/loaders/text-file.loader.js +69 -0
  230. package/dist/loaders/text-file.loader.js.map +1 -0
  231. package/dist/loaders/web.loader.d.ts +87 -0
  232. package/dist/loaders/web.loader.d.ts.map +1 -0
  233. package/dist/loaders/web.loader.js +194 -0
  234. package/dist/loaders/web.loader.js.map +1 -0
  235. package/dist/loaders/youtube-transcript.loader.d.ts +92 -0
  236. package/dist/loaders/youtube-transcript.loader.d.ts.map +1 -0
  237. package/dist/loaders/youtube-transcript.loader.js +254 -0
  238. package/dist/loaders/youtube-transcript.loader.js.map +1 -0
  239. package/dist/memory/index.d.ts +11 -0
  240. package/dist/memory/index.d.ts.map +1 -0
  241. package/dist/memory/index.js +31 -0
  242. package/dist/memory/index.js.map +1 -0
  243. package/dist/memory/memory-manager.d.ts +96 -0
  244. package/dist/memory/memory-manager.d.ts.map +1 -0
  245. package/dist/memory/memory-manager.js +369 -0
  246. package/dist/memory/memory-manager.js.map +1 -0
  247. package/dist/memory/memory-store.interface.d.ts +73 -0
  248. package/dist/memory/memory-store.interface.d.ts.map +1 -0
  249. package/dist/memory/memory-store.interface.js +6 -0
  250. package/dist/memory/memory-store.interface.js.map +1 -0
  251. package/dist/memory/stores/buffer-memory.d.ts +47 -0
  252. package/dist/memory/stores/buffer-memory.d.ts.map +1 -0
  253. package/dist/memory/stores/buffer-memory.js +280 -0
  254. package/dist/memory/stores/buffer-memory.js.map +1 -0
  255. package/dist/memory/stores/hybrid-memory.d.ts +49 -0
  256. package/dist/memory/stores/hybrid-memory.d.ts.map +1 -0
  257. package/dist/memory/stores/hybrid-memory.js +194 -0
  258. package/dist/memory/stores/hybrid-memory.js.map +1 -0
  259. package/dist/memory/stores/vector-memory.d.ts +48 -0
  260. package/dist/memory/stores/vector-memory.d.ts.map +1 -0
  261. package/dist/memory/stores/vector-memory.js +312 -0
  262. package/dist/memory/stores/vector-memory.js.map +1 -0
  263. package/dist/memory/types.d.ts +119 -0
  264. package/dist/memory/types.d.ts.map +1 -0
  265. package/dist/memory/types.js +18 -0
  266. package/dist/memory/types.js.map +1 -0
  267. package/dist/prompts/agentic/adaptive-retrieval.prompt.d.ts +8 -0
  268. package/dist/prompts/agentic/adaptive-retrieval.prompt.d.ts.map +1 -0
  269. package/dist/prompts/agentic/adaptive-retrieval.prompt.js +27 -0
  270. package/dist/prompts/agentic/adaptive-retrieval.prompt.js.map +1 -0
  271. package/dist/prompts/agentic/corrective-rag.prompt.d.ts +9 -0
  272. package/dist/prompts/agentic/corrective-rag.prompt.d.ts.map +1 -0
  273. package/dist/prompts/agentic/corrective-rag.prompt.js +23 -0
  274. package/dist/prompts/agentic/corrective-rag.prompt.js.map +1 -0
  275. package/dist/prompts/agentic/hyde.prompt.d.ts +9 -0
  276. package/dist/prompts/agentic/hyde.prompt.d.ts.map +1 -0
  277. package/dist/prompts/agentic/hyde.prompt.js +18 -0
  278. package/dist/prompts/agentic/hyde.prompt.js.map +1 -0
  279. package/dist/prompts/agentic/multi-hop.prompt.d.ts +15 -0
  280. package/dist/prompts/agentic/multi-hop.prompt.d.ts.map +1 -0
  281. package/dist/prompts/agentic/multi-hop.prompt.js +38 -0
  282. package/dist/prompts/agentic/multi-hop.prompt.js.map +1 -0
  283. package/dist/prompts/agentic/query-planner.prompt.d.ts +8 -0
  284. package/dist/prompts/agentic/query-planner.prompt.d.ts.map +1 -0
  285. package/dist/prompts/agentic/query-planner.prompt.js +30 -0
  286. package/dist/prompts/agentic/query-planner.prompt.js.map +1 -0
  287. package/dist/prompts/agentic/query-rewriter.prompt.d.ts +10 -0
  288. package/dist/prompts/agentic/query-rewriter.prompt.d.ts.map +1 -0
  289. package/dist/prompts/agentic/query-rewriter.prompt.js +17 -0
  290. package/dist/prompts/agentic/query-rewriter.prompt.js.map +1 -0
  291. package/dist/prompts/agentic/self-reflective-improve.prompt.d.ts +10 -0
  292. package/dist/prompts/agentic/self-reflective-improve.prompt.d.ts.map +1 -0
  293. package/dist/prompts/agentic/self-reflective-improve.prompt.js +24 -0
  294. package/dist/prompts/agentic/self-reflective-improve.prompt.js.map +1 -0
  295. package/dist/prompts/agentic/self-reflective.prompt.d.ts +9 -0
  296. package/dist/prompts/agentic/self-reflective.prompt.d.ts.map +1 -0
  297. package/dist/prompts/agentic/self-reflective.prompt.js +32 -0
  298. package/dist/prompts/agentic/self-reflective.prompt.js.map +1 -0
  299. package/dist/prompts/community-summary.prompt.d.ts +9 -0
  300. package/dist/prompts/community-summary.prompt.d.ts.map +1 -0
  301. package/dist/prompts/community-summary.prompt.js +30 -0
  302. package/dist/prompts/community-summary.prompt.js.map +1 -0
  303. package/dist/prompts/entity-extraction.prompt.d.ts +10 -0
  304. package/dist/prompts/entity-extraction.prompt.d.ts.map +1 -0
  305. package/dist/prompts/entity-extraction.prompt.js +39 -0
  306. package/dist/prompts/entity-extraction.prompt.js.map +1 -0
  307. package/dist/prompts/graph-search.prompt.d.ts +10 -0
  308. package/dist/prompts/graph-search.prompt.d.ts.map +1 -0
  309. package/dist/prompts/graph-search.prompt.js +23 -0
  310. package/dist/prompts/graph-search.prompt.js.map +1 -0
  311. package/dist/prompts/index.d.ts +13 -0
  312. package/dist/prompts/index.d.ts.map +1 -0
  313. package/dist/prompts/index.js +29 -0
  314. package/dist/prompts/index.js.map +1 -0
  315. package/dist/prompts/rag-answer.prompt.d.ts +9 -0
  316. package/dist/prompts/rag-answer.prompt.d.ts.map +1 -0
  317. package/dist/prompts/rag-answer.prompt.js +20 -0
  318. package/dist/prompts/rag-answer.prompt.js.map +1 -0
  319. package/dist/rag-pipeline-with-memory.d.ts +68 -0
  320. package/dist/rag-pipeline-with-memory.d.ts.map +1 -0
  321. package/dist/rag-pipeline-with-memory.js +186 -0
  322. package/dist/rag-pipeline-with-memory.js.map +1 -0
  323. package/dist/rag-pipeline.d.ts +59 -0
  324. package/dist/rag-pipeline.d.ts.map +1 -0
  325. package/dist/rag-pipeline.js +181 -0
  326. package/dist/rag-pipeline.js.map +1 -0
  327. package/dist/rag.module.d.ts +26 -0
  328. package/dist/rag.module.d.ts.map +1 -0
  329. package/dist/rag.module.js +40 -0
  330. package/dist/rag.module.js.map +1 -0
  331. package/dist/rag.service.d.ts +96 -0
  332. package/dist/rag.service.d.ts.map +1 -0
  333. package/dist/rag.service.js +173 -0
  334. package/dist/rag.service.js.map +1 -0
  335. package/dist/retrieval/bm25.d.ts +57 -0
  336. package/dist/retrieval/bm25.d.ts.map +1 -0
  337. package/dist/retrieval/bm25.js +106 -0
  338. package/dist/retrieval/bm25.js.map +1 -0
  339. package/dist/retrieval/hybrid-search.d.ts +48 -0
  340. package/dist/retrieval/hybrid-search.d.ts.map +1 -0
  341. package/dist/retrieval/hybrid-search.js +123 -0
  342. package/dist/retrieval/hybrid-search.js.map +1 -0
  343. package/dist/retrieval/multi-query.d.ts +38 -0
  344. package/dist/retrieval/multi-query.d.ts.map +1 -0
  345. package/dist/retrieval/multi-query.js +135 -0
  346. package/dist/retrieval/multi-query.js.map +1 -0
  347. package/dist/text-splitters/recursive-text-splitter.d.ts +21 -0
  348. package/dist/text-splitters/recursive-text-splitter.d.ts.map +1 -0
  349. package/dist/text-splitters/recursive-text-splitter.js +95 -0
  350. package/dist/text-splitters/recursive-text-splitter.js.map +1 -0
  351. package/dist/types/index.d.ts +144 -0
  352. package/dist/types/index.d.ts.map +1 -0
  353. package/dist/types/index.js +16 -0
  354. package/dist/types/index.js.map +1 -0
  355. package/dist/utils/similarity.d.ts +16 -0
  356. package/dist/utils/similarity.d.ts.map +1 -0
  357. package/dist/utils/similarity.js +58 -0
  358. package/dist/utils/similarity.js.map +1 -0
  359. package/dist/vector-stores/chroma.store.d.ts +42 -0
  360. package/dist/vector-stores/chroma.store.d.ts.map +1 -0
  361. package/dist/vector-stores/chroma.store.js +242 -0
  362. package/dist/vector-stores/chroma.store.js.map +1 -0
  363. package/dist/vector-stores/memory-vector-store.d.ts +20 -0
  364. package/dist/vector-stores/memory-vector-store.d.ts.map +1 -0
  365. package/dist/vector-stores/memory-vector-store.js +94 -0
  366. package/dist/vector-stores/memory-vector-store.js.map +1 -0
  367. package/dist/vector-stores/pinecone.store.d.ts +34 -0
  368. package/dist/vector-stores/pinecone.store.d.ts.map +1 -0
  369. package/dist/vector-stores/pinecone.store.js +146 -0
  370. package/dist/vector-stores/pinecone.store.js.map +1 -0
  371. package/dist/vector-stores/qdrant.store.d.ts +33 -0
  372. package/dist/vector-stores/qdrant.store.d.ts.map +1 -0
  373. package/dist/vector-stores/qdrant.store.js +174 -0
  374. package/dist/vector-stores/qdrant.store.js.map +1 -0
  375. package/dist/vector-stores/weaviate.store.d.ts +37 -0
  376. package/dist/vector-stores/weaviate.store.d.ts.map +1 -0
  377. package/dist/vector-stores/weaviate.store.js +226 -0
  378. package/dist/vector-stores/weaviate.store.js.map +1 -0
  379. package/package.json +146 -0
@@ -0,0 +1,69 @@
1
+ "use strict";
2
+ /**
3
+ * TextFileLoader
4
+ *
5
+ * Loads one or more plain-text files (.txt, .log, .tsv, etc.) from disk.
6
+ * Each file becomes a single `Document`.
7
+ *
8
+ * @example
9
+ * ```typescript
10
+ * const loader = new TextFileLoader({ path: './docs/notes.txt' });
11
+ * const docs = await loader.load();
12
+ * // docs[0].content === "...file contents..."
13
+ * // docs[0].metadata.source === "notes.txt"
14
+ * ```
15
+ *
16
+ * Multiple files:
17
+ * ```typescript
18
+ * const loader = new TextFileLoader({ paths: ['./a.txt', './b.txt'] });
19
+ * ```
20
+ */
21
// TypeScript-emitted decorator helper. An already-defined `this.__decorate` is reused;
// otherwise this function applies `decorators` to `target` (class decorators, called with
// fewer than 3 arguments) or to the member `key` of `target`.
+ var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
22
// `c` is the argument count. `r` is the running result: the target itself when c < 3,
// otherwise the property descriptor (looked up on demand when `desc` is exactly null).
+ var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
23
// Delegate to Reflect.decorate when the runtime provides it.
+ if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
24
// Fallback: apply decorators from last to first; a falsy return value keeps the previous `r`.
+ else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
25
// With more than 3 arguments and a truthy result, redefine the property on `target`; always return `r`.
+ return c > 3 && r && Object.defineProperty(target, key, r), r;
26
+ };
27
// TypeScript-emitted metadata helper. An already-defined `this.__metadata` is reused.
// Returns `Reflect.metadata(k, v)` when `Reflect.metadata` is a function; otherwise it
// falls through and returns undefined, which `__decorate` skips as a falsy decorator.
+ var __metadata = (this && this.__metadata) || function (k, v) {
28
+ if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
29
+ };
30
+ Object.defineProperty(exports, "__esModule", { value: true });
31
+ exports.TextFileLoader = void 0;
32
+ const promises_1 = require("fs/promises");
33
+ const path_1 = require("path");
34
+ const base_loader_1 = require("./base.loader");
35
/**
 * Loader that reads plain-text files from the local filesystem.
 * Each configured file is read in full and turned into one document via
 * the inherited `createDocument`.
 */
let TextFileLoader = class TextFileLoader extends base_loader_1.BaseDocumentLoader {
    /**
     * @param options Loader options:
     *   - `path`: a single file path;
     *   - `paths`: several file paths (takes precedence over `path` when non-empty);
     *   - `encoding`: encoding passed to `readFile` (defaults to 'utf-8');
     *   - `metadata`: extra metadata merged into every produced document.
     * @throws {Error} When neither `path` nor a non-empty `paths` is supplied.
     */
    constructor(options) {
        super();
        // An empty `paths` array must not shadow a valid `path`: `??` alone only
        // falls back on null/undefined, which would leave the loader with nothing
        // to read even though validation passed.
        const listedPaths = options.paths && options.paths.length > 0 ? options.paths : undefined;
        const resolvedPaths = listedPaths ?? (options.path ? [options.path] : []);
        if (resolvedPaths.length === 0) {
            throw new Error('TextFileLoader: provide at least one file path via `path` or `paths`.');
        }
        this.paths = resolvedPaths;
        this.encoding = options.encoding ?? 'utf-8';
        this.extraMetadata = options.metadata ?? {};
    }
    /**
     * Reads every configured file, in order, and returns one document per file.
     * A read failure rejects the returned promise (nothing is caught here).
     *
     * @returns Promise resolving to the documents, in the same order as the paths.
     */
    async load() {
        const results = [];
        for (const filePath of this.paths) {
            const content = await (0, promises_1.readFile)(filePath, { encoding: this.encoding });
            results.push(this.createDocument(content, {
                source: (0, path_1.basename)(filePath),
                filePath,
                loaderType: 'text',
                // Spread last so caller-supplied metadata overrides the defaults above.
                ...this.extraMetadata,
            }));
        }
        return results;
    }
};
59
// Publish the class, then replace both the export and the local binding with the result of
// applying its decorators: `Loader({...})` from ./base.loader (given the loader's name,
// description, file extensions and MIME types) and the emitted `design:paramtypes`
// metadata ([Object] for the single constructor parameter).
// NOTE(review): what `Loader` does with these fields is not visible here — confirm in base.loader.
+ exports.TextFileLoader = TextFileLoader;
60
+ exports.TextFileLoader = TextFileLoader = __decorate([
61
+ (0, base_loader_1.Loader)({
62
+ name: 'TextFileLoader',
63
+ description: 'Loads plain-text files from the local filesystem.',
64
+ extensions: ['.txt', '.text', '.log', '.tsv', '.nfo'],
65
+ mimeTypes: ['text/plain'],
66
+ }),
67
+ __metadata("design:paramtypes", [Object])
68
+ ], TextFileLoader);
69
+ //# sourceMappingURL=text-file.loader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"text-file.loader.js","sourceRoot":"","sources":["../../src/loaders/text-file.loader.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;GAkBG;;;;;;;;;;;;AAEH,0CAAuC;AACvC,+BAAgC;AAChC,+CAA2D;AAoBpD,IAAM,cAAc,GAApB,MAAM,cAAe,SAAQ,gCAAkB;IAKpD,YAAY,OAA8B;QACxC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,EAAE,CAAC;YACpE,MAAM,IAAI,KAAK,CAAC,uEAAuE,CAAC,CAAC;QAC3F,CAAC;QACD,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACnE,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC;QAC5C,IAAI,CAAC,aAAa,GAAG,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;IAC9C,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,OAAO,GAAe,EAAE,CAAC;QAE/B,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YAClC,MAAM,OAAO,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;YACtE,OAAO,CAAC,IAAI,CACV,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE;gBAC3B,MAAM,EAAE,IAAA,eAAQ,EAAC,QAAQ,CAAC;gBAC1B,QAAQ;gBACR,UAAU,EAAE,MAAM;gBAClB,GAAG,IAAI,CAAC,aAAa;aACtB,CAAC,CACH,CAAC;QACJ,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;CACF,CAAA;AAhCY,wCAAc;yBAAd,cAAc;IAN1B,IAAA,oBAAM,EAAC;QACN,IAAI,EAAE,gBAAgB;QACtB,WAAW,EAAE,mDAAmD;QAChE,UAAU,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;QACrD,SAAS,EAAE,CAAC,YAAY,CAAC;KAC1B,CAAC;;GACW,cAAc,CAgC1B"}
@@ -0,0 +1,87 @@
1
+ /**
2
+ * WebLoader
3
+ *
4
+ * Scrapes one or more URLs and converts the page content to plain text
5
+ * (or optionally HTML) suitable for RAG indexing.
6
+ *
7
+ * Two operation modes:
8
+ * - **built-in** (default) — uses Node.js `fetch` + a lightweight HTML
9
+ * stripper. No extra dependencies.
10
+ * - **cheerio** mode — uses the optional `cheerio` peer dependency for
11
+ * CSS-selector-based content extraction (much more precise).
12
+ *
13
+ * Install cheerio for the selector mode:
14
+ * ```bash
15
+ * npm install cheerio
16
+ * ```
17
+ *
18
+ * @example
19
+ * ```typescript
20
+ * // Scrape a single page (no extra deps)
21
+ * const loader = new WebLoader({ url: 'https://hazeljs.com/docs' });
22
+ * const docs = await loader.load();
23
+ *
24
+ * // Scrape multiple URLs
25
+ * const loader = new WebLoader({
26
+ * urls: ['https://example.com/a', 'https://example.com/b'],
27
+ * });
28
+ *
29
+ * // Extract only the <article> element (requires cheerio)
30
+ * const loader = new WebLoader({
31
+ * url: 'https://blog.example.com/post',
32
+ * selector: 'article',
33
+ * });
34
+ * ```
35
+ */
36
+ import { BaseDocumentLoader } from './base.loader';
37
+ import type { Document } from '../types';
38
/** Options accepted by the `WebLoader` constructor. Every field is optional at the type level. */
+ export interface WebLoaderOptions {
39
+ /** Single URL to scrape. NOTE(review): how `url` and `urls` combine when both are set is not visible in this declaration — confirm in web.loader.ts. */
40
+ url?: string;
41
+ /** Multiple URLs to scrape — fetched in parallel, at most `concurrency` at a time. */
42
+ urls?: string[];
43
+ /**
44
+ * CSS selector identifying the element(s) whose content should be extracted.
45
+ * Requires the optional `cheerio` peer dependency.
46
+ * @example 'article', 'main', '.post-content', '#readme'
47
+ */
48
+ selector?: string;
49
+ /**
50
+ * Maximum number of fetches in flight at the same time.
51
+ * @default 3
52
+ */
53
+ concurrency?: number;
54
+ /**
55
+ * Per-request timeout, in milliseconds.
56
+ * @default 15000
57
+ */
58
+ timeout?: number;
59
+ /**
60
+ * Additional HTTP headers sent with every request.
61
+ * @example { 'User-Agent': 'HazelJSBot/1.0' }
62
+ */
63
+ headers?: Record<string, string>;
64
+ /**
65
+ * Number of times a failed request is retried before giving up.
66
+ * @default 2
67
+ */
68
+ retries?: number;
69
+ /** Extra metadata merged into every document produced by the loader. */
70
+ metadata?: Record<string, unknown>;
71
+ }
72
/**
 * Document loader built on `BaseDocumentLoader` that is configured with `WebLoaderOptions`;
 * `load()` resolves to an array of `Document`s.
 *
 * The private fields correspond to the option names (`urls`, `selector`, `concurrency`,
 * `timeout`, `headers`, `retries`); `extraMetadata` presumably holds `options.metadata` —
 * confirm in the implementation. The private methods (`scrape`, `fetchWithRetry`,
 * `extractTitle`, `stripTags`) are internal helpers whose bodies are not part of this
 * declaration file.
 */
+ export declare class WebLoader extends BaseDocumentLoader {
73
+ private readonly urls;
74
+ private readonly selector?;
75
+ private readonly concurrency;
76
+ private readonly timeout;
77
+ private readonly headers;
78
+ private readonly retries;
79
+ private readonly extraMetadata;
80
+ constructor(options: WebLoaderOptions);
81
+ load(): Promise<Document[]>;
82
+ private scrape;
83
+ private fetchWithRetry;
84
+ private extractTitle;
85
+ private stripTags;
86
+ }
87
+ //# sourceMappingURL=web.loader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"web.loader.d.ts","sourceRoot":"","sources":["../../src/loaders/web.loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AAEH,OAAO,EAAE,kBAAkB,EAAU,MAAM,eAAe,CAAC;AAC3D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAEzC,MAAM,WAAW,gBAAgB;IAC/B,4BAA4B;IAC5B,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,8DAA8D;IAC9D,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,qBAKa,SAAU,SAAQ,kBAAkB;IAC/C,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAW;IAChC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAyB;IACjD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAA0B;gBAE5C,OAAO,EAAE,gBAAgB;IAiB/B,IAAI,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YAuBnB,MAAM;YAgDN,cAAc;IA0C5B,OAAO,CAAC,YAAY;IAKpB,OAAO,CAAC,SAAS;CAoBlB"}
@@ -0,0 +1,194 @@
1
+ "use strict";
2
+ /**
3
+ * WebLoader
4
+ *
5
+ * Scrapes one or more URLs and converts the page content to plain text
6
+ * suitable for RAG indexing.
7
+ *
8
+ * Two operation modes:
9
+ * - **built-in** (default) — uses Node.js `fetch` + a lightweight HTML
10
+ * stripper. No extra dependencies.
11
+ * - **cheerio** mode — uses the optional `cheerio` peer dependency for
12
+ * CSS-selector-based content extraction (much more precise).
13
+ *
14
+ * Install cheerio for the selector mode:
15
+ * ```bash
16
+ * npm install cheerio
17
+ * ```
18
+ *
19
+ * @example
20
+ * ```typescript
21
+ * // Scrape a single page (no extra deps)
22
+ * const loader = new WebLoader({ url: 'https://hazeljs.com/docs' });
23
+ * const docs = await loader.load();
24
+ *
25
+ * // Scrape multiple URLs
26
+ * const loader = new WebLoader({
27
+ * urls: ['https://example.com/a', 'https://example.com/b'],
28
+ * });
29
+ *
30
+ * // Extract only the <article> element (requires cheerio)
31
+ * const loader = new WebLoader({
32
+ * url: 'https://blog.example.com/post',
33
+ * selector: 'article',
34
+ * });
35
+ * ```
36
+ */
37
+ var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) { // Compiler-emitted decorator helper; reuses an existing this.__decorate when one is present.
38
+ var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d; // c = argument count; r starts as the target (fewer than 3 args) or as the property descriptor, looked up when desc is null
39
+ if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc); // delegate to Reflect.decorate when the runtime provides it
40
+ else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r; // otherwise apply decorators last-to-first; a falsy return keeps the previous r
41
+ return c > 3 && r && Object.defineProperty(target, key, r), r; // with more than 3 args and a truthy r, install r as the property descriptor; always return r
42
+ };
43
+ var __metadata = (this && this.__metadata) || function (k, v) { // Compiler-emitted metadata helper; reuses an existing this.__metadata when one is present.
44
+ if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v); // returns Reflect.metadata(k, v) when available; otherwise falls through and returns undefined
45
+ };
46
+ Object.defineProperty(exports, "__esModule", { value: true });
47
+ exports.WebLoader = void 0;
48
+ const base_loader_1 = require("./base.loader");
49
/**
 * Decodes the HTML entities understood by the built-in (non-cheerio) text
 * extraction: `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, `&nbsp;` and
 * decimal / hexadecimal numeric references.
 *
 * Everything is decoded in a single pass so that already-escaped text such as
 * `&amp;lt;` becomes `&lt;` rather than being decoded twice into `<`.
 *
 * @param {string} text Text that may contain HTML entities.
 * @returns {string} The text with the supported entities replaced.
 */
function decodeHtmlEntities(text) {
    const named = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'", nbsp: ' ' };
    return text.replace(/&(#[xX][0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos|nbsp);/g, (entity, body) => {
        if (body[0] !== '#') {
            return named[body];
        }
        const isHex = body[1] === 'x' || body[1] === 'X';
        const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        // String.fromCodePoint throws a RangeError above U+10FFFF; leave such
        // malformed references untouched instead of failing the whole page.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    });
}
/**
 * Scrapes one or more URLs and converts each page into a plain-text document.
 *
 * Without a `selector` the page is reduced to text by a built-in regex-based
 * stripper. With a `selector` the optional `cheerio` package is used to
 * extract the matching elements' text, falling back to the built-in stripper
 * when the selector matches nothing.
 */
let WebLoader = class WebLoader extends base_loader_1.BaseDocumentLoader {
    /**
     * @param options Loader options (`url`/`urls`, `selector`, `concurrency`,
     *   `timeout`, `headers`, `retries`, `metadata`).
     * @throws {Error} When neither `url` nor a non-empty `urls` is provided.
     */
    constructor(options) {
        super();
        const hasUrlList = Array.isArray(options.urls) && options.urls.length > 0;
        if (!options.url && !hasUrlList) {
            throw new Error('WebLoader: provide at least one URL via `url` or `urls`.');
        }
        // An empty `urls` array must not shadow a provided `url`; copy the list so
        // later mutation by the caller cannot change what gets scraped.
        this.urls = hasUrlList ? [...options.urls] : [options.url];
        this.selector = options.selector;
        // A batch size below 1 (or NaN) would keep load() from ever advancing.
        this.concurrency = Math.max(1, Math.floor(options.concurrency ?? 3) || 1);
        this.timeout = options.timeout ?? 15000;
        this.headers = {
            'User-Agent': 'Mozilla/5.0 (compatible; HazelJSBot/1.0)',
            ...options.headers,
        };
        // Negative or NaN retry counts would otherwise skip every attempt.
        this.retries = Math.max(0, Math.floor(options.retries ?? 2) || 0);
        this.extraMetadata = options.metadata ?? {};
    }
    /**
     * Scrapes every configured URL, `concurrency` URLs at a time.
     *
     * A URL that fails is reported with `console.warn` and skipped; it does not
     * reject the whole load.
     *
     * @returns One document per successfully scraped page.
     */
    async load() {
        const allDocs = [];
        // Process in chunks of `concurrency`
        for (let i = 0; i < this.urls.length; i += this.concurrency) {
            const batch = this.urls.slice(i, i + this.concurrency);
            const results = await Promise.allSettled(batch.map((url) => this.scrape(url)));
            for (const result of results) {
                if (result.status === 'fulfilled' && result.value) {
                    allDocs.push(result.value);
                }
                else if (result.status === 'rejected') {
                    // eslint-disable-next-line no-console
                    console.warn('[WebLoader] Failed to scrape URL:', result.reason);
                }
            }
        }
        return allDocs;
    }
    // ── Private helpers ──────────────────────────────────────────────────────
    /**
     * Fetches a single page and converts it into a document.
     *
     * @param url Page to scrape.
     * @returns The document, or `null` when the response had no usable text body.
     * @throws {Error} When the request ultimately fails, when `selector` is set
     *   but `cheerio` is not installed, or when cheerio itself throws.
     */
    async scrape(url) {
        const html = await this.fetchWithRetry(url);
        if (!html)
            return null;
        let text;
        let title = '';
        if (this.selector) {
            let cheerio;
            try {
                // eslint-disable-next-line @typescript-eslint/no-require-imports
                cheerio = require('cheerio');
            }
            catch (err) {
                // Only a genuinely missing module gets the install hint; any other
                // load failure is surfaced unchanged so it is not misdiagnosed.
                if (err && err.code !== 'MODULE_NOT_FOUND') {
                    throw err;
                }
                throw new Error(`[WebLoader] The \`selector\` option requires the optional peer dependency "cheerio". ` +
                    `Run: npm install cheerio\n` +
                    `Or remove the \`selector\` option to use built-in HTML stripping.`);
            }
            // Parsing happens outside the try block so selector/parse errors keep
            // their real message instead of being reported as "cheerio missing".
            const $ = cheerio.load(html);
            title = $('title').first().text().trim();
            text = $(this.selector).text().trim();
            if (!text) {
                // eslint-disable-next-line no-console
                console.warn(`[WebLoader] Selector "${this.selector}" found no content at ${url}. Falling back to full page.`);
                text = this.stripTags(html);
            }
        }
        else {
            title = this.extractTitle(html);
            text = this.stripTags(html);
        }
        return this.createDocument(text, {
            source: url,
            url,
            loaderType: 'web',
            ...(title && { title }),
            scrapedAt: new Date().toISOString(),
            // Spread last so caller-supplied metadata can override the defaults.
            ...this.extraMetadata,
        });
    }
    /**
     * Performs a GET request with a timeout, retrying failed attempts with
     * exponential backoff (500ms, 1000ms, ... doubling per attempt).
     *
     * @param url URL to fetch.
     * @returns The response body, or `null` when the content type is not `text/*`.
     * @throws {Error} When every attempt fails (network error, timeout or
     *   non-2xx status).
     */
    async fetchWithRetry(url) {
        let lastError;
        for (let attempt = 0; attempt <= this.retries; attempt++) {
            const controller = new AbortController();
            const timer = setTimeout(() => controller.abort(), this.timeout);
            try {
                const response = await fetch(url, {
                    headers: this.headers,
                    signal: controller.signal,
                });
                if (!response.ok) {
                    throw new Error(`HTTP ${response.status} ${response.statusText}`);
                }
                const contentType = response.headers.get('content-type') ?? '';
                if (!contentType.includes('text/')) {
                    // eslint-disable-next-line no-console
                    console.warn(`[WebLoader] Skipping non-text content at ${url}: ${contentType}`);
                    return null;
                }
                // The timer is still armed here, so a stalled body is aborted too.
                return await response.text();
            }
            catch (err) {
                lastError = err;
            }
            finally {
                // Always release the timer, including when fetch itself rejected;
                // otherwise it stays pending for up to `timeout` ms per attempt.
                clearTimeout(timer);
            }
            if (attempt < this.retries) {
                await new Promise((resolve) => setTimeout(resolve, 500 * 2 ** attempt));
            }
        }
        throw new Error(`[WebLoader] Failed to fetch ${url} after ${this.retries + 1} attempts: ${lastError instanceof Error ? lastError.message : String(lastError)}`);
    }
    /**
     * Returns the entity-decoded, trimmed contents of the first `<title>`
     * element, or an empty string when the page has none.
     *
     * @param html Raw page HTML.
     */
    extractTitle(html) {
        const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
        return match ? decodeHtmlEntities(match[1]).trim() : '';
    }
    /**
     * Reduces HTML to readable plain text without external dependencies.
     *
     * Drops `<script>`, `<style>`, `<nav>`, `<header>`, `<footer>` elements and
     * comments, turns `<br>` and block-closing tags into line breaks, removes
     * the remaining tags, decodes entities and normalises whitespace.
     *
     * @param html Raw page HTML.
     * @returns The extracted text.
     */
    stripTags(html) {
        const withoutMarkup = html
            // The lookahead keeps e.g. `<nav` from matching `<navigation-menu>`.
            .replace(/<script(?=[\s/>])[\s\S]*?<\/script\s*>/gi, ' ')
            .replace(/<style(?=[\s/>])[\s\S]*?<\/style\s*>/gi, ' ')
            .replace(/<nav(?=[\s/>])[\s\S]*?<\/nav\s*>/gi, ' ')
            .replace(/<header(?=[\s/>])[\s\S]*?<\/header\s*>/gi, ' ')
            .replace(/<footer(?=[\s/>])[\s\S]*?<\/footer\s*>/gi, ' ')
            .replace(/<!--[\s\S]*?-->/g, ' ')
            // `<br>` is a void element, so it has to be matched as an opening tag.
            .replace(/<\/?br\b[^>]*>/gi, '\n')
            .replace(/<\/(p|div|h[1-6]|li|tr)[^>]*>/gi, '\n')
            .replace(/<[^>]+>/g, ' ');
        // Entities are decoded only after tag removal so that decoded `<` / `>`
        // characters are never mistaken for markup.
        return decodeHtmlEntities(withoutMarkup)
            .replace(/[ \t\r]+/g, ' ')
            // Trim spaces around line breaks so whitespace-only lines collapse too.
            .replace(/ ?\n ?/g, '\n')
            .replace(/\n{3,}/g, '\n\n')
            .trim();
    }
};
185
+ exports.WebLoader = WebLoader; // export the undecorated class first
186
+ exports.WebLoader = WebLoader = __decorate([ // then replace both the export and the local binding with the result of applying the decorators
187
+ (0, base_loader_1.Loader)({ // Loader decorator imported from ./base.loader, called with this loader's descriptor
188
+ name: 'WebLoader',
189
+ description: 'Scrapes web pages and converts them to plain text documents.',
190
+ mimeTypes: ['text/html'],
191
+ }),
192
+ __metadata("design:paramtypes", [Object]) // records the constructor parameter types (a single Object) under the "design:paramtypes" key
193
+ ], WebLoader);
194
+ //# sourceMappingURL=web.loader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"web.loader.js","sourceRoot":"","sources":["../../src/loaders/web.loader.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;;;;;;;;;;;;AAEH,+CAA2D;AA2CpD,IAAM,SAAS,GAAf,MAAM,SAAU,SAAQ,gCAAkB;IAS/C,YAAY,OAAyB;QACnC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC,CAAC,EAAE,CAAC;YACjE,MAAM,IAAI,KAAK,CAAC,0DAA0D,CAAC,CAAC;QAC9E,CAAC;QACD,IAAI,CAAC,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAC/D,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;QACjC,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,CAAC,CAAC;QAC5C,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,KAAM,CAAC;QACzC,IAAI,CAAC,OAAO,GAAG;YACb,YAAY,EAAE,0CAA0C;YACxD,GAAG,OAAO,CAAC,OAAO;SACnB,CAAC;QACF,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,CAAC,CAAC;QACpC,IAAI,CAAC,aAAa,GAAG,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;IAC9C,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,OAAO,GAAe,EAAE,CAAC;QAE/B,qCAAqC;QACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YAC5D,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC;YACvD,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAE/E,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBAClD,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAC7B,CAAC;qBAAM,IAAI,MAAM,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;oBACxC,sCAAsC;oBACtC,OAAO,CAAC,IAAI,CAAC,mCAAmC,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;gBACnE,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,4EAA4E;IAEpE,KAAK,CAAC,MAAM,CAAC,GAAW;QAC9B,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;QAC5C,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,IAAI,IAAY,CAAC;QACjB,IAAI,KAAK,GAAG,EAAE,CAAC;QAEf,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,IAAI,CAAC;gBACH,iEAAiE;gBACjE,qGAAqG;gBACrG,MAAM,OAAO,G
AAG,OAAO,CAAC,SAAS,CAAQ,CAAC;gBAC1C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAC7B,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBACzC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBACtC,IAAI,CAAC,IAAI,EAAE,CAAC;oBACV,sCAAsC;oBACtC,OAAO,CAAC,IAAI,CACV,yBAAyB,IAAI,CAAC,QAAQ,yBAAyB,GAAG,8BAA8B,CACjG,CAAC;oBACF,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;gBAC9B,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;gBACxB,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAClB,MAAM,IAAI,KAAK,CACb,uFAAuF;wBACrF,4BAA4B;wBAC5B,mEAAmE,CACtE,CAAC;gBACJ,CAAC;gBACD,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;aAAM,CAAC;YACN,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;YAChC,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC;QAED,OAAO,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE;YAC/B,MAAM,EAAE,GAAG;YACX,GAAG;YACH,UAAU,EAAE,KAAK;YACjB,GAAG,CAAC,KAAK,IAAI,EAAE,KAAK,EAAE,CAAC;YACvB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,GAAG,IAAI,CAAC,aAAa;SACtB,CAAC,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,cAAc,CAAC,GAAW;QACtC,IAAI,SAAkB,CAAC;QAEvB,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;YACzD,IAAI,CAAC;gBACH,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;gBACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;gBAEjE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;oBAChC,OAAO,EAAE,IAAI,CAAC,OAAO;oBACrB,MAAM,EAAE,UAAU,CAAC,MAAM;iBAC1B,CAAC,CAAC;gBACH,YAAY,CAAC,KAAK,CAAC,CAAC;gBAEpB,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;oBACjB,MAAM,IAAI,KAAK,CAAC,QAAQ,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;gBACpE,CAAC;gBAED,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;gBAC/D,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;oBACnC,sCAAsC;oBACtC,OAAO,CAAC,IAAI,CAAC,4CAA4C,GAAG,KAAK,WAAW,EAAE,CAAC,CAAC;oBAChF,OAAO,IAAI,CAAC;gBACd,CAAC;gBAED,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAC/B,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,SAAS,GAAG,GAAG,CAAC;gBAChB,IAAI,OAAO,GAAG,IAAI,CAAC,OAAO
,EAAE,CAAC;oBAC3B,6CAA6C;oBAC7C,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC;gBACtE,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,IAAI,KAAK,CACb,+BAA+B,GAAG,UAAU,IAAI,CAAC,OAAO,GAAG,CAAC,cAC1D,SAAS,YAAY,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CACnE,EAAE,CACH,CAAC;IACJ,CAAC;IAEO,YAAY,CAAC,IAAY;QAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;QAC7D,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACtC,CAAC;IAEO,SAAS,CAAC,IAAY;QAC5B,OAAO,IAAI;aACR,OAAO,CAAC,6BAA6B,EAAE,GAAG,CAAC;aAC3C,OAAO,CAAC,2BAA2B,EAAE,GAAG,CAAC;aACzC,OAAO,CAAC,uBAAuB,EAAE,GAAG,CAAC;aACrC,OAAO,CAAC,6BAA6B,EAAE,GAAG,CAAC;aAC3C,OAAO,CAAC,6BAA6B,EAAE,GAAG,CAAC;aAC3C,OAAO,CAAC,kBAAkB,EAAE,GAAG,CAAC;aAChC,OAAO,CAAC,oCAAoC,EAAE,IAAI,CAAC;aACnD,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;aACxB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;aACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;aACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;aACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;aACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;aACvB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC;aAC1E,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;aACvB,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;aAC1B,IAAI,EAAE,CAAC;IACZ,CAAC;CACF,CAAA;AApKY,8BAAS;oBAAT,SAAS;IALrB,IAAA,oBAAM,EAAC;QACN,IAAI,EAAE,WAAW;QACjB,WAAW,EAAE,8DAA8D;QAC3E,SAAS,EAAE,CAAC,WAAW,CAAC;KACzB,CAAC;;GACW,SAAS,CAoKrB"}
@@ -0,0 +1,92 @@
1
+ /**
2
+ * YouTubeTranscriptLoader
3
+ *
4
+ * Downloads the caption/transcript for one or more YouTube videos and
5
+ * converts them to `Document` objects — no extra npm dependency required.
6
+ * Uses Node.js built-in `fetch` and parses the YouTube InnerTube API response.
7
+ *
8
+ * How it works:
9
+ * 1. Fetches the YouTube watch page for the video.
10
+ * 2. Extracts the `ytInitialPlayerResponse` JSON blob from the page HTML.
11
+ * 3. Locates the caption tracks list and selects the preferred language.
12
+ * 4. Fetches the caption XML file.
13
+ * 5. Parses `<text>` elements and decodes HTML entities.
14
+ * 6. Returns the transcript as one document (or split into segments).
15
+ *
16
+ * Accepts video IDs, full watch URLs, and youtu.be short URLs.
17
+ *
18
+ * @example
19
+ * ```typescript
20
+ * const loader = new YouTubeTranscriptLoader({
21
+ * videoId: 'dQw4w9WgXcQ',
22
+ * });
23
+ * const docs = await loader.load();
24
+ * // docs[0].content === "Never gonna give you up, never gonna let you..."
25
+ * // docs[0].metadata.title === "Rick Astley - Never Gonna Give You Up"
26
+ * // docs[0].metadata.videoId === "dQw4w9WgXcQ"
27
+ * ```
28
+ *
29
+ * Multiple videos:
30
+ * ```typescript
31
+ * const loader = new YouTubeTranscriptLoader({
32
+ * videoIds: ['VIDEO_ID_1', 'VIDEO_ID_2'],
33
+ * });
34
+ * ```
35
+ *
36
+ * Split into timed segments (useful for timestamped RAG retrieval):
37
+ * ```typescript
38
+ * const loader = new YouTubeTranscriptLoader({
39
+ * videoId: 'VIDEO_ID',
40
+ * segmentDuration: 120, // one document per 2-minute window
41
+ * });
42
+ * ```
43
+ */
44
+ import { BaseDocumentLoader } from './base.loader';
45
+ import type { Document } from '../types';
46
+ export interface YouTubeTranscriptLoaderOptions { // Options accepted by the YouTubeTranscriptLoader constructor.
47
+ /** Single YouTube video: a bare video ID, a full watch URL, or a youtu.be short URL. */
48
+ videoId?: string;
49
+ /** Multiple YouTube videos, each given as a video ID, a full watch URL, or a youtu.be short URL. */
50
+ videoIds?: string[];
51
+ /**
52
+ * Preferred caption language code.
53
+ * Falls back to the first available track if not found.
54
+ * @default 'en'
55
+ */
56
+ language?: string;
57
+ /**
58
+ * If set, the transcript is split into segments of approximately this
59
+ * many seconds. Each segment becomes a separate `Document` with
60
+ * `metadata.startTime` and `metadata.endTime`.
61
+ * @default undefined (single document per video)
62
+ */
63
+ segmentDuration?: number;
64
+ /**
65
+ * Request timeout in milliseconds.
66
+ * @default 15000
67
+ */
68
+ timeout?: number;
69
+ /** Extra metadata merged into every document. */
70
+ metadata?: Record<string, unknown>;
71
+ }
72
+ export declare class YouTubeTranscriptLoader extends BaseDocumentLoader { // Loader that downloads YouTube caption transcripts and returns them as Document objects.
73
+ private readonly videoIds; // presumably the videos to load, from `videoId` / `videoIds` — implementation not visible here
74
+ private readonly language; // presumably the preferred caption language code (documented option default: 'en')
75
+ private readonly segmentDuration?; // presumably the optional segment length in seconds from `options.segmentDuration`
76
+ private readonly timeout; // presumably the request timeout in milliseconds (documented option default: 15000)
77
+ private readonly extraMetadata; // presumably `options.metadata`, merged into every document
78
+ private static readonly YOUTUBE_BASE; // NOTE(review): value not visible in this declaration; name suggests the YouTube base URL — confirm
79
+ private static readonly RE_PLAYER_RESPONSE; // NOTE(review): name suggests the pattern locating `ytInitialPlayerResponse` in the watch page — confirm
80
+ private static readonly RE_CAPTION_TEXT; // NOTE(review): name suggests the pattern matching `<text>` elements in the caption XML — confirm
81
+ constructor(options: YouTubeTranscriptLoaderOptions);
82
+ load(): Promise<Document[]>; // resolves to the transcript documents: one per video, or one per time segment when segmentDuration is set
83
+ private loadVideo;
84
+ private extractCaptionInfo;
85
+ private parseCaptionXml;
86
+ private buildSegments;
87
+ private normaliseId; // presumably reduces watch URLs and youtu.be short URLs to a bare video ID — confirm
88
+ private fetch;
89
+ private decodeEntities;
90
+ private formatTime;
91
+ }
92
+ //# sourceMappingURL=youtube-transcript.loader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"youtube-transcript.loader.d.ts","sourceRoot":"","sources":["../../src/loaders/youtube-transcript.loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AAEH,OAAO,EAAE,kBAAkB,EAAU,MAAM,eAAe,CAAC;AAC3D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAEzC,MAAM,WAAW,8BAA8B;IAC7C,sCAAsC;IACtC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,0CAA0C;IAC1C,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAQD,qBAIa,uBAAwB,SAAQ,kBAAkB;IAC7D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAW;IACpC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAS;IAC1C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAA0B;IAExD,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,YAAY,CAA6B;IACjE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAA+C;IACzF,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,eAAe,CACkC;gBAE7D,OAAO,EAAE,8BAA8B;IAa7C,IAAI,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YAmBnB,SAAS;IA+BvB,OAAO,CAAC,kBAAkB;IAqD1B,OAAO,CAAC,eAAe;IAgBvB,OAAO,CAAC,aAAa;IAqDrB,OAAO,CAAC,WAAW;YAYL,KAAK;IAsBnB,OAAO,CAAC,cAAc;IAUtB,OAAO,CAAC,UAAU;CAMnB"}