@hazeljs/rag 0.2.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (379) hide show
  1. package/LICENSE +192 -0
  2. package/README.md +504 -0
  3. package/dist/__tests__/graph/community-detector.test.d.ts +2 -0
  4. package/dist/__tests__/graph/community-detector.test.d.ts.map +1 -0
  5. package/dist/__tests__/graph/community-detector.test.js +87 -0
  6. package/dist/__tests__/graph/community-detector.test.js.map +1 -0
  7. package/dist/__tests__/graph/community-summarizer.test.d.ts +2 -0
  8. package/dist/__tests__/graph/community-summarizer.test.d.ts.map +1 -0
  9. package/dist/__tests__/graph/community-summarizer.test.js +131 -0
  10. package/dist/__tests__/graph/community-summarizer.test.js.map +1 -0
  11. package/dist/__tests__/graph/entity-extractor.test.d.ts +2 -0
  12. package/dist/__tests__/graph/entity-extractor.test.d.ts.map +1 -0
  13. package/dist/__tests__/graph/entity-extractor.test.js +129 -0
  14. package/dist/__tests__/graph/entity-extractor.test.js.map +1 -0
  15. package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts +2 -0
  16. package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts.map +1 -0
  17. package/dist/__tests__/graph/graph-rag-pipeline.test.js +158 -0
  18. package/dist/__tests__/graph/graph-rag-pipeline.test.js.map +1 -0
  19. package/dist/__tests__/graph/knowledge-graph.test.d.ts +2 -0
  20. package/dist/__tests__/graph/knowledge-graph.test.d.ts.map +1 -0
  21. package/dist/__tests__/graph/knowledge-graph.test.js +208 -0
  22. package/dist/__tests__/graph/knowledge-graph.test.js.map +1 -0
  23. package/dist/__tests__/loaders/base.loader.test.d.ts +2 -0
  24. package/dist/__tests__/loaders/base.loader.test.d.ts.map +1 -0
  25. package/dist/__tests__/loaders/base.loader.test.js +114 -0
  26. package/dist/__tests__/loaders/base.loader.test.js.map +1 -0
  27. package/dist/__tests__/loaders/csv-file.loader.test.d.ts +2 -0
  28. package/dist/__tests__/loaders/csv-file.loader.test.d.ts.map +1 -0
  29. package/dist/__tests__/loaders/csv-file.loader.test.js +98 -0
  30. package/dist/__tests__/loaders/csv-file.loader.test.js.map +1 -0
  31. package/dist/__tests__/loaders/directory.loader.test.d.ts +2 -0
  32. package/dist/__tests__/loaders/directory.loader.test.d.ts.map +1 -0
  33. package/dist/__tests__/loaders/directory.loader.test.js +154 -0
  34. package/dist/__tests__/loaders/directory.loader.test.js.map +1 -0
  35. package/dist/__tests__/loaders/html-file.loader.test.d.ts +2 -0
  36. package/dist/__tests__/loaders/html-file.loader.test.d.ts.map +1 -0
  37. package/dist/__tests__/loaders/html-file.loader.test.js +93 -0
  38. package/dist/__tests__/loaders/html-file.loader.test.js.map +1 -0
  39. package/dist/__tests__/loaders/json-file.loader.test.d.ts +2 -0
  40. package/dist/__tests__/loaders/json-file.loader.test.d.ts.map +1 -0
  41. package/dist/__tests__/loaders/json-file.loader.test.js +84 -0
  42. package/dist/__tests__/loaders/json-file.loader.test.js.map +1 -0
  43. package/dist/__tests__/loaders/markdown-file.loader.test.d.ts +2 -0
  44. package/dist/__tests__/loaders/markdown-file.loader.test.d.ts.map +1 -0
  45. package/dist/__tests__/loaders/markdown-file.loader.test.js +83 -0
  46. package/dist/__tests__/loaders/markdown-file.loader.test.js.map +1 -0
  47. package/dist/__tests__/loaders/text-file.loader.test.d.ts +2 -0
  48. package/dist/__tests__/loaders/text-file.loader.test.d.ts.map +1 -0
  49. package/dist/__tests__/loaders/text-file.loader.test.js +50 -0
  50. package/dist/__tests__/loaders/text-file.loader.test.js.map +1 -0
  51. package/dist/__tests__/rag-pipeline.test.d.ts +2 -0
  52. package/dist/__tests__/rag-pipeline.test.d.ts.map +1 -0
  53. package/dist/__tests__/rag-pipeline.test.js +210 -0
  54. package/dist/__tests__/rag-pipeline.test.js.map +1 -0
  55. package/dist/__tests__/retrieval/bm25.test.d.ts +2 -0
  56. package/dist/__tests__/retrieval/bm25.test.d.ts.map +1 -0
  57. package/dist/__tests__/retrieval/bm25.test.js +86 -0
  58. package/dist/__tests__/retrieval/bm25.test.js.map +1 -0
  59. package/dist/__tests__/retrieval/hybrid-search.test.d.ts +2 -0
  60. package/dist/__tests__/retrieval/hybrid-search.test.d.ts.map +1 -0
  61. package/dist/__tests__/retrieval/hybrid-search.test.js +85 -0
  62. package/dist/__tests__/retrieval/hybrid-search.test.js.map +1 -0
  63. package/dist/__tests__/retrieval/multi-query.test.d.ts +2 -0
  64. package/dist/__tests__/retrieval/multi-query.test.d.ts.map +1 -0
  65. package/dist/__tests__/retrieval/multi-query.test.js +90 -0
  66. package/dist/__tests__/retrieval/multi-query.test.js.map +1 -0
  67. package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts +2 -0
  68. package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts.map +1 -0
  69. package/dist/__tests__/text-splitters/recursive-text-splitter.test.js +97 -0
  70. package/dist/__tests__/text-splitters/recursive-text-splitter.test.js.map +1 -0
  71. package/dist/__tests__/utils/similarity.test.d.ts +2 -0
  72. package/dist/__tests__/utils/similarity.test.d.ts.map +1 -0
  73. package/dist/__tests__/utils/similarity.test.js +47 -0
  74. package/dist/__tests__/utils/similarity.test.js.map +1 -0
  75. package/dist/agentic/agentic-rag.service.d.ts +49 -0
  76. package/dist/agentic/agentic-rag.service.d.ts.map +1 -0
  77. package/dist/agentic/agentic-rag.service.js +149 -0
  78. package/dist/agentic/agentic-rag.service.js.map +1 -0
  79. package/dist/agentic/decorators/active-learning.decorator.d.ts +19 -0
  80. package/dist/agentic/decorators/active-learning.decorator.d.ts.map +1 -0
  81. package/dist/agentic/decorators/active-learning.decorator.js +98 -0
  82. package/dist/agentic/decorators/active-learning.decorator.js.map +1 -0
  83. package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts +17 -0
  84. package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts.map +1 -0
  85. package/dist/agentic/decorators/adaptive-retrieval.decorator.js +103 -0
  86. package/dist/agentic/decorators/adaptive-retrieval.decorator.js.map +1 -0
  87. package/dist/agentic/decorators/cached.decorator.d.ts +18 -0
  88. package/dist/agentic/decorators/cached.decorator.d.ts.map +1 -0
  89. package/dist/agentic/decorators/cached.decorator.js +93 -0
  90. package/dist/agentic/decorators/cached.decorator.js.map +1 -0
  91. package/dist/agentic/decorators/context-aware.decorator.d.ts +16 -0
  92. package/dist/agentic/decorators/context-aware.decorator.d.ts.map +1 -0
  93. package/dist/agentic/decorators/context-aware.decorator.js +169 -0
  94. package/dist/agentic/decorators/context-aware.decorator.js.map +1 -0
  95. package/dist/agentic/decorators/corrective-rag.decorator.d.ts +16 -0
  96. package/dist/agentic/decorators/corrective-rag.decorator.d.ts.map +1 -0
  97. package/dist/agentic/decorators/corrective-rag.decorator.js +142 -0
  98. package/dist/agentic/decorators/corrective-rag.decorator.js.map +1 -0
  99. package/dist/agentic/decorators/hyde.decorator.d.ts +15 -0
  100. package/dist/agentic/decorators/hyde.decorator.d.ts.map +1 -0
  101. package/dist/agentic/decorators/hyde.decorator.js +91 -0
  102. package/dist/agentic/decorators/hyde.decorator.js.map +1 -0
  103. package/dist/agentic/decorators/index.d.ts +16 -0
  104. package/dist/agentic/decorators/index.d.ts.map +1 -0
  105. package/dist/agentic/decorators/index.js +32 -0
  106. package/dist/agentic/decorators/index.js.map +1 -0
  107. package/dist/agentic/decorators/multi-hop.decorator.d.ts +15 -0
  108. package/dist/agentic/decorators/multi-hop.decorator.d.ts.map +1 -0
  109. package/dist/agentic/decorators/multi-hop.decorator.js +109 -0
  110. package/dist/agentic/decorators/multi-hop.decorator.js.map +1 -0
  111. package/dist/agentic/decorators/query-planner.decorator.d.ts +20 -0
  112. package/dist/agentic/decorators/query-planner.decorator.d.ts.map +1 -0
  113. package/dist/agentic/decorators/query-planner.decorator.js +213 -0
  114. package/dist/agentic/decorators/query-planner.decorator.js.map +1 -0
  115. package/dist/agentic/decorators/query-rewriter.decorator.d.ts +16 -0
  116. package/dist/agentic/decorators/query-rewriter.decorator.d.ts.map +1 -0
  117. package/dist/agentic/decorators/query-rewriter.decorator.js +143 -0
  118. package/dist/agentic/decorators/query-rewriter.decorator.js.map +1 -0
  119. package/dist/agentic/decorators/self-reflective.decorator.d.ts +20 -0
  120. package/dist/agentic/decorators/self-reflective.decorator.d.ts.map +1 -0
  121. package/dist/agentic/decorators/self-reflective.decorator.js +189 -0
  122. package/dist/agentic/decorators/self-reflective.decorator.js.map +1 -0
  123. package/dist/agentic/decorators/source-verification.decorator.d.ts +15 -0
  124. package/dist/agentic/decorators/source-verification.decorator.d.ts.map +1 -0
  125. package/dist/agentic/decorators/source-verification.decorator.js +121 -0
  126. package/dist/agentic/decorators/source-verification.decorator.js.map +1 -0
  127. package/dist/agentic/index.d.ts +9 -0
  128. package/dist/agentic/index.d.ts.map +1 -0
  129. package/dist/agentic/index.js +25 -0
  130. package/dist/agentic/index.js.map +1 -0
  131. package/dist/agentic/types.d.ts +210 -0
  132. package/dist/agentic/types.d.ts.map +1 -0
  133. package/dist/agentic/types.js +7 -0
  134. package/dist/agentic/types.js.map +1 -0
  135. package/dist/decorators/embeddable.decorator.d.ts +31 -0
  136. package/dist/decorators/embeddable.decorator.d.ts.map +1 -0
  137. package/dist/decorators/embeddable.decorator.js +44 -0
  138. package/dist/decorators/embeddable.decorator.js.map +1 -0
  139. package/dist/decorators/rag.decorator.d.ts +58 -0
  140. package/dist/decorators/rag.decorator.d.ts.map +1 -0
  141. package/dist/decorators/rag.decorator.js +78 -0
  142. package/dist/decorators/rag.decorator.js.map +1 -0
  143. package/dist/decorators/semantic-search.decorator.d.ts +69 -0
  144. package/dist/decorators/semantic-search.decorator.d.ts.map +1 -0
  145. package/dist/decorators/semantic-search.decorator.js +116 -0
  146. package/dist/decorators/semantic-search.decorator.js.map +1 -0
  147. package/dist/embeddings/cohere-embeddings.d.ts +33 -0
  148. package/dist/embeddings/cohere-embeddings.d.ts.map +1 -0
  149. package/dist/embeddings/cohere-embeddings.js +91 -0
  150. package/dist/embeddings/cohere-embeddings.js.map +1 -0
  151. package/dist/embeddings/openai-embeddings.d.ts +21 -0
  152. package/dist/embeddings/openai-embeddings.d.ts.map +1 -0
  153. package/dist/embeddings/openai-embeddings.js +53 -0
  154. package/dist/embeddings/openai-embeddings.js.map +1 -0
  155. package/dist/graph/community-detector.d.ts +45 -0
  156. package/dist/graph/community-detector.d.ts.map +1 -0
  157. package/dist/graph/community-detector.js +153 -0
  158. package/dist/graph/community-detector.js.map +1 -0
  159. package/dist/graph/community-summarizer.d.ts +41 -0
  160. package/dist/graph/community-summarizer.d.ts.map +1 -0
  161. package/dist/graph/community-summarizer.js +119 -0
  162. package/dist/graph/community-summarizer.js.map +1 -0
  163. package/dist/graph/entity-extractor.d.ts +47 -0
  164. package/dist/graph/entity-extractor.d.ts.map +1 -0
  165. package/dist/graph/entity-extractor.js +224 -0
  166. package/dist/graph/entity-extractor.js.map +1 -0
  167. package/dist/graph/graph-rag-pipeline.d.ts +83 -0
  168. package/dist/graph/graph-rag-pipeline.d.ts.map +1 -0
  169. package/dist/graph/graph-rag-pipeline.js +390 -0
  170. package/dist/graph/graph-rag-pipeline.js.map +1 -0
  171. package/dist/graph/graph.types.d.ts +186 -0
  172. package/dist/graph/graph.types.d.ts.map +1 -0
  173. package/dist/graph/graph.types.js +20 -0
  174. package/dist/graph/graph.types.js.map +1 -0
  175. package/dist/graph/index.d.ts +15 -0
  176. package/dist/graph/index.d.ts.map +1 -0
  177. package/dist/graph/index.js +31 -0
  178. package/dist/graph/index.js.map +1 -0
  179. package/dist/graph/knowledge-graph.d.ts +57 -0
  180. package/dist/graph/knowledge-graph.d.ts.map +1 -0
  181. package/dist/graph/knowledge-graph.js +198 -0
  182. package/dist/graph/knowledge-graph.js.map +1 -0
  183. package/dist/index.d.ts +29 -0
  184. package/dist/index.d.ts.map +1 -0
  185. package/dist/index.js +58 -0
  186. package/dist/index.js.map +1 -0
  187. package/dist/loaders/base.loader.d.ts +108 -0
  188. package/dist/loaders/base.loader.d.ts.map +1 -0
  189. package/dist/loaders/base.loader.js +123 -0
  190. package/dist/loaders/base.loader.js.map +1 -0
  191. package/dist/loaders/csv-file.loader.d.ts +61 -0
  192. package/dist/loaders/csv-file.loader.d.ts.map +1 -0
  193. package/dist/loaders/csv-file.loader.js +162 -0
  194. package/dist/loaders/csv-file.loader.js.map +1 -0
  195. package/dist/loaders/directory.loader.d.ts +67 -0
  196. package/dist/loaders/directory.loader.d.ts.map +1 -0
  197. package/dist/loaders/directory.loader.js +163 -0
  198. package/dist/loaders/directory.loader.js.map +1 -0
  199. package/dist/loaders/docx.loader.d.ts +52 -0
  200. package/dist/loaders/docx.loader.d.ts.map +1 -0
  201. package/dist/loaders/docx.loader.js +110 -0
  202. package/dist/loaders/docx.loader.js.map +1 -0
  203. package/dist/loaders/github.loader.d.ts +114 -0
  204. package/dist/loaders/github.loader.d.ts.map +1 -0
  205. package/dist/loaders/github.loader.js +217 -0
  206. package/dist/loaders/github.loader.js.map +1 -0
  207. package/dist/loaders/html-file.loader.d.ts +55 -0
  208. package/dist/loaders/html-file.loader.d.ts.map +1 -0
  209. package/dist/loaders/html-file.loader.js +170 -0
  210. package/dist/loaders/html-file.loader.js.map +1 -0
  211. package/dist/loaders/index.d.ts +52 -0
  212. package/dist/loaders/index.d.ts.map +1 -0
  213. package/dist/loaders/index.js +61 -0
  214. package/dist/loaders/index.js.map +1 -0
  215. package/dist/loaders/json-file.loader.d.ts +51 -0
  216. package/dist/loaders/json-file.loader.d.ts.map +1 -0
  217. package/dist/loaders/json-file.loader.js +100 -0
  218. package/dist/loaders/json-file.loader.js.map +1 -0
  219. package/dist/loaders/markdown-file.loader.d.ts +61 -0
  220. package/dist/loaders/markdown-file.loader.d.ts.map +1 -0
  221. package/dist/loaders/markdown-file.loader.js +148 -0
  222. package/dist/loaders/markdown-file.loader.js.map +1 -0
  223. package/dist/loaders/pdf.loader.d.ts +64 -0
  224. package/dist/loaders/pdf.loader.d.ts.map +1 -0
  225. package/dist/loaders/pdf.loader.js +163 -0
  226. package/dist/loaders/pdf.loader.js.map +1 -0
  227. package/dist/loaders/text-file.loader.d.ts +39 -0
  228. package/dist/loaders/text-file.loader.d.ts.map +1 -0
  229. package/dist/loaders/text-file.loader.js +69 -0
  230. package/dist/loaders/text-file.loader.js.map +1 -0
  231. package/dist/loaders/web.loader.d.ts +87 -0
  232. package/dist/loaders/web.loader.d.ts.map +1 -0
  233. package/dist/loaders/web.loader.js +194 -0
  234. package/dist/loaders/web.loader.js.map +1 -0
  235. package/dist/loaders/youtube-transcript.loader.d.ts +92 -0
  236. package/dist/loaders/youtube-transcript.loader.d.ts.map +1 -0
  237. package/dist/loaders/youtube-transcript.loader.js +254 -0
  238. package/dist/loaders/youtube-transcript.loader.js.map +1 -0
  239. package/dist/memory/index.d.ts +11 -0
  240. package/dist/memory/index.d.ts.map +1 -0
  241. package/dist/memory/index.js +31 -0
  242. package/dist/memory/index.js.map +1 -0
  243. package/dist/memory/memory-manager.d.ts +96 -0
  244. package/dist/memory/memory-manager.d.ts.map +1 -0
  245. package/dist/memory/memory-manager.js +369 -0
  246. package/dist/memory/memory-manager.js.map +1 -0
  247. package/dist/memory/memory-store.interface.d.ts +73 -0
  248. package/dist/memory/memory-store.interface.d.ts.map +1 -0
  249. package/dist/memory/memory-store.interface.js +6 -0
  250. package/dist/memory/memory-store.interface.js.map +1 -0
  251. package/dist/memory/stores/buffer-memory.d.ts +47 -0
  252. package/dist/memory/stores/buffer-memory.d.ts.map +1 -0
  253. package/dist/memory/stores/buffer-memory.js +280 -0
  254. package/dist/memory/stores/buffer-memory.js.map +1 -0
  255. package/dist/memory/stores/hybrid-memory.d.ts +49 -0
  256. package/dist/memory/stores/hybrid-memory.d.ts.map +1 -0
  257. package/dist/memory/stores/hybrid-memory.js +194 -0
  258. package/dist/memory/stores/hybrid-memory.js.map +1 -0
  259. package/dist/memory/stores/vector-memory.d.ts +48 -0
  260. package/dist/memory/stores/vector-memory.d.ts.map +1 -0
  261. package/dist/memory/stores/vector-memory.js +312 -0
  262. package/dist/memory/stores/vector-memory.js.map +1 -0
  263. package/dist/memory/types.d.ts +119 -0
  264. package/dist/memory/types.d.ts.map +1 -0
  265. package/dist/memory/types.js +18 -0
  266. package/dist/memory/types.js.map +1 -0
  267. package/dist/prompts/agentic/adaptive-retrieval.prompt.d.ts +8 -0
  268. package/dist/prompts/agentic/adaptive-retrieval.prompt.d.ts.map +1 -0
  269. package/dist/prompts/agentic/adaptive-retrieval.prompt.js +27 -0
  270. package/dist/prompts/agentic/adaptive-retrieval.prompt.js.map +1 -0
  271. package/dist/prompts/agentic/corrective-rag.prompt.d.ts +9 -0
  272. package/dist/prompts/agentic/corrective-rag.prompt.d.ts.map +1 -0
  273. package/dist/prompts/agentic/corrective-rag.prompt.js +23 -0
  274. package/dist/prompts/agentic/corrective-rag.prompt.js.map +1 -0
  275. package/dist/prompts/agentic/hyde.prompt.d.ts +9 -0
  276. package/dist/prompts/agentic/hyde.prompt.d.ts.map +1 -0
  277. package/dist/prompts/agentic/hyde.prompt.js +18 -0
  278. package/dist/prompts/agentic/hyde.prompt.js.map +1 -0
  279. package/dist/prompts/agentic/multi-hop.prompt.d.ts +15 -0
  280. package/dist/prompts/agentic/multi-hop.prompt.d.ts.map +1 -0
  281. package/dist/prompts/agentic/multi-hop.prompt.js +38 -0
  282. package/dist/prompts/agentic/multi-hop.prompt.js.map +1 -0
  283. package/dist/prompts/agentic/query-planner.prompt.d.ts +8 -0
  284. package/dist/prompts/agentic/query-planner.prompt.d.ts.map +1 -0
  285. package/dist/prompts/agentic/query-planner.prompt.js +30 -0
  286. package/dist/prompts/agentic/query-planner.prompt.js.map +1 -0
  287. package/dist/prompts/agentic/query-rewriter.prompt.d.ts +10 -0
  288. package/dist/prompts/agentic/query-rewriter.prompt.d.ts.map +1 -0
  289. package/dist/prompts/agentic/query-rewriter.prompt.js +17 -0
  290. package/dist/prompts/agentic/query-rewriter.prompt.js.map +1 -0
  291. package/dist/prompts/agentic/self-reflective-improve.prompt.d.ts +10 -0
  292. package/dist/prompts/agentic/self-reflective-improve.prompt.d.ts.map +1 -0
  293. package/dist/prompts/agentic/self-reflective-improve.prompt.js +24 -0
  294. package/dist/prompts/agentic/self-reflective-improve.prompt.js.map +1 -0
  295. package/dist/prompts/agentic/self-reflective.prompt.d.ts +9 -0
  296. package/dist/prompts/agentic/self-reflective.prompt.d.ts.map +1 -0
  297. package/dist/prompts/agentic/self-reflective.prompt.js +32 -0
  298. package/dist/prompts/agentic/self-reflective.prompt.js.map +1 -0
  299. package/dist/prompts/community-summary.prompt.d.ts +9 -0
  300. package/dist/prompts/community-summary.prompt.d.ts.map +1 -0
  301. package/dist/prompts/community-summary.prompt.js +30 -0
  302. package/dist/prompts/community-summary.prompt.js.map +1 -0
  303. package/dist/prompts/entity-extraction.prompt.d.ts +10 -0
  304. package/dist/prompts/entity-extraction.prompt.d.ts.map +1 -0
  305. package/dist/prompts/entity-extraction.prompt.js +39 -0
  306. package/dist/prompts/entity-extraction.prompt.js.map +1 -0
  307. package/dist/prompts/graph-search.prompt.d.ts +10 -0
  308. package/dist/prompts/graph-search.prompt.d.ts.map +1 -0
  309. package/dist/prompts/graph-search.prompt.js +23 -0
  310. package/dist/prompts/graph-search.prompt.js.map +1 -0
  311. package/dist/prompts/index.d.ts +13 -0
  312. package/dist/prompts/index.d.ts.map +1 -0
  313. package/dist/prompts/index.js +29 -0
  314. package/dist/prompts/index.js.map +1 -0
  315. package/dist/prompts/rag-answer.prompt.d.ts +9 -0
  316. package/dist/prompts/rag-answer.prompt.d.ts.map +1 -0
  317. package/dist/prompts/rag-answer.prompt.js +20 -0
  318. package/dist/prompts/rag-answer.prompt.js.map +1 -0
  319. package/dist/rag-pipeline-with-memory.d.ts +68 -0
  320. package/dist/rag-pipeline-with-memory.d.ts.map +1 -0
  321. package/dist/rag-pipeline-with-memory.js +186 -0
  322. package/dist/rag-pipeline-with-memory.js.map +1 -0
  323. package/dist/rag-pipeline.d.ts +59 -0
  324. package/dist/rag-pipeline.d.ts.map +1 -0
  325. package/dist/rag-pipeline.js +181 -0
  326. package/dist/rag-pipeline.js.map +1 -0
  327. package/dist/rag.module.d.ts +26 -0
  328. package/dist/rag.module.d.ts.map +1 -0
  329. package/dist/rag.module.js +40 -0
  330. package/dist/rag.module.js.map +1 -0
  331. package/dist/rag.service.d.ts +96 -0
  332. package/dist/rag.service.d.ts.map +1 -0
  333. package/dist/rag.service.js +173 -0
  334. package/dist/rag.service.js.map +1 -0
  335. package/dist/retrieval/bm25.d.ts +57 -0
  336. package/dist/retrieval/bm25.d.ts.map +1 -0
  337. package/dist/retrieval/bm25.js +106 -0
  338. package/dist/retrieval/bm25.js.map +1 -0
  339. package/dist/retrieval/hybrid-search.d.ts +48 -0
  340. package/dist/retrieval/hybrid-search.d.ts.map +1 -0
  341. package/dist/retrieval/hybrid-search.js +123 -0
  342. package/dist/retrieval/hybrid-search.js.map +1 -0
  343. package/dist/retrieval/multi-query.d.ts +38 -0
  344. package/dist/retrieval/multi-query.d.ts.map +1 -0
  345. package/dist/retrieval/multi-query.js +135 -0
  346. package/dist/retrieval/multi-query.js.map +1 -0
  347. package/dist/text-splitters/recursive-text-splitter.d.ts +21 -0
  348. package/dist/text-splitters/recursive-text-splitter.d.ts.map +1 -0
  349. package/dist/text-splitters/recursive-text-splitter.js +95 -0
  350. package/dist/text-splitters/recursive-text-splitter.js.map +1 -0
  351. package/dist/types/index.d.ts +144 -0
  352. package/dist/types/index.d.ts.map +1 -0
  353. package/dist/types/index.js +16 -0
  354. package/dist/types/index.js.map +1 -0
  355. package/dist/utils/similarity.d.ts +16 -0
  356. package/dist/utils/similarity.d.ts.map +1 -0
  357. package/dist/utils/similarity.js +58 -0
  358. package/dist/utils/similarity.js.map +1 -0
  359. package/dist/vector-stores/chroma.store.d.ts +42 -0
  360. package/dist/vector-stores/chroma.store.d.ts.map +1 -0
  361. package/dist/vector-stores/chroma.store.js +242 -0
  362. package/dist/vector-stores/chroma.store.js.map +1 -0
  363. package/dist/vector-stores/memory-vector-store.d.ts +20 -0
  364. package/dist/vector-stores/memory-vector-store.d.ts.map +1 -0
  365. package/dist/vector-stores/memory-vector-store.js +94 -0
  366. package/dist/vector-stores/memory-vector-store.js.map +1 -0
  367. package/dist/vector-stores/pinecone.store.d.ts +34 -0
  368. package/dist/vector-stores/pinecone.store.d.ts.map +1 -0
  369. package/dist/vector-stores/pinecone.store.js +146 -0
  370. package/dist/vector-stores/pinecone.store.js.map +1 -0
  371. package/dist/vector-stores/qdrant.store.d.ts +33 -0
  372. package/dist/vector-stores/qdrant.store.d.ts.map +1 -0
  373. package/dist/vector-stores/qdrant.store.js +174 -0
  374. package/dist/vector-stores/qdrant.store.js.map +1 -0
  375. package/dist/vector-stores/weaviate.store.d.ts +37 -0
  376. package/dist/vector-stores/weaviate.store.d.ts.map +1 -0
  377. package/dist/vector-stores/weaviate.store.js +226 -0
  378. package/dist/vector-stores/weaviate.store.js.map +1 -0
  379. package/package.json +146 -0
@@ -0,0 +1,217 @@
1
+ "use strict";
2
+ /**
3
+ * GitHubLoader
4
+ *
5
+ * Loads files from a GitHub repository using the GitHub REST API.
6
+ * No extra npm dependency required — uses Node.js built-in `fetch`.
7
+ *
8
+ * Supports:
9
+ * - Public repositories (no token needed, but rate-limited to 60 req/h)
10
+ * - Private repositories (requires `token`)
11
+ * - Specific branches, tags, or commits via `ref`
12
+ * - Recursive directory traversal (a single recursive git-tree request; no depth limit is applied)
13
+ * - Extension filtering by exact match (`includeExtensions`, e.g. `['.md', '.ts']`)
14
+ * - Path prefix filtering (`includePaths`)
15
+ *
16
+ * Rate limits:
17
+ * - Unauthenticated: 60 requests per hour per IP
18
+ * - Authenticated: 5,000 requests per hour per token
19
+ * For large repos, always provide a `token`.
20
+ *
21
+ * @example
22
+ * ```typescript
23
+ * // Load all Markdown files from a public repo
24
+ * const loader = new GitHubLoader({
25
+ * owner: 'hazeljs',
26
+ * repo: 'hazeljs',
27
+ * ref: 'main',
28
+ * includeExtensions: ['.md'],
29
+ * token: process.env.GITHUB_TOKEN,
30
+ * });
31
+ * const docs = await loader.load();
32
+ * // docs[0].metadata.path === 'README.md'
33
+ * // docs[0].metadata.url === 'https://github.com/hazeljs/hazeljs/blob/main/README.md'
34
+ * ```
35
+ *
36
+ * Load a specific directory only:
37
+ * ```typescript
38
+ * const loader = new GitHubLoader({
39
+ * owner: 'facebook',
40
+ * repo: 'react',
41
+ * ref: 'main',
42
+ * directory: 'packages/react/src',
43
+ * includeExtensions: ['.js', '.ts'],
44
+ * });
45
+ * ```
46
+ */
47
// Compiler-emitted decorator helper. Reuses an existing `this.__decorate` when one is
// already defined; otherwise installs the fallback implementation below.
// `c` is the number of arguments actually passed. With fewer than 3 arguments the
// working value `r` starts as `target`; otherwise it starts as the property descriptor
// (looked up via Object.getOwnPropertyDescriptor when `desc` is exactly null).
+ var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
48
+ var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
49
// Delegate to Reflect.decorate when the runtime provides it.
+ if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
50
// Otherwise apply the decorators from last to first; a decorator's return value
// replaces `r` only when it is truthy.
+ else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
51
// When more than 3 arguments were passed and `r` is truthy, `r` is defined as the
// property `key` on `target`; the result of the expression is always `r`.
+ return c > 3 && r && Object.defineProperty(target, key, r), r;
52
+ };
53
// Compiler-emitted metadata helper. Reuses an existing `this.__metadata` when present.
// The fallback returns Reflect.metadata(k, v) when the runtime exposes Reflect.metadata,
// and otherwise falls through without returning a value (i.e. yields undefined).
+ var __metadata = (this && this.__metadata) || function (k, v) {
54
+ if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
55
+ };
56
+ Object.defineProperty(exports, "__esModule", { value: true });
57
+ exports.GitHubLoader = void 0;
58
+ const base_loader_1 = require("./base.loader");
59
/**
 * Percent-encodes a slash-separated repository path so it can be embedded in a URL.
 *
 * `encodeURI` keeps `/` intact but also leaves `#` and `?` untouched, which would
 * otherwise start a fragment or query string and silently truncate the path, so
 * those two characters are escaped explicitly.
 *
 * @param {string} path Slash-separated path (may include a leading ref segment).
 * @returns {string} The path with unsafe characters percent-encoded.
 */
function encodeGitHubPath(path) {
    return encodeURI(path).replace(/[#?]/g, (ch) => encodeURIComponent(ch));
}
/**
 * Loads text files from a GitHub repository.
 *
 * The file list comes from one recursive git-tree API request; each file body is
 * then fetched sequentially from raw.githubusercontent.com. Files that fail to
 * download, or that look binary, are skipped rather than aborting the whole load.
 */
let GitHubLoader = class GitHubLoader extends base_loader_1.BaseDocumentLoader {
    /**
     * @param options Loader options. `owner` and `repo` are used as given; every
     *   other field falls back to the default shown below when it is null/undefined.
     */
    constructor(options) {
        super();
        this.apiBase = 'https://api.github.com';
        this.opts = {
            owner: options.owner,
            repo: options.repo,
            ref: options.ref ?? 'main',
            directory: options.directory ?? '',
            includeExtensions: options.includeExtensions ?? [],
            includePaths: options.includePaths ?? [],
            excludePaths: options.excludePaths ?? ['node_modules/', 'dist/', '.git/'],
            maxFiles: options.maxFiles ?? 500,
            maxFileSize: options.maxFileSize ?? 102400,
            token: options.token ?? '',
            timeout: options.timeout ?? 15000,
            metadata: options.metadata ?? {},
        };
    }
    /**
     * Fetches the repository tree, applies the configured filters and downloads the
     * matching files one at a time until `maxFiles` documents have been produced.
     *
     * @returns Array of documents built with `this.createDocument` (inherited; not
     *   visible in this file). Empty when nothing matches the filters.
     * @throws When the tree request itself fails; per-file failures are only logged.
     */
    async load() {
        const { owner, repo, ref, maxFiles, metadata } = this.opts;
        const candidates = this.filterItems(await this.getTree());
        if (candidates.length === 0) {
            // eslint-disable-next-line no-console
            console.warn(`[GitHubLoader] No files matched the filter criteria in ${owner}/${repo}`);
            return [];
        }
        const docs = [];
        for (const item of candidates) {
            if (docs.length >= maxFiles) {
                break;
            }
            try {
                const content = await this.getFileContent(item);
                if (content === null) {
                    continue; // binary file — intentionally skipped
                }
                // Encode once so the recorded URLs are valid even for paths containing
                // spaces, '#', '?' or non-ASCII characters.
                const encoded = encodeGitHubPath(`${ref}/${item.path}`);
                docs.push(this.createDocument(content, {
                    source: item.path,
                    path: item.path,
                    repo: `${owner}/${repo}`,
                    ref,
                    url: `https://github.com/${owner}/${repo}/blob/${encoded}`,
                    rawUrl: `https://raw.githubusercontent.com/${owner}/${repo}/${encoded}`,
                    sha: item.sha,
                    loaderType: 'github',
                    // Spread last: caller-supplied metadata overrides the defaults above.
                    ...metadata,
                }));
            }
            catch (err) {
                const message = err instanceof Error ? err.message : String(err);
                // eslint-disable-next-line no-console
                console.warn(`[GitHubLoader] Skipping ${item.path}: ${message}`);
            }
        }
        return docs;
    }
    // ── Private: GitHub API helpers ──────────────────────────────────────────
    /**
     * Retrieves every blob entry of the repository at `ref`, optionally restricted to
     * the configured `directory`.
     *
     * @returns Tree entries whose `type` is `'blob'`.
     * @throws When the request fails or the response has no `tree` array.
     */
    async getTree() {
        const { owner, repo, ref } = this.opts;
        // Use the recursive git tree endpoint — one API call for the whole repo
        const url = `${this.apiBase}/repos/${owner}/${repo}/git/trees/${ref}?recursive=1`;
        const data = await this.apiGet(url);
        if (!data || !Array.isArray(data.tree)) {
            throw new Error(`GitHub API returned an unexpected tree response for ${owner}/${repo}@${ref}`);
        }
        if (data.truncated) {
            // eslint-disable-next-line no-console
            console.warn(`[GitHubLoader] Repository tree was truncated by the GitHub API ` +
                `(too many files). Use \`directory\` or \`includePaths\` to narrow the scope.`);
        }
        const blobs = data.tree.filter((item) => item.type === 'blob');
        // Tree paths are repo-relative without a leading slash, so strip any leading or
        // trailing slashes from `directory` before using it as a prefix.
        const directory = this.opts.directory.replace(/^\/+|\/+$/g, '');
        if (!directory) {
            return blobs;
        }
        const prefix = `${directory}/`;
        return blobs.filter((item) => item.path.startsWith(prefix));
    }
    /**
     * Applies the extension, include-path, exclude-path and size filters.
     *
     * The extension is taken from the file name only (text from its last `.`), compared
     * case-insensitively; a file name without a `.` has no extension and therefore
     * never matches a non-empty `includeExtensions` list.
     *
     * @param items Tree entries with at least `path` and optionally `size`.
     * @returns The entries that pass every filter.
     */
    filterItems(items) {
        const { includePaths, excludePaths, maxFileSize } = this.opts;
        // Normalised once, outside the per-item callback.
        const allowedExtensions = this.opts.includeExtensions.map((ext) => ext.toLowerCase());
        return items.filter((item) => {
            const path = item.path;
            // Extension filter
            if (allowedExtensions.length > 0) {
                const fileName = path.slice(path.lastIndexOf('/') + 1);
                const dot = fileName.lastIndexOf('.');
                const ext = dot === -1 ? '' : fileName.slice(dot).toLowerCase();
                if (!allowedExtensions.includes(ext)) {
                    return false;
                }
            }
            // Include path filter
            if (includePaths.length > 0 && !includePaths.some((prefix) => path.startsWith(prefix))) {
                return false;
            }
            // Exclude path filter
            if (excludePaths.some((prefix) => path.startsWith(prefix))) {
                return false;
            }
            // File size filter
            if (item.size !== undefined && item.size > maxFileSize) {
                // eslint-disable-next-line no-console
                console.warn(`[GitHubLoader] Skipping ${path} — size ${item.size} bytes exceeds maxFileSize ${maxFileSize}`);
                return false;
            }
            return true;
        });
    }
    /**
     * Downloads one file through the raw-content host.
     *
     * @param item Tree entry; only `path` is read.
     * @returns The file text, or `null` when the content looks binary.
     * @throws When the HTTP request fails or times out.
     */
    async getFileContent(item) {
        // Use raw content URL to avoid base64 decoding and extra API calls
        const rawUrl = `https://raw.githubusercontent.com/${this.opts.owner}/${this.opts.repo}/` +
            encodeGitHubPath(`${this.opts.ref}/${item.path}`);
        const text = await this.httpGet(rawUrl);
        return this.isBinary(text) ? null : text;
    }
    /**
     * GETs `url` and parses the response body as JSON.
     *
     * @throws When the request fails, or a SyntaxError when the body is not valid JSON.
     */
    async apiGet(url) {
        return JSON.parse(await this.httpGet(url));
    }
    /**
     * Performs a GET request and returns the response body as text.
     *
     * The abort timer stays armed until the body has been fully read and is always
     * cleared afterwards, including when `fetch` rejects.
     *
     * @param url Absolute URL to fetch.
     * @returns The response body.
     * @throws Error for non-2xx responses (with a hint for 403/429/404), or whatever
     *   `fetch` rejects with on network failure / abort.
     */
    async httpGet(url) {
        const controller = new AbortController();
        const timer = setTimeout(() => controller.abort(), this.opts.timeout);
        const headers = {
            Accept: 'application/vnd.github.v3+json',
            'User-Agent': 'HazelJS-RAG/1.0',
        };
        if (this.opts.token) {
            headers['Authorization'] = `Bearer ${this.opts.token}`;
        }
        try {
            const response = await fetch(url, { headers, signal: controller.signal });
            if (!response.ok) {
                const body = await response.text().catch(() => '');
                let hint = '';
                if (response.status === 403 || response.status === 429) {
                    hint = ' (rate limited — provide a GitHub token with the `token` option)';
                }
                else if (response.status === 404) {
                    hint = ' (repo or path not found — check owner/repo/ref/directory)';
                }
                throw new Error(`GitHub API error ${response.status}${hint}: ${body.slice(0, 200)}`);
            }
            // `return await` so the `finally` below runs only after the body is read.
            return await response.text();
        }
        finally {
            clearTimeout(timer);
        }
    }
    /**
     * Heuristic binary detection on decoded text.
     *
     * @param text Response body already decoded as a string.
     * @returns `true` when more than 10% of the first 512 UTF-16 code units are control
     *   characters other than tab/LF/VT/FF/CR (9–13) and ESC (27); `false` for empty input.
     */
    isBinary(text) {
        const sample = text.slice(0, 512);
        if (sample.length === 0) {
            return false; // avoid relying on a 0/0 (NaN) comparison for empty files
        }
        let nonText = 0;
        for (let i = 0; i < sample.length; i++) {
            const code = sample.charCodeAt(i);
            if (code < 9 || (code > 13 && code < 32 && code !== 27)) {
                nonText++;
            }
        }
        return nonText / sample.length > 0.1;
    }
};
209
// Export the class, then re-bind both the local variable and the export to the value
// returned by __decorate after applying the Loader(...) decorator and the emitted
// "design:paramtypes" metadata (a single constructor parameter recorded as Object).
+ exports.GitHubLoader = GitHubLoader;
210
+ exports.GitHubLoader = GitHubLoader = __decorate([
211
+ (0, base_loader_1.Loader)({
212
+ name: 'GitHubLoader',
213
+ description: 'Loads files from a GitHub repository using the GitHub REST API.',
214
+ }),
215
+ __metadata("design:paramtypes", [Object])
216
+ ], GitHubLoader);
217
+ //# sourceMappingURL=github.loader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"github.loader.js","sourceRoot":"","sources":["../../src/loaders/github.loader.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4CG;;;;;;;;;;;;AAEH,+CAA2D;AAsEpD,IAAM,YAAY,GAAlB,MAAM,YAAa,SAAQ,gCAAkB;IAIlD,YAAY,OAA4B;QACtC,KAAK,EAAE,CAAC;QAHO,YAAO,GAAG,wBAAwB,CAAC;QAIlD,IAAI,CAAC,IAAI,GAAG;YACV,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,GAAG,EAAE,OAAO,CAAC,GAAG,IAAI,MAAM;YAC1B,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,EAAE;YAClC,iBAAiB,EAAE,OAAO,CAAC,iBAAiB,IAAI,EAAE;YAClD,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,EAAE;YACxC,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,CAAC,eAAe,EAAE,OAAO,EAAE,OAAO,CAAC;YACzE,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,GAAG;YACjC,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,MAAO;YAC3C,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE;YAC1B,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,KAAM;YAClC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,EAAE;SACjC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;QAE7C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,sCAAsC;YACtC,OAAO,CAAC,IAAI,CACV,0DAA0D,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAC9F,CAAC;YACF,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,IAAI,GAAe,EAAE,CAAC;QAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7E,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;gBAChD,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;oBACrB,IAAI,CAAC,IAAI,CACP,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE;wBAC3B,MAAM,EAAE,IAAI,CAAC,IAAI;wBACjB,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,IAAI,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;wBAC5C,GAAG,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG;wBAClB,GAAG,EAAE,sBAAsB,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,SAAS,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,IAAI,EAAE;wBACjG,MAAM,EAAE,qCAAqC,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,GAAG
,IAAI,IAAI,CAAC,IAAI,EAAE;wBAC9G,GAAG,EAAE,IAAI,CAAC,GAAG;wBACb,UAAU,EAAE,QAAQ;wBACpB,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ;qBACtB,CAAC,CACH,CAAC;gBACJ,CAAC;YACH,CAAC;YAAC,OAAO,GAAY,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBACjE,sCAAsC;gBACtC,OAAO,CAAC,IAAI,CAAC,2BAA2B,IAAI,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC,CAAC;YACnE,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED,4EAA4E;IAEpE,KAAK,CAAC,OAAO;QACnB,wEAAwE;QACxE,MAAM,GAAG,GACP,GAAG,IAAI,CAAC,OAAO,UAAU,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YAC5D,cAAc,IAAI,CAAC,IAAI,CAAC,GAAG,cAAc,CAAC;QAE5C,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,MAAM,CAG3B,GAAG,CAAC,CAAC;QAER,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,sCAAsC;YACtC,OAAO,CAAC,IAAI,CACV,iEAAiE;gBAC/D,8EAA8E,CACjF,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;QAE7D,wCAAwC;QACxC,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACxB,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC;YAC5D,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;QAC/D,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,WAAW,CAAC,KAAuB;QACzC,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;YAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;YACvB,MAAM,GAAG,GAAG,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAG,CAAC,WAAW,EAAE,CAAC;YAEvD,mBAAmB;YACnB,IAAI,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACzF,OAAO,KAAK,CAAC;YACf,CAAC;YAED,sBAAsB;YACtB,IACE,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC;gBACjC,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,EACjE,CAAC;gBACD,OAAO,KAAK,CAAC;YACf,CAAC;YAED,sBAAsB;YACtB,IAAI,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC;gBACrE,OAAO,KAAK,CAAC;YACf,CAAC;YAED,m
BAAmB;YACnB,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;gBACjE,sCAAsC;gBACtC,OAAO,CAAC,IAAI,CACV,2BAA2B,IAAI,WAAW,IAAI,CAAC,IAAI,8BAA8B,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CACzG,CAAC;gBACF,OAAO,KAAK,CAAC;YACf,CAAC;YAED,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,cAAc,CAAC,IAAoB;QAC/C,mEAAmE;QACnE,MAAM,MAAM,GACV,qCAAqC,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YACxE,IAAI,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QAEnC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAE5C,oBAAoB;QACpB,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEO,KAAK,CAAC,MAAM,CAAI,GAAW;QACjC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACrC,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAM,CAAC;IAC/B,CAAC;IAEO,KAAK,CAAC,OAAO,CAAC,GAAW;QAC/B,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEtE,MAAM,OAAO,GAA2B;YACtC,MAAM,EAAE,gCAAgC;YACxC,YAAY,EAAE,iBAAiB;SAChC,CAAC;QACF,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YACpB,OAAO,CAAC,eAAe,CAAC,GAAG,UAAU,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QACzD,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;QAC1E,YAAY,CAAC,KAAK,CAAC,CAAC;QAEpB,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;YACnD,MAAM,IAAI,GACR,QAAQ,CAAC,MAAM,KAAK,GAAG;gBACrB,CAAC,CAAC,kEAAkE;gBACpE,CAAC,CAAC,QAAQ,CAAC,MAAM,KAAK,GAAG;oBACvB,CAAC,CAAC,4DAA4D;oBAC9D,CAAC,CAAC,EAAE,CAAC;YACX,MAAM,IAAI,KAAK,CAAC,oBAAoB,QAAQ,CAAC,MAAM,GAAG,IAAI,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QACvF,CAAC;QAED,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC;IAEO,QAAQ,CAAC,IAAY;QAC3B,sFAAsF;QACtF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAClC,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM
,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAClC,IAAI,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,EAAE,IAAI,IAAI,GAAG,EAAE,IAAI,IAAI,KAAK,EAAE,CAAC,EAAE,CAAC;gBACxD,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC;QACD,OAAO,OAAO,GAAG,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC;IACvC,CAAC;CACF,CAAA;AAnMY,oCAAY;uBAAZ,YAAY;IAJxB,IAAA,oBAAM,EAAC;QACN,IAAI,EAAE,cAAc;QACpB,WAAW,EAAE,iEAAiE;KAC/E,CAAC;;GACW,YAAY,CAmMxB"}
@@ -0,0 +1,55 @@
1
+ /**
2
+ * HTMLFileLoader
3
+ *
4
+ * Loads HTML files from disk and converts them to plain text.
5
+ *
6
+ * Uses a built-in tag stripper with no external dependencies (the `selector` option additionally uses `cheerio` when it is installed).
7
+ * For advanced HTML parsing (CSS selectors, JavaScript rendering), use
8
+ * `WebLoader` with the optional `cheerio` dependency instead.
9
+ *
10
+ * Strips:
11
+ * - `<script>` and `<style>` blocks (content + tags) — skipped when `stripScripts` is `false`
12
+ * - HTML comments
13
+ * - All remaining HTML tags (leaving only text nodes)
14
+ *
15
+ * @example
16
+ * ```typescript
17
+ * const loader = new HTMLFileLoader({ path: './page.html' });
18
+ * const docs = await loader.load();
19
+ * // docs[0].content === stripped plain text
20
+ * // docs[0].metadata.title === page <title>
21
+ * ```
22
+ */
23
+ import { BaseDocumentLoader } from './base.loader';
24
+ import type { Document } from '../types';
25
/** Options accepted by the `HTMLFileLoader` constructor. */
+ export interface HTMLFileLoaderOptions {
26
/** Path to a single HTML file. At least one of `path` / `paths` must be supplied. */
+ path?: string;
27
/** Paths to several HTML files; one document is produced per file. */
+ paths?: string[];
28
+ /**
29
+ * CSS selector string used to limit extraction to a specific element.
30
+ * Requires the optional `cheerio` peer dependency to be installed.
31
+ * If cheerio is not installed, the full stripped text is returned.
32
+ * @example 'article', 'main', '#content'
33
+ */
34
+ selector?: string;
35
+ /** Strip <script> and <style> blocks entirely. @default true */
36
+ stripScripts?: boolean;
37
+ /** Collapse runs of spaces/tabs to one space and 3+ consecutive newlines to one blank line. @default true */
38
+ collapseWhitespace?: boolean;
39
+ /** Extra metadata merged into every document. */
40
+ metadata?: Record<string, unknown>;
41
+ }
42
/** Loader that reads local HTML files and produces one plain-text document per file. */
+ export declare class HTMLFileLoader extends BaseDocumentLoader {
43
/** Files to load, taken from the `path` / `paths` options. */
+ private readonly paths;
44
/** Optional CSS selector; only consulted when extraction goes through cheerio. */
+ private readonly selector?;
45
/** Whether `<script>` / `<style>` blocks are removed before tag stripping. */
+ private readonly stripScripts;
46
/** Whether whitespace is collapsed in the extracted text. */
+ private readonly collapseWhitespace;
47
/** Caller-supplied metadata spread into every document's metadata. */
+ private readonly extraMetadata;
48
/** @throws Error when neither `path` nor a non-empty `paths` is provided. */
+ constructor(options: HTMLFileLoaderOptions);
49
/** Reads every configured file and resolves with the resulting documents. */
+ load(): Promise<Document[]>;
50
/** Returns the trimmed content of the first `<title>` element, or `''`. */
+ private extractTitle;
51
/** Removes scripts/styles (optional), comments and tags, then decodes entities. */
+ private stripTags;
52
/** Replaces HTML entities with the characters they stand for. */
+ private decodeEntities;
53
/** Trims the text and, when enabled, collapses whitespace. */
+ private clean;
54
+ }
55
+ //# sourceMappingURL=html-file.loader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html-file.loader.d.ts","sourceRoot":"","sources":["../../src/loaders/html-file.loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAIH,OAAO,EAAE,kBAAkB,EAAU,MAAM,eAAe,CAAC;AAC3D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAEzC,MAAM,WAAW,qBAAqB;IACpC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,gEAAgE;IAChE,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,6EAA6E;IAC7E,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,qBAMa,cAAe,SAAQ,kBAAkB;IACpD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAW;IACjC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAU;IACvC,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAU;IAC7C,OAAO,CAAC,QAAQ,CAAC,aAAa,CAA0B;gBAE5C,OAAO,EAAE,qBAAqB;IAYpC,IAAI,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;IA4CjC,OAAO,CAAC,YAAY;IAKpB,OAAO,CAAC,SAAS;IAsBjB,OAAO,CAAC,cAAc;IAYtB,OAAO,CAAC,KAAK;CAOd"}
@@ -0,0 +1,170 @@
1
+ "use strict";
2
+ /**
3
+ * HTMLFileLoader
4
+ *
5
+ * Loads HTML files from disk and converts them to plain text.
6
+ *
7
+ * Uses a built-in tag stripper with no external dependencies (the `selector` option additionally uses `cheerio` when it is installed).
8
+ * For advanced HTML parsing (CSS selectors, JavaScript rendering), use
9
+ * `WebLoader` with the optional `cheerio` dependency instead.
10
+ *
11
+ * Strips:
12
+ * - `<script>` and `<style>` blocks (content + tags) — skipped when `stripScripts` is `false`
13
+ * - HTML comments
14
+ * - All remaining HTML tags (leaving only text nodes)
15
+ *
16
+ * @example
17
+ * ```typescript
18
+ * const loader = new HTMLFileLoader({ path: './page.html' });
19
+ * const docs = await loader.load();
20
+ * // docs[0].content === stripped plain text
21
+ * // docs[0].metadata.title === page <title>
22
+ * ```
23
+ */
24
// Compiler-emitted interop helper (reused from `this` when already defined there).
// Exposes property `k` of module `m` on object `o` under the name `k2`
// (`k2` defaults to `k`). Where Object.create exists the property is installed
// with defineProperty: the source descriptor is reused unless it is missing, is
// an accessor on a non-__esModule module, or is a writable/configurable data
// property — in those cases an enumerable getter reading `m[k]` is used instead.
// Without Object.create the value is simply copied.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
25
+ if (k2 === undefined) k2 = k;
26
+ var desc = Object.getOwnPropertyDescriptor(m, k);
27
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
28
+ desc = { enumerable: true, get: function() { return m[k]; } };
29
+ }
30
+ Object.defineProperty(o, k2, desc);
31
+ }) : (function(o, m, k, k2) {
32
+ if (k2 === undefined) k2 = k;
33
+ o[k2] = m[k];
34
+ }));
35
// Compiler-emitted interop helper (reused from `this` when already defined there).
// Sets `o.default` to `v`: as an enumerable data property via defineProperty
// when Object.create exists, otherwise by plain assignment.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
36
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
37
+ }) : function(o, v) {
38
+ o["default"] = v;
39
+ });
40
// Compiler-emitted decorator helper (reused from `this` when already defined there).
// Delegates to Reflect.decorate when it is available; otherwise applies the
// decorators from last to first. With fewer than 3 arguments each decorator
// receives the running result `r` (starting from `target`); with more than 3
// it receives (target, key, r); with exactly 3 it receives (target, key).
// A decorator's return value replaces `r` only when it is truthy. With more than
// 3 arguments and a truthy `r`, `r` is defined on `target[key]` before returning.
+ var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
41
+ var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
42
+ if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
43
+ else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
44
+ return c > 3 && r && Object.defineProperty(target, key, r), r;
45
+ };
46
// Compiler-emitted interop helper (reused from `this` when already defined there).
// Returns `mod` unchanged when it is flagged `__esModule`. Otherwise builds a new
// object that re-exposes every own key of `mod` except "default" through
// __createBinding and sets `default` to `mod` itself via __setModuleDefault.
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or
// with a for-in/hasOwnProperty fallback when that function is unavailable.
+ var __importStar = (this && this.__importStar) || (function () {
47
+ var ownKeys = function(o) {
48
+ ownKeys = Object.getOwnPropertyNames || function (o) {
49
+ var ar = [];
50
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
51
+ return ar;
52
+ };
53
+ return ownKeys(o);
54
+ };
55
+ return function (mod) {
56
+ if (mod && mod.__esModule) return mod;
57
+ var result = {};
58
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
59
+ __setModuleDefault(result, mod);
60
+ return result;
61
+ };
62
+ })();
63
// Compiler-emitted helper (reused from `this` when already defined there).
// Returns Reflect.metadata(k, v) when that function exists; otherwise returns
// undefined, which __decorate's fallback loop skips as a falsy decorator.
+ var __metadata = (this && this.__metadata) || function (k, v) {
64
+ if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
65
+ };
66
+ Object.defineProperty(exports, "__esModule", { value: true });
67
+ exports.HTMLFileLoader = void 0;
68
+ const promises_1 = require("fs/promises");
69
+ const path_1 = require("path");
70
+ const base_loader_1 = require("./base.loader");
71
/**
 * Loads local HTML files and converts each one into a plain-text document.
 *
 * Without a `selector` the text is produced by the built-in tag stripper. With a
 * `selector`, cheerio is tried first and the built-in stripper is the fallback
 * whenever the cheerio path throws (for example because it is not installed).
 */
let HTMLFileLoader = class HTMLFileLoader extends base_loader_1.BaseDocumentLoader {
    /**
     * @param options Loader options: `path` / `paths`, `selector`, `stripScripts`
     *   (default true), `collapseWhitespace` (default true) and `metadata`.
     * @throws Error when neither `path` nor a non-empty `paths` is supplied.
     */
    constructor(options) {
        super();
        const hasPaths = Boolean(options.paths && options.paths.length > 0);
        if (!options.path && !hasPaths) {
            throw new Error('HTMLFileLoader: provide at least one path via `path` or `paths`.');
        }
        // A non-empty `paths` takes precedence. An empty `paths` must not shadow a
        // valid `path`, otherwise validation passes but nothing is ever loaded.
        this.paths = hasPaths ? options.paths : [options.path];
        this.selector = options.selector;
        this.stripScripts = options.stripScripts ?? true;
        this.collapseWhitespace = options.collapseWhitespace ?? true;
        this.extraMetadata = options.metadata ?? {};
    }
    /**
     * Reads every configured file (sequentially, as UTF-8) and builds one document
     * per file through `createDocument` (not defined in this class; presumably
     * inherited from the base loader).
     *
     * Metadata contains `source` (file base name), `filePath`, `loaderType: 'html'`,
     * `title` when one was found, and finally the caller-supplied extra metadata,
     * which therefore overrides the built-in keys.
     *
     * @returns The documents, in the order of the configured paths.
     * @throws Whatever `readFile` rejects with (missing file, permissions, …).
     */
    async load() {
        const docs = [];
        for (const filePath of this.paths) {
            const html = await (0, promises_1.readFile)(filePath, { encoding: 'utf-8' });
            let text;
            let title = '';
            if (this.selector) {
                try {
                    // Indirect specifier keeps cheerio an optional, lazily resolved dependency.
                    const cheerio = await Promise.resolve(`${'cheerio'}`).then(s => __importStar(require(s))).then((m) => m);
                    const $ = (cheerio.load ?? cheerio.default)(html);
                    title = $('title').first().text().trim();
                    text = this.clean($(this.selector).text());
                }
                catch {
                    // cheerio unavailable or failed — fall back to the built-in stripper,
                    // and still recover the title so metadata matches the no-selector path.
                    title = this.extractTitle(html);
                    text = this.stripTags(html);
                }
            }
            else {
                title = this.extractTitle(html);
                text = this.stripTags(html);
            }
            docs.push(this.createDocument(text, {
                source: (0, path_1.basename)(filePath),
                filePath,
                loaderType: 'html',
                ...(title && { title }),
                ...this.extraMetadata,
            }));
        }
        return docs;
    }
    // ── Private helpers ──────────────────────────────────────────────────────
    /**
     * Returns the content of the first `<title>` element with entities decoded
     * and surrounding whitespace trimmed, or `''` when there is no title.
     */
    extractTitle(html) {
        const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
        return match ? this.decodeEntities(match[1]).trim() : '';
    }
    /**
     * Converts HTML to plain text: drops script/style blocks (when enabled) and
     * comments, turns `<br>` and block-level closing tags into newlines, replaces
     * every other tag with a space, decodes entities and finally cleans whitespace.
     */
    stripTags(html) {
        let text = html;
        if (this.stripScripts) {
            // Remove <script> blocks
            text = text.replace(/<script[\s\S]*?<\/script>/gi, ' ');
            // Remove <style> blocks
            text = text.replace(/<style[\s\S]*?<\/style>/gi, ' ');
        }
        // Remove HTML comments
        text = text.replace(/<!--[\s\S]*?-->/g, ' ');
        // Line breaks: <br>, <br/>, <br class="x"> (void element, never written as a
        // closing tag in practice) plus the closing tags of block-level elements.
        text = text.replace(/<br\b[^>]*>|<\/(p|div|h[1-6]|li|tr|br)[^>]*>/gi, '\n');
        // Remove all remaining tags
        text = text.replace(/<[^>]+>/g, ' ');
        // Decode common HTML entities
        text = this.decodeEntities(text);
        return this.clean(text);
    }
    /**
     * Decodes a small set of named entities (`amp`, `lt`, `gt`, `quot`, `apos`,
     * `nbsp`, `hellip`) plus decimal (`&#39;`) and hexadecimal (`&#x27;`) numeric
     * references. Unknown or out-of-range entities are left untouched.
     *
     * Everything is decoded in a single pass so that already-escaped text is not
     * decoded twice (`&amp;lt;` becomes `&lt;`, not `<`).
     */
    decodeEntities(text) {
        const named = {
            amp: '&',
            lt: '<',
            gt: '>',
            quot: '"',
            apos: "'",
            nbsp: ' ',
            hellip: '...',
        };
        return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity, body) => {
            if (body[0] === '#') {
                const isHex = body[1] === 'x' || body[1] === 'X';
                const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
                // fromCodePoint throws above U+10FFFF; keep such references verbatim.
                return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
            }
            // Named lookups stay case-sensitive: `&AMP;` is not a recognised entity here.
            return Object.prototype.hasOwnProperty.call(named, body) ? named[body] : entity;
        });
    }
    /**
     * Trims `text`. When `collapseWhitespace` is enabled, runs of spaces/tabs are
     * first reduced to one space and runs of 3+ newlines to a single blank line.
     */
    clean(text) {
        if (!this.collapseWhitespace)
            return text.trim();
        return text
            .replace(/[ \t]+/g, ' ') // collapse inline spaces
            .replace(/\n{3,}/g, '\n\n') // collapse multiple blank lines
            .trim();
    }
};
160
// Export the class, then overwrite both the export and the local binding with the
// value returned by __decorate: the `Loader` decorator (configured with a name,
// description, file extensions and MIME types) followed by the
// "design:paramtypes" metadata for the constructor.
+ exports.HTMLFileLoader = HTMLFileLoader;
161
+ exports.HTMLFileLoader = HTMLFileLoader = __decorate([
162
+ (0, base_loader_1.Loader)({
163
+ name: 'HTMLFileLoader',
164
+ description: 'Loads local HTML files and strips tags to produce plain text.',
165
+ extensions: ['.html', '.htm', '.xhtml'],
166
+ mimeTypes: ['text/html', 'application/xhtml+xml'],
167
+ }),
168
+ __metadata("design:paramtypes", [Object])
169
+ ], HTMLFileLoader);
170
+ //# sourceMappingURL=html-file.loader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html-file.loader.js","sourceRoot":"","sources":["../../src/loaders/html-file.loader.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,0CAAuC;AACvC,+BAAgC;AAChC,+CAA2D;AA2BpD,IAAM,cAAc,GAApB,MAAM,cAAe,SAAQ,gCAAkB;IAOpD,YAAY,OAA8B;QACxC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,EAAE,CAAC;YACpE,MAAM,IAAI,KAAK,CAAC,kEAAkE,CAAC,CAAC;QACtF,CAAC;QACD,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACnE,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;QACjC,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,IAAI,CAAC;QACjD,IAAI,CAAC,kBAAkB,GAAG,OAAO,CAAC,kBAAkB,IAAI,IAAI,CAAC;QAC7D,IAAI,CAAC,aAAa,GAAG,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;IAC9C,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,IAAI,GAAe,EAAE,CAAC;QAE5B,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YAClC,MAAM,IAAI,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;YAE7D,mEAAmE;YACnE,IAAI,IAAY,CAAC;YACjB,IAAI,KAAK,GAAG,EAAE,CAAC;YAEf,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAClB,IAAI,CAAC;oBACH,8DAA8D;oBAC9D,MAAM,OAAO,GAAG,MAAM,mBAAO,SAAmB,wCAAE,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAQ,CAAC,CAAC;oBACxE,MAAM,CAAC,GAAG,CACR,OAAO,CAAC,IAAI,IAAK,OAAuD,CAAC,OAAO,CACjF,CAAC,IAAI,CAAC,CAAC;oBACR,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;oBACzC,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBAC7C,CAAC;gBAAC,MAAM,CAAC;oBACP,oCAAoC;oBACpC,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;gBAC9B,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;gBAChC,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YAC9B,CAAC;YAED,IAAI,CAAC,IAAI,CACP,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE;gBACxB,MAAM,EAAE,IAAA,eAAQ,EAAC,QAAQ,CAAC;gBAC1B,QAAQ;gBACR,UAAU,EAAE,MAAM;gBAClB,GAAG,CAAC,KAAK,IAAI,EAAE,KAAK,EAAE,CAAC;gBACvB,GAAG,IAAI,CAAC,aAAa;aACtB,CAAC,
CACH,CAAC;QACJ,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED,4EAA4E;IAEpE,YAAY,CAAC,IAAY;QAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;QAC7D,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACtC,CAAC;IAEO,SAAS,CAAC,IAAY;QAC5B,IAAI,IAAI,GAAG,IAAI,CAAC;QAEhB,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,yBAAyB;YACzB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,6BAA6B,EAAE,GAAG,CAAC,CAAC;YACxD,wBAAwB;YACxB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,2BAA2B,EAAE,GAAG,CAAC,CAAC;QACxD,CAAC;QAED,uBAAuB;QACvB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,kBAAkB,EAAE,GAAG,CAAC,CAAC;QAC7C,yDAAyD;QACzD,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,oCAAoC,EAAE,IAAI,CAAC,CAAC;QAChE,4BAA4B;QAC5B,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;QACrC,8BAA8B;QAC9B,IAAI,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QAEjC,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAEO,cAAc,CAAC,IAAY;QACjC,OAAO,IAAI;aACR,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;aACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;aACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;aACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;aACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;aACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;aACvB,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC;aAC3B,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IAChF,CAAC;IAEO,KAAK,CAAC,IAAY;QACxB,IAAI,CAAC,IAAI,CAAC,kBAAkB;YAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;QACjD,OAAO,IAAI;aACR,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC,yBAAyB;aACjD,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,gCAAgC;aAC3D,IAAI,EAAE,CAAC;IACZ,CAAC;CACF,CAAA;AA7GY,wCAAc;yBAAd,cAAc;IAN1B,IAAA,oBAAM,EAAC;QACN,IAAI,EAAE,gBAAgB;QACtB,WAAW,EAAE,+DAA+D;QAC5E,UAAU,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC;QACvC,SAAS,EAAE,CAAC,WAAW,EAAE,uBAAuB,CAAC;KAClD,CAAC;;GACW,cAAc,CA6G1B"}
@@ -0,0 +1,52 @@
1
+ /**
2
+ * @hazeljs/rag — Document Loaders
3
+ *
4
+ * All document loaders that ship with @hazeljs/rag.
5
+ *
6
+ * Built-in loaders (no extra dependencies):
7
+ * - TextFileLoader .txt, .log
8
+ * - JSONFileLoader .json
9
+ * - CSVFileLoader .csv
10
+ * - MarkdownFileLoader .md, .mdx
11
+ * - HTMLFileLoader .html, .htm, .xhtml
12
+ * - DirectoryLoader walks a directory and delegates to the above
13
+ * - WebLoader fetches web pages (Node.js fetch)
14
+ * - YouTubeTranscriptLoader YouTube captions via ytInitialPlayerResponse
15
+ * - GitHubLoader GitHub REST API (no extra dep)
16
+ *
17
+ * Optional-dependency loaders (graceful error if dep not installed):
18
+ * - PdfLoader requires: npm install pdf-parse
19
+ * - DocxLoader requires: npm install mammoth
20
+ * - WebLoader (selector) requires: npm install cheerio (only for the `selector` option)
21
+ *
22
+ * Base class and decorator (extend to build your own):
23
+ * - BaseDocumentLoader
24
+ * - @Loader
25
+ * - DocumentLoaderRegistry
26
+ * - LoaderConfig
27
+ */
28
+ export { BaseDocumentLoader, Loader, getLoaderConfig, DocumentLoaderRegistry } from './base.loader';
29
+ export type { LoaderConfig } from './base.loader';
30
+ export { TextFileLoader } from './text-file.loader';
31
+ export { JSONFileLoader } from './json-file.loader';
32
+ export { CSVFileLoader } from './csv-file.loader';
33
+ export { MarkdownFileLoader } from './markdown-file.loader';
34
+ export { HTMLFileLoader } from './html-file.loader';
35
+ export { DirectoryLoader } from './directory.loader';
36
+ export { PdfLoader } from './pdf.loader';
37
+ export { DocxLoader } from './docx.loader';
38
+ export { WebLoader } from './web.loader';
39
+ export { YouTubeTranscriptLoader } from './youtube-transcript.loader';
40
+ export { GitHubLoader } from './github.loader';
41
+ export type { TextFileLoaderOptions } from './text-file.loader';
42
+ export type { JSONFileLoaderOptions } from './json-file.loader';
43
+ export type { CSVFileLoaderOptions } from './csv-file.loader';
44
+ export type { MarkdownFileLoaderOptions } from './markdown-file.loader';
45
+ export type { HTMLFileLoaderOptions } from './html-file.loader';
46
+ export type { DirectoryLoaderOptions, LoaderFactory } from './directory.loader';
47
+ export type { PdfLoaderOptions } from './pdf.loader';
48
+ export type { DocxLoaderOptions } from './docx.loader';
49
+ export type { WebLoaderOptions } from './web.loader';
50
+ export type { YouTubeTranscriptLoaderOptions } from './youtube-transcript.loader';
51
+ export type { GitHubLoaderOptions } from './github.loader';
52
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/loaders/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,kBAAkB,EAAE,MAAM,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC;AACpG,YAAY,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAGlD,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAGrD,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAG3C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,uBAAuB,EAAE,MAAM,6BAA6B,CAAC;AACtE,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAG/C,YAAY,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAChE,YAAY,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAChE,YAAY,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AAC9D,YAAY,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AACxE,YAAY,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAChE,YAAY,EAAE,sBAAsB,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAChF,YAAY,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AACrD,YAAY,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AACvD,YAAY,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AACrD,YAAY,EAAE,8BAA8B,EAAE,MAAM,6BAA6B,CAAC;AAClF,YAAY,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC"}
@@ -0,0 +1,61 @@
1
+ "use strict";
2
+ /**
3
+ * @hazeljs/rag — Document Loaders
4
+ *
5
+ * All document loaders that ship with @hazeljs/rag.
6
+ *
7
+ * Built-in loaders (no extra dependencies):
8
+ * - TextFileLoader .txt, .log
9
+ * - JSONFileLoader .json
10
+ * - CSVFileLoader .csv
11
+ * - MarkdownFileLoader .md, .mdx
12
+ * - HTMLFileLoader .html, .htm, .xhtml
13
+ * - DirectoryLoader walks a directory and delegates to the above
14
+ * - WebLoader fetches web pages (Node.js fetch)
15
+ * - YouTubeTranscriptLoader YouTube captions via ytInitialPlayerResponse
16
+ * - GitHubLoader GitHub REST API (no extra dep)
17
+ *
18
+ * Optional-dependency loaders (graceful error if dep not installed):
19
+ * - PdfLoader requires: npm install pdf-parse
20
+ * - DocxLoader requires: npm install mammoth
21
+ * - WebLoader (selector) requires: npm install cheerio (only for the `selector` option)
22
+ *
23
+ * Base class and decorator (extend to build your own):
24
+ * - BaseDocumentLoader
25
+ * - @Loader
26
+ * - DocumentLoaderRegistry
27
+ * - LoaderConfig
28
+ */
29
+ Object.defineProperty(exports, "__esModule", { value: true });
30
+ exports.GitHubLoader = exports.YouTubeTranscriptLoader = exports.WebLoader = exports.DocxLoader = exports.PdfLoader = exports.DirectoryLoader = exports.HTMLFileLoader = exports.MarkdownFileLoader = exports.CSVFileLoader = exports.JSONFileLoader = exports.TextFileLoader = exports.DocumentLoaderRegistry = exports.getLoaderConfig = exports.Loader = exports.BaseDocumentLoader = void 0;
31
+ var base_loader_1 = require("./base.loader");
32
+ Object.defineProperty(exports, "BaseDocumentLoader", { enumerable: true, get: function () { return base_loader_1.BaseDocumentLoader; } });
33
+ Object.defineProperty(exports, "Loader", { enumerable: true, get: function () { return base_loader_1.Loader; } });
34
+ Object.defineProperty(exports, "getLoaderConfig", { enumerable: true, get: function () { return base_loader_1.getLoaderConfig; } });
35
+ Object.defineProperty(exports, "DocumentLoaderRegistry", { enumerable: true, get: function () { return base_loader_1.DocumentLoaderRegistry; } });
36
+ // ── File-system loaders ───────────────────────────────────────────────────
37
+ var text_file_loader_1 = require("./text-file.loader");
38
+ Object.defineProperty(exports, "TextFileLoader", { enumerable: true, get: function () { return text_file_loader_1.TextFileLoader; } });
39
+ var json_file_loader_1 = require("./json-file.loader");
40
+ Object.defineProperty(exports, "JSONFileLoader", { enumerable: true, get: function () { return json_file_loader_1.JSONFileLoader; } });
41
+ var csv_file_loader_1 = require("./csv-file.loader");
42
+ Object.defineProperty(exports, "CSVFileLoader", { enumerable: true, get: function () { return csv_file_loader_1.CSVFileLoader; } });
43
+ var markdown_file_loader_1 = require("./markdown-file.loader");
44
+ Object.defineProperty(exports, "MarkdownFileLoader", { enumerable: true, get: function () { return markdown_file_loader_1.MarkdownFileLoader; } });
45
+ var html_file_loader_1 = require("./html-file.loader");
46
+ Object.defineProperty(exports, "HTMLFileLoader", { enumerable: true, get: function () { return html_file_loader_1.HTMLFileLoader; } });
47
+ var directory_loader_1 = require("./directory.loader");
48
+ Object.defineProperty(exports, "DirectoryLoader", { enumerable: true, get: function () { return directory_loader_1.DirectoryLoader; } });
49
+ // ── Format loaders (optional deps) ───────────────────────────────────────
50
+ var pdf_loader_1 = require("./pdf.loader");
51
+ Object.defineProperty(exports, "PdfLoader", { enumerable: true, get: function () { return pdf_loader_1.PdfLoader; } });
52
+ var docx_loader_1 = require("./docx.loader");
53
+ Object.defineProperty(exports, "DocxLoader", { enumerable: true, get: function () { return docx_loader_1.DocxLoader; } });
54
+ // ── Remote loaders ────────────────────────────────────────────────────────
55
+ var web_loader_1 = require("./web.loader");
56
+ Object.defineProperty(exports, "WebLoader", { enumerable: true, get: function () { return web_loader_1.WebLoader; } });
57
+ var youtube_transcript_loader_1 = require("./youtube-transcript.loader");
58
+ Object.defineProperty(exports, "YouTubeTranscriptLoader", { enumerable: true, get: function () { return youtube_transcript_loader_1.YouTubeTranscriptLoader; } });
59
+ var github_loader_1 = require("./github.loader");
60
+ Object.defineProperty(exports, "GitHubLoader", { enumerable: true, get: function () { return github_loader_1.GitHubLoader; } });
61
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/loaders/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;;;AAEH,6CAAoG;AAA3F,iHAAA,kBAAkB,OAAA;AAAE,qGAAA,MAAM,OAAA;AAAE,8GAAA,eAAe,OAAA;AAAE,qHAAA,sBAAsB,OAAA;AAG5E,6EAA6E;AAC7E,uDAAoD;AAA3C,kHAAA,cAAc,OAAA;AACvB,uDAAoD;AAA3C,kHAAA,cAAc,OAAA;AACvB,qDAAkD;AAAzC,gHAAA,aAAa,OAAA;AACtB,+DAA4D;AAAnD,0HAAA,kBAAkB,OAAA;AAC3B,uDAAoD;AAA3C,kHAAA,cAAc,OAAA;AACvB,uDAAqD;AAA5C,mHAAA,eAAe,OAAA;AAExB,4EAA4E;AAC5E,2CAAyC;AAAhC,uGAAA,SAAS,OAAA;AAClB,6CAA2C;AAAlC,yGAAA,UAAU,OAAA;AAEnB,6EAA6E;AAC7E,2CAAyC;AAAhC,uGAAA,SAAS,OAAA;AAClB,yEAAsE;AAA7D,oIAAA,uBAAuB,OAAA;AAChC,iDAA+C;AAAtC,6GAAA,YAAY,OAAA"}
@@ -0,0 +1,51 @@
1
+ /**
2
+ * JSONFileLoader
3
+ *
4
+ * Loads a JSON file and converts it to documents.
5
+ *
6
+ * Supports two shapes:
7
+ * - **Array mode** (default) — if the root value is an array, each element
8
+ * becomes a document. The element's text is either the value of `textKey`
9
+ * (a specific field) or the full JSON stringification of the element.
10
+ * - **Object mode** — a single root object becomes one document.
11
+ *
12
+ * @example
13
+ * ```typescript
14
+ * // articles.json = [{ "title": "...", "body": "..." }, ...]
15
+ * const loader = new JSONFileLoader({
16
+ * path: './articles.json',
17
+ * textKey: 'body', // use "body" field as content
18
+ * metadataKeys: ['title'], // include "title" in metadata
19
+ * });
20
+ * const docs = await loader.load();
21
+ * ```
22
+ */
23
+ import { BaseDocumentLoader } from './base.loader';
24
+ import type { Document } from '../types';
25
/** Options accepted by the `JSONFileLoader` constructor. */
+ export interface JSONFileLoaderOptions {
26
+ /** Path to the JSON file. */
27
+ path: string;
28
+ /**
29
+ * Key in each array element (or root object) whose value becomes the
30
+ * document content. If omitted, the entire element is JSON-stringified.
31
+ */
32
+ textKey?: string;
33
+ /**
34
+ * Keys to extract from each element into the document metadata.
35
+ * All other keys are omitted from metadata.
36
+ */
37
+ metadataKeys?: string[];
38
+ /** Extra metadata merged into every document. */
39
+ metadata?: Record<string, unknown>;
40
+ /**
41
+ * Dot-separated property path to the array within the JSON file.
42
+ * @example 'data.results' → reads `json.data.results`
43
+ */
44
+ jsonPointer?: string;
45
+ }
46
/** Loader that reads one JSON file and converts its content into documents. */
+ export declare class JSONFileLoader extends BaseDocumentLoader {
47
/** Options captured by the constructor. */
+ private readonly opts;
48
/** @param options Location of the JSON file and how to map it to documents. */
+ constructor(options: JSONFileLoaderOptions);
49
/** Loads the file and resolves with the resulting documents. */
+ load(): Promise<Document[]>;
50
+ }
51
+ //# sourceMappingURL=json-file.loader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json-file.loader.d.ts","sourceRoot":"","sources":["../../src/loaders/json-file.loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAIH,OAAO,EAAE,kBAAkB,EAAU,MAAM,eAAe,CAAC;AAC3D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAEzC,MAAM,WAAW,qBAAqB;IACpC,6BAA6B;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnC;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qBAMa,cAAe,SAAQ,kBAAkB;IACpD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAGnB;gBAEU,OAAO,EAAE,qBAAqB;IAWpC,IAAI,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;CAkDlC"}