langchain 0.2.18 → 0.3.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (338) hide show
  1. package/dist/agents/openai_functions/index.cjs +2 -2
  2. package/dist/agents/openai_functions/index.js +2 -2
  3. package/dist/chains/combine_documents/stuff.cjs +2 -2
  4. package/dist/chains/combine_documents/stuff.js +2 -2
  5. package/dist/chains/openai_functions/openapi.cjs +3 -1
  6. package/dist/chains/openai_functions/openapi.js +3 -1
  7. package/dist/load/import_constants.cjs +2 -39
  8. package/dist/load/import_constants.js +2 -39
  9. package/dist/load/import_map.cjs +2 -3
  10. package/dist/load/import_map.d.ts +0 -1
  11. package/dist/load/import_map.js +0 -1
  12. package/dist/smith/config.d.ts +1 -5
  13. package/package.json +31 -854
  14. package/dist/document_loaders/fs/chatgpt.cjs +0 -90
  15. package/dist/document_loaders/fs/chatgpt.d.ts +0 -8
  16. package/dist/document_loaders/fs/chatgpt.js +0 -86
  17. package/dist/document_loaders/fs/csv.cjs +0 -73
  18. package/dist/document_loaders/fs/csv.d.ts +0 -65
  19. package/dist/document_loaders/fs/csv.js +0 -69
  20. package/dist/document_loaders/fs/docx.cjs +0 -58
  21. package/dist/document_loaders/fs/docx.d.ts +0 -25
  22. package/dist/document_loaders/fs/docx.js +0 -54
  23. package/dist/document_loaders/fs/epub.cjs +0 -103
  24. package/dist/document_loaders/fs/epub.d.ts +0 -33
  25. package/dist/document_loaders/fs/epub.js +0 -99
  26. package/dist/document_loaders/fs/notion.cjs +0 -26
  27. package/dist/document_loaders/fs/notion.d.ts +0 -12
  28. package/dist/document_loaders/fs/notion.js +0 -22
  29. package/dist/document_loaders/fs/obsidian.cjs +0 -247
  30. package/dist/document_loaders/fs/obsidian.d.ts +0 -28
  31. package/dist/document_loaders/fs/obsidian.js +0 -240
  32. package/dist/document_loaders/fs/openai_whisper_audio.cjs +0 -49
  33. package/dist/document_loaders/fs/openai_whisper_audio.d.ts +0 -23
  34. package/dist/document_loaders/fs/openai_whisper_audio.js +0 -45
  35. package/dist/document_loaders/fs/pdf.cjs +0 -148
  36. package/dist/document_loaders/fs/pdf.d.ts +0 -49
  37. package/dist/document_loaders/fs/pdf.js +0 -144
  38. package/dist/document_loaders/fs/pptx.cjs +0 -46
  39. package/dist/document_loaders/fs/pptx.d.ts +0 -25
  40. package/dist/document_loaders/fs/pptx.js +0 -42
  41. package/dist/document_loaders/fs/srt.cjs +0 -57
  42. package/dist/document_loaders/fs/srt.d.ts +0 -32
  43. package/dist/document_loaders/fs/srt.js +0 -50
  44. package/dist/document_loaders/fs/unstructured.cjs +0 -338
  45. package/dist/document_loaders/fs/unstructured.d.ts +0 -125
  46. package/dist/document_loaders/fs/unstructured.js +0 -333
  47. package/dist/document_loaders/web/apify_dataset.cjs +0 -130
  48. package/dist/document_loaders/web/apify_dataset.d.ts +0 -85
  49. package/dist/document_loaders/web/apify_dataset.js +0 -126
  50. package/dist/document_loaders/web/assemblyai.cjs +0 -200
  51. package/dist/document_loaders/web/assemblyai.d.ts +0 -95
  52. package/dist/document_loaders/web/assemblyai.js +0 -193
  53. package/dist/document_loaders/web/azure_blob_storage_container.cjs +0 -73
  54. package/dist/document_loaders/web/azure_blob_storage_container.d.ts +0 -46
  55. package/dist/document_loaders/web/azure_blob_storage_container.js +0 -69
  56. package/dist/document_loaders/web/azure_blob_storage_file.cjs +0 -124
  57. package/dist/document_loaders/web/azure_blob_storage_file.d.ts +0 -53
  58. package/dist/document_loaders/web/azure_blob_storage_file.js +0 -97
  59. package/dist/document_loaders/web/browserbase.cjs +0 -93
  60. package/dist/document_loaders/web/browserbase.d.ts +0 -48
  61. package/dist/document_loaders/web/browserbase.js +0 -86
  62. package/dist/document_loaders/web/cheerio.cjs +0 -118
  63. package/dist/document_loaders/web/cheerio.d.ts +0 -77
  64. package/dist/document_loaders/web/cheerio.js +0 -114
  65. package/dist/document_loaders/web/college_confidential.cjs +0 -41
  66. package/dist/document_loaders/web/college_confidential.d.ts +0 -25
  67. package/dist/document_loaders/web/college_confidential.js +0 -37
  68. package/dist/document_loaders/web/confluence.cjs +0 -190
  69. package/dist/document_loaders/web/confluence.d.ts +0 -114
  70. package/dist/document_loaders/web/confluence.js +0 -186
  71. package/dist/document_loaders/web/couchbase.cjs +0 -95
  72. package/dist/document_loaders/web/couchbase.d.ts +0 -32
  73. package/dist/document_loaders/web/couchbase.js +0 -91
  74. package/dist/document_loaders/web/figma.cjs +0 -102
  75. package/dist/document_loaders/web/figma.d.ts +0 -82
  76. package/dist/document_loaders/web/figma.js +0 -98
  77. package/dist/document_loaders/web/firecrawl.cjs +0 -95
  78. package/dist/document_loaders/web/firecrawl.d.ts +0 -50
  79. package/dist/document_loaders/web/firecrawl.js +0 -88
  80. package/dist/document_loaders/web/gitbook.cjs +0 -110
  81. package/dist/document_loaders/web/gitbook.d.ts +0 -55
  82. package/dist/document_loaders/web/gitbook.js +0 -106
  83. package/dist/document_loaders/web/github.cjs +0 -615
  84. package/dist/document_loaders/web/github.d.ts +0 -203
  85. package/dist/document_loaders/web/github.js +0 -608
  86. package/dist/document_loaders/web/hn.cjs +0 -90
  87. package/dist/document_loaders/web/hn.d.ts +0 -42
  88. package/dist/document_loaders/web/hn.js +0 -86
  89. package/dist/document_loaders/web/imsdb.cjs +0 -44
  90. package/dist/document_loaders/web/imsdb.d.ts +0 -23
  91. package/dist/document_loaders/web/imsdb.js +0 -40
  92. package/dist/document_loaders/web/notionapi.cjs +0 -404
  93. package/dist/document_loaders/web/notionapi.d.ts +0 -133
  94. package/dist/document_loaders/web/notionapi.js +0 -392
  95. package/dist/document_loaders/web/notiondb.cjs +0 -199
  96. package/dist/document_loaders/web/notiondb.d.ts +0 -56
  97. package/dist/document_loaders/web/notiondb.js +0 -195
  98. package/dist/document_loaders/web/pdf.cjs +0 -140
  99. package/dist/document_loaders/web/pdf.d.ts +0 -35
  100. package/dist/document_loaders/web/pdf.js +0 -136
  101. package/dist/document_loaders/web/playwright.cjs +0 -89
  102. package/dist/document_loaders/web/playwright.d.ts +0 -58
  103. package/dist/document_loaders/web/playwright.js +0 -85
  104. package/dist/document_loaders/web/puppeteer.cjs +0 -139
  105. package/dist/document_loaders/web/puppeteer.d.ts +0 -82
  106. package/dist/document_loaders/web/puppeteer.js +0 -135
  107. package/dist/document_loaders/web/recursive_url.cjs +0 -198
  108. package/dist/document_loaders/web/recursive_url.d.ts +0 -33
  109. package/dist/document_loaders/web/recursive_url.js +0 -194
  110. package/dist/document_loaders/web/s3.cjs +0 -164
  111. package/dist/document_loaders/web/s3.d.ts +0 -78
  112. package/dist/document_loaders/web/s3.js +0 -137
  113. package/dist/document_loaders/web/searchapi.cjs +0 -150
  114. package/dist/document_loaders/web/searchapi.d.ts +0 -76
  115. package/dist/document_loaders/web/searchapi.js +0 -146
  116. package/dist/document_loaders/web/serpapi.cjs +0 -127
  117. package/dist/document_loaders/web/serpapi.d.ts +0 -62
  118. package/dist/document_loaders/web/serpapi.js +0 -123
  119. package/dist/document_loaders/web/sitemap.cjs +0 -118
  120. package/dist/document_loaders/web/sitemap.d.ts +0 -41
  121. package/dist/document_loaders/web/sitemap.js +0 -114
  122. package/dist/document_loaders/web/sonix_audio.cjs +0 -68
  123. package/dist/document_loaders/web/sonix_audio.d.ts +0 -36
  124. package/dist/document_loaders/web/sonix_audio.js +0 -64
  125. package/dist/document_loaders/web/sort_xyz_blockchain.cjs +0 -157
  126. package/dist/document_loaders/web/sort_xyz_blockchain.d.ts +0 -78
  127. package/dist/document_loaders/web/sort_xyz_blockchain.js +0 -153
  128. package/dist/document_loaders/web/youtube.cjs +0 -116
  129. package/dist/document_loaders/web/youtube.d.ts +0 -55
  130. package/dist/document_loaders/web/youtube.js +0 -112
  131. package/dist/experimental/tools/pyinterpreter.cjs +0 -248
  132. package/dist/experimental/tools/pyinterpreter.d.ts +0 -18
  133. package/dist/experimental/tools/pyinterpreter.js +0 -244
  134. package/dist/retrievers/self_query/chroma.cjs +0 -48
  135. package/dist/retrievers/self_query/chroma.d.ts +0 -26
  136. package/dist/retrievers/self_query/chroma.js +0 -44
  137. package/dist/retrievers/self_query/pinecone.cjs +0 -47
  138. package/dist/retrievers/self_query/pinecone.d.ts +0 -26
  139. package/dist/retrievers/self_query/pinecone.js +0 -43
  140. package/dist/retrievers/self_query/supabase.cjs +0 -278
  141. package/dist/retrievers/self_query/supabase.d.ts +0 -109
  142. package/dist/retrievers/self_query/supabase.js +0 -274
  143. package/dist/retrievers/self_query/supabase_utils.cjs +0 -264
  144. package/dist/retrievers/self_query/supabase_utils.d.ts +0 -101
  145. package/dist/retrievers/self_query/supabase_utils.js +0 -259
  146. package/dist/retrievers/self_query/vectara.cjs +0 -143
  147. package/dist/retrievers/self_query/vectara.d.ts +0 -42
  148. package/dist/retrievers/self_query/vectara.js +0 -139
  149. package/dist/retrievers/self_query/weaviate.cjs +0 -201
  150. package/dist/retrievers/self_query/weaviate.d.ts +0 -99
  151. package/dist/retrievers/self_query/weaviate.js +0 -197
  152. package/dist/types/assemblyai-types.cjs +0 -2
  153. package/dist/types/assemblyai-types.d.ts +0 -4
  154. package/dist/types/assemblyai-types.js +0 -1
  155. package/document_loaders/fs/chatgpt.cjs +0 -1
  156. package/document_loaders/fs/chatgpt.d.cts +0 -1
  157. package/document_loaders/fs/chatgpt.d.ts +0 -1
  158. package/document_loaders/fs/chatgpt.js +0 -1
  159. package/document_loaders/fs/csv.cjs +0 -1
  160. package/document_loaders/fs/csv.d.cts +0 -1
  161. package/document_loaders/fs/csv.d.ts +0 -1
  162. package/document_loaders/fs/csv.js +0 -1
  163. package/document_loaders/fs/docx.cjs +0 -1
  164. package/document_loaders/fs/docx.d.cts +0 -1
  165. package/document_loaders/fs/docx.d.ts +0 -1
  166. package/document_loaders/fs/docx.js +0 -1
  167. package/document_loaders/fs/epub.cjs +0 -1
  168. package/document_loaders/fs/epub.d.cts +0 -1
  169. package/document_loaders/fs/epub.d.ts +0 -1
  170. package/document_loaders/fs/epub.js +0 -1
  171. package/document_loaders/fs/notion.cjs +0 -1
  172. package/document_loaders/fs/notion.d.cts +0 -1
  173. package/document_loaders/fs/notion.d.ts +0 -1
  174. package/document_loaders/fs/notion.js +0 -1
  175. package/document_loaders/fs/obsidian.cjs +0 -1
  176. package/document_loaders/fs/obsidian.d.cts +0 -1
  177. package/document_loaders/fs/obsidian.d.ts +0 -1
  178. package/document_loaders/fs/obsidian.js +0 -1
  179. package/document_loaders/fs/openai_whisper_audio.cjs +0 -1
  180. package/document_loaders/fs/openai_whisper_audio.d.cts +0 -1
  181. package/document_loaders/fs/openai_whisper_audio.d.ts +0 -1
  182. package/document_loaders/fs/openai_whisper_audio.js +0 -1
  183. package/document_loaders/fs/pdf.cjs +0 -1
  184. package/document_loaders/fs/pdf.d.cts +0 -1
  185. package/document_loaders/fs/pdf.d.ts +0 -1
  186. package/document_loaders/fs/pdf.js +0 -1
  187. package/document_loaders/fs/pptx.cjs +0 -1
  188. package/document_loaders/fs/pptx.d.cts +0 -1
  189. package/document_loaders/fs/pptx.d.ts +0 -1
  190. package/document_loaders/fs/pptx.js +0 -1
  191. package/document_loaders/fs/srt.cjs +0 -1
  192. package/document_loaders/fs/srt.d.cts +0 -1
  193. package/document_loaders/fs/srt.d.ts +0 -1
  194. package/document_loaders/fs/srt.js +0 -1
  195. package/document_loaders/fs/unstructured.cjs +0 -1
  196. package/document_loaders/fs/unstructured.d.cts +0 -1
  197. package/document_loaders/fs/unstructured.d.ts +0 -1
  198. package/document_loaders/fs/unstructured.js +0 -1
  199. package/document_loaders/web/apify_dataset.cjs +0 -1
  200. package/document_loaders/web/apify_dataset.d.cts +0 -1
  201. package/document_loaders/web/apify_dataset.d.ts +0 -1
  202. package/document_loaders/web/apify_dataset.js +0 -1
  203. package/document_loaders/web/assemblyai.cjs +0 -1
  204. package/document_loaders/web/assemblyai.d.cts +0 -1
  205. package/document_loaders/web/assemblyai.d.ts +0 -1
  206. package/document_loaders/web/assemblyai.js +0 -1
  207. package/document_loaders/web/azure_blob_storage_container.cjs +0 -1
  208. package/document_loaders/web/azure_blob_storage_container.d.cts +0 -1
  209. package/document_loaders/web/azure_blob_storage_container.d.ts +0 -1
  210. package/document_loaders/web/azure_blob_storage_container.js +0 -1
  211. package/document_loaders/web/azure_blob_storage_file.cjs +0 -1
  212. package/document_loaders/web/azure_blob_storage_file.d.cts +0 -1
  213. package/document_loaders/web/azure_blob_storage_file.d.ts +0 -1
  214. package/document_loaders/web/azure_blob_storage_file.js +0 -1
  215. package/document_loaders/web/browserbase.cjs +0 -1
  216. package/document_loaders/web/browserbase.d.cts +0 -1
  217. package/document_loaders/web/browserbase.d.ts +0 -1
  218. package/document_loaders/web/browserbase.js +0 -1
  219. package/document_loaders/web/cheerio.cjs +0 -1
  220. package/document_loaders/web/cheerio.d.cts +0 -1
  221. package/document_loaders/web/cheerio.d.ts +0 -1
  222. package/document_loaders/web/cheerio.js +0 -1
  223. package/document_loaders/web/college_confidential.cjs +0 -1
  224. package/document_loaders/web/college_confidential.d.cts +0 -1
  225. package/document_loaders/web/college_confidential.d.ts +0 -1
  226. package/document_loaders/web/college_confidential.js +0 -1
  227. package/document_loaders/web/confluence.cjs +0 -1
  228. package/document_loaders/web/confluence.d.cts +0 -1
  229. package/document_loaders/web/confluence.d.ts +0 -1
  230. package/document_loaders/web/confluence.js +0 -1
  231. package/document_loaders/web/couchbase.cjs +0 -1
  232. package/document_loaders/web/couchbase.d.cts +0 -1
  233. package/document_loaders/web/couchbase.d.ts +0 -1
  234. package/document_loaders/web/couchbase.js +0 -1
  235. package/document_loaders/web/figma.cjs +0 -1
  236. package/document_loaders/web/figma.d.cts +0 -1
  237. package/document_loaders/web/figma.d.ts +0 -1
  238. package/document_loaders/web/figma.js +0 -1
  239. package/document_loaders/web/firecrawl.cjs +0 -1
  240. package/document_loaders/web/firecrawl.d.cts +0 -1
  241. package/document_loaders/web/firecrawl.d.ts +0 -1
  242. package/document_loaders/web/firecrawl.js +0 -1
  243. package/document_loaders/web/gitbook.cjs +0 -1
  244. package/document_loaders/web/gitbook.d.cts +0 -1
  245. package/document_loaders/web/gitbook.d.ts +0 -1
  246. package/document_loaders/web/gitbook.js +0 -1
  247. package/document_loaders/web/github.cjs +0 -1
  248. package/document_loaders/web/github.d.cts +0 -1
  249. package/document_loaders/web/github.d.ts +0 -1
  250. package/document_loaders/web/github.js +0 -1
  251. package/document_loaders/web/hn.cjs +0 -1
  252. package/document_loaders/web/hn.d.cts +0 -1
  253. package/document_loaders/web/hn.d.ts +0 -1
  254. package/document_loaders/web/hn.js +0 -1
  255. package/document_loaders/web/imsdb.cjs +0 -1
  256. package/document_loaders/web/imsdb.d.cts +0 -1
  257. package/document_loaders/web/imsdb.d.ts +0 -1
  258. package/document_loaders/web/imsdb.js +0 -1
  259. package/document_loaders/web/notionapi.cjs +0 -1
  260. package/document_loaders/web/notionapi.d.cts +0 -1
  261. package/document_loaders/web/notionapi.d.ts +0 -1
  262. package/document_loaders/web/notionapi.js +0 -1
  263. package/document_loaders/web/notiondb.cjs +0 -1
  264. package/document_loaders/web/notiondb.d.cts +0 -1
  265. package/document_loaders/web/notiondb.d.ts +0 -1
  266. package/document_loaders/web/notiondb.js +0 -1
  267. package/document_loaders/web/pdf.cjs +0 -1
  268. package/document_loaders/web/pdf.d.cts +0 -1
  269. package/document_loaders/web/pdf.d.ts +0 -1
  270. package/document_loaders/web/pdf.js +0 -1
  271. package/document_loaders/web/playwright.cjs +0 -1
  272. package/document_loaders/web/playwright.d.cts +0 -1
  273. package/document_loaders/web/playwright.d.ts +0 -1
  274. package/document_loaders/web/playwright.js +0 -1
  275. package/document_loaders/web/puppeteer.cjs +0 -1
  276. package/document_loaders/web/puppeteer.d.cts +0 -1
  277. package/document_loaders/web/puppeteer.d.ts +0 -1
  278. package/document_loaders/web/puppeteer.js +0 -1
  279. package/document_loaders/web/recursive_url.cjs +0 -1
  280. package/document_loaders/web/recursive_url.d.cts +0 -1
  281. package/document_loaders/web/recursive_url.d.ts +0 -1
  282. package/document_loaders/web/recursive_url.js +0 -1
  283. package/document_loaders/web/s3.cjs +0 -1
  284. package/document_loaders/web/s3.d.cts +0 -1
  285. package/document_loaders/web/s3.d.ts +0 -1
  286. package/document_loaders/web/s3.js +0 -1
  287. package/document_loaders/web/searchapi.cjs +0 -1
  288. package/document_loaders/web/searchapi.d.cts +0 -1
  289. package/document_loaders/web/searchapi.d.ts +0 -1
  290. package/document_loaders/web/searchapi.js +0 -1
  291. package/document_loaders/web/serpapi.cjs +0 -1
  292. package/document_loaders/web/serpapi.d.cts +0 -1
  293. package/document_loaders/web/serpapi.d.ts +0 -1
  294. package/document_loaders/web/serpapi.js +0 -1
  295. package/document_loaders/web/sitemap.cjs +0 -1
  296. package/document_loaders/web/sitemap.d.cts +0 -1
  297. package/document_loaders/web/sitemap.d.ts +0 -1
  298. package/document_loaders/web/sitemap.js +0 -1
  299. package/document_loaders/web/sonix_audio.cjs +0 -1
  300. package/document_loaders/web/sonix_audio.d.cts +0 -1
  301. package/document_loaders/web/sonix_audio.d.ts +0 -1
  302. package/document_loaders/web/sonix_audio.js +0 -1
  303. package/document_loaders/web/sort_xyz_blockchain.cjs +0 -1
  304. package/document_loaders/web/sort_xyz_blockchain.d.cts +0 -1
  305. package/document_loaders/web/sort_xyz_blockchain.d.ts +0 -1
  306. package/document_loaders/web/sort_xyz_blockchain.js +0 -1
  307. package/document_loaders/web/youtube.cjs +0 -1
  308. package/document_loaders/web/youtube.d.cts +0 -1
  309. package/document_loaders/web/youtube.d.ts +0 -1
  310. package/document_loaders/web/youtube.js +0 -1
  311. package/experimental/tools/pyinterpreter.cjs +0 -1
  312. package/experimental/tools/pyinterpreter.d.cts +0 -1
  313. package/experimental/tools/pyinterpreter.d.ts +0 -1
  314. package/experimental/tools/pyinterpreter.js +0 -1
  315. package/memory/index.cjs +0 -1
  316. package/memory/index.d.cts +0 -1
  317. package/memory/index.d.ts +0 -1
  318. package/memory/index.js +0 -1
  319. package/retrievers/self_query/chroma.cjs +0 -1
  320. package/retrievers/self_query/chroma.d.cts +0 -1
  321. package/retrievers/self_query/chroma.d.ts +0 -1
  322. package/retrievers/self_query/chroma.js +0 -1
  323. package/retrievers/self_query/pinecone.cjs +0 -1
  324. package/retrievers/self_query/pinecone.d.cts +0 -1
  325. package/retrievers/self_query/pinecone.d.ts +0 -1
  326. package/retrievers/self_query/pinecone.js +0 -1
  327. package/retrievers/self_query/supabase.cjs +0 -1
  328. package/retrievers/self_query/supabase.d.cts +0 -1
  329. package/retrievers/self_query/supabase.d.ts +0 -1
  330. package/retrievers/self_query/supabase.js +0 -1
  331. package/retrievers/self_query/vectara.cjs +0 -1
  332. package/retrievers/self_query/vectara.d.cts +0 -1
  333. package/retrievers/self_query/vectara.d.ts +0 -1
  334. package/retrievers/self_query/vectara.js +0 -1
  335. package/retrievers/self_query/weaviate.cjs +0 -1
  336. package/retrievers/self_query/weaviate.d.cts +0 -1
  337. package/retrievers/self_query/weaviate.d.ts +0 -1
  338. package/retrievers/self_query/weaviate.js +0 -1
@@ -1,608 +0,0 @@
1
- import ignore from "ignore";
2
- import binaryExtensions from "binary-extensions";
3
- import { Document } from "@langchain/core/documents";
4
- import { getEnvironmentVariable } from "@langchain/core/utils/env";
5
- import { AsyncCaller, } from "@langchain/core/utils/async_caller";
6
- import { BaseDocumentLoader } from "../base.js";
7
- import { UnknownHandling } from "../fs/directory.js";
8
- import { extname } from "../../util/extname.js";
9
- import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
10
- /* #__PURE__ */ logVersion020MigrationWarning({
11
- oldEntrypointName: "document_loaders/web/github",
12
- newPackageName: "@langchain/community",
13
- });
14
- const extensions = /* #__PURE__ */ new Set(binaryExtensions);
15
- /**
16
- * A function that checks if a file path is a binary file based on its
17
- * extension.
18
- * @param name The file path to check.
19
- * @returns A boolean indicating whether the file path is a binary file.
20
- */
21
- function isBinaryPath(name) {
22
- return extensions.has(extname(name).slice(1).toLowerCase());
23
- }
24
- /**
25
- * @deprecated - Import from "@langchain/community/document_loaders/web/github" instead. This entrypoint will be removed in 0.3.0.
26
- *
27
- * A class that extends the BaseDocumentLoader and implements the
28
- * GithubRepoLoaderParams interface. It represents a document loader for
29
- * loading files from a GitHub repository.
30
- */
31
- export class GithubRepoLoader extends BaseDocumentLoader {
32
- constructor(githubUrl, { accessToken = getEnvironmentVariable("GITHUB_ACCESS_TOKEN"), baseUrl = "https://github.com", apiUrl = "https://api.github.com", branch = "main", recursive = true, processSubmodules = false, unknown = UnknownHandling.Warn, ignoreFiles = [], ignorePaths, verbose = false, maxConcurrency = 2, maxRetries = 2, ...rest } = {}) {
33
- super();
34
- Object.defineProperty(this, "baseUrl", {
35
- enumerable: true,
36
- configurable: true,
37
- writable: true,
38
- value: void 0
39
- });
40
- Object.defineProperty(this, "apiUrl", {
41
- enumerable: true,
42
- configurable: true,
43
- writable: true,
44
- value: void 0
45
- });
46
- Object.defineProperty(this, "owner", {
47
- enumerable: true,
48
- configurable: true,
49
- writable: true,
50
- value: void 0
51
- });
52
- Object.defineProperty(this, "repo", {
53
- enumerable: true,
54
- configurable: true,
55
- writable: true,
56
- value: void 0
57
- });
58
- Object.defineProperty(this, "initialPath", {
59
- enumerable: true,
60
- configurable: true,
61
- writable: true,
62
- value: void 0
63
- });
64
- Object.defineProperty(this, "headers", {
65
- enumerable: true,
66
- configurable: true,
67
- writable: true,
68
- value: {}
69
- });
70
- Object.defineProperty(this, "branch", {
71
- enumerable: true,
72
- configurable: true,
73
- writable: true,
74
- value: void 0
75
- });
76
- Object.defineProperty(this, "recursive", {
77
- enumerable: true,
78
- configurable: true,
79
- writable: true,
80
- value: void 0
81
- });
82
- Object.defineProperty(this, "processSubmodules", {
83
- enumerable: true,
84
- configurable: true,
85
- writable: true,
86
- value: void 0
87
- });
88
- Object.defineProperty(this, "unknown", {
89
- enumerable: true,
90
- configurable: true,
91
- writable: true,
92
- value: void 0
93
- });
94
- Object.defineProperty(this, "accessToken", {
95
- enumerable: true,
96
- configurable: true,
97
- writable: true,
98
- value: void 0
99
- });
100
- Object.defineProperty(this, "ignoreFiles", {
101
- enumerable: true,
102
- configurable: true,
103
- writable: true,
104
- value: void 0
105
- });
106
- Object.defineProperty(this, "ignore", {
107
- enumerable: true,
108
- configurable: true,
109
- writable: true,
110
- value: void 0
111
- });
112
- Object.defineProperty(this, "verbose", {
113
- enumerable: true,
114
- configurable: true,
115
- writable: true,
116
- value: void 0
117
- });
118
- Object.defineProperty(this, "maxConcurrency", {
119
- enumerable: true,
120
- configurable: true,
121
- writable: true,
122
- value: void 0
123
- });
124
- Object.defineProperty(this, "maxRetries", {
125
- enumerable: true,
126
- configurable: true,
127
- writable: true,
128
- value: void 0
129
- });
130
- Object.defineProperty(this, "caller", {
131
- enumerable: true,
132
- configurable: true,
133
- writable: true,
134
- value: void 0
135
- });
136
- Object.defineProperty(this, "ignorePaths", {
137
- enumerable: true,
138
- configurable: true,
139
- writable: true,
140
- value: void 0
141
- });
142
- Object.defineProperty(this, "submoduleInfos", {
143
- enumerable: true,
144
- configurable: true,
145
- writable: true,
146
- value: void 0
147
- });
148
- this.baseUrl = baseUrl;
149
- this.apiUrl = apiUrl;
150
- const { owner, repo, path } = this.extractOwnerAndRepoAndPath(githubUrl);
151
- this.owner = owner;
152
- this.repo = repo;
153
- this.initialPath = path;
154
- this.branch = branch;
155
- this.recursive = recursive;
156
- // processing submodules without processing contents of other directories makes no sense
157
- if (processSubmodules && !recursive) {
158
- throw new Error(`Input property "recursive" must be true if "processSubmodules" is true.`);
159
- }
160
- this.processSubmodules = processSubmodules;
161
- this.unknown = unknown;
162
- this.accessToken = accessToken;
163
- this.ignoreFiles = ignoreFiles;
164
- this.verbose = verbose;
165
- this.maxConcurrency = maxConcurrency;
166
- this.maxRetries = maxRetries;
167
- this.headers = {
168
- "User-Agent": "langchain",
169
- };
170
- this.caller = new AsyncCaller({
171
- maxConcurrency,
172
- maxRetries,
173
- ...rest,
174
- });
175
- this.ignorePaths = ignorePaths;
176
- if (ignorePaths) {
177
- this.ignore = ignore.default().add(ignorePaths);
178
- }
179
- if (this.accessToken) {
180
- this.headers = {
181
- ...this.headers,
182
- Authorization: `Bearer ${this.accessToken}`,
183
- };
184
- }
185
- }
186
- /**
187
- * Extracts the owner, repository, and path from a GitHub URL.
188
- * @param url The GitHub URL to extract information from.
189
- * @returns An object containing the owner, repository, and path extracted from the GitHub URL.
190
- */
191
- extractOwnerAndRepoAndPath(url) {
192
- const match = url.match(new RegExp(`${this.baseUrl}/([^/]+)/([^/]+)(/tree/[^/]+/(.+))?`, "i"));
193
- if (!match) {
194
- throw new Error("Invalid GitHub URL format.");
195
- }
196
- return { owner: match[1], repo: match[2], path: match[4] || "" };
197
- }
198
- /**
199
- * Fetches the files from the GitHub repository and creates Document
200
- * instances for each file. It also handles error handling based on the
201
- * unknown handling option.
202
- * @returns A promise that resolves to an array of Document instances.
203
- */
204
- async load() {
205
- this.log(`Loading documents from ${this.baseUrl}/${this.owner}/${this.repo}/${this.initialPath}...`);
206
- // process repository without submodules
207
- const documents = (await this.processRepo()).map((fileResponse) => new Document({
208
- pageContent: fileResponse.contents,
209
- metadata: fileResponse.metadata,
210
- }));
211
- if (this.processSubmodules) {
212
- // process submodules
213
- await this.getSubmoduleInfo();
214
- for (const submoduleInfo of this.submoduleInfos) {
215
- documents.push(...(await this.loadSubmodule(submoduleInfo)));
216
- }
217
- }
218
- return documents;
219
- }
220
- /**
221
- * Asynchronously streams documents from the entire GitHub repository.
222
- * It is suitable for situations where processing large repositories in a memory-efficient manner is required.
223
- * @yields Yields a Promise that resolves to a Document object for each file or submodule content found in the repository.
224
- */
225
- async *loadAsStream() {
226
- this.log(`Loading documents from ${this.baseUrl}/${this.owner}/${this.repo}/${this.initialPath}...`);
227
- yield* await this.processRepoAsStream(this.initialPath);
228
- if (!this.processSubmodules) {
229
- return;
230
- }
231
- await this.getSubmoduleInfo();
232
- for (const submoduleInfo of this.submoduleInfos) {
233
- yield* await this.loadSubmoduleAsStream(submoduleInfo);
234
- }
235
- }
236
- /**
237
- * Loads the information about Git submodules from the repository, if available.
238
- */
239
- async getSubmoduleInfo() {
240
- this.log("Loading info about submodules...");
241
- // we have to fetch the files of the root directory to get the download url of the .gitmodules file
242
- // however, we cannot reuse the files retrieved in processRepo() as initialPath may be != ""
243
- // so it may be that we end up fetching this file list twice
244
- const repoFiles = await this.fetchRepoFiles("");
245
- const gitmodulesFile = repoFiles.filter(({ name }) => name === ".gitmodules")?.[0];
246
- if (gitmodulesFile) {
247
- const gitmodulesContent = await this.fetchFileContent({
248
- download_url: gitmodulesFile.download_url,
249
- });
250
- this.submoduleInfos = await this.parseGitmodules(gitmodulesContent);
251
- }
252
- else {
253
- this.submoduleInfos = [];
254
- }
255
- this.log(`Found ${this.submoduleInfos.length} submodules:`);
256
- for (const submoduleInfo of this.submoduleInfos) {
257
- this.log(JSON.stringify(submoduleInfo));
258
- }
259
- }
260
- /**
261
- * Parses the given content of a .gitmodules file. Furthermore, queries the current SHA ref of all submodules.
262
- * Returns the submodule information as array.
263
- * @param gitmodulesContent the content of a .gitmodules file
264
- */
265
- async parseGitmodules(gitmodulesContent) {
266
- let validGitmodulesContent = gitmodulesContent;
267
- // in case the .gitmodules file does not end with a newline, we add one to make the regex work
268
- if (!validGitmodulesContent.endsWith("\n")) {
269
- validGitmodulesContent += "\n";
270
- }
271
- // catches the initial line of submodule entries
272
- const submodulePattern = /\[submodule "(.*?)"]\n((\s+.*?\s*=\s*.*?\n)*)/g;
273
- // catches the properties of a submodule
274
- const keyValuePattern = /\s+(.*?)\s*=\s*(.*?)\s/g;
275
- const submoduleInfos = [];
276
- for (const [, name, propertyLines] of validGitmodulesContent.matchAll(submodulePattern)) {
277
- if (!name || !propertyLines) {
278
- throw new Error("Could not parse submodule entry");
279
- }
280
- const submodulePropertyLines = propertyLines.matchAll(keyValuePattern);
281
- let path;
282
- let url;
283
- for (const [, key, value] of submodulePropertyLines) {
284
- if (!key || !value) {
285
- throw new Error(`Could not parse key/value pairs for submodule ${name}`);
286
- }
287
- switch (key) {
288
- case "path":
289
- path = value;
290
- break;
291
- case "url":
292
- url = value;
293
- if (url.endsWith(".git")) {
294
- url = url.substring(0, url.length - 4);
295
- }
296
- break;
297
- default:
298
- // ignoring unused keys
299
- }
300
- }
301
- if (!path || !url) {
302
- throw new Error(`Missing properties for submodule ${name}`);
303
- }
304
- // fetch the current ref of the submodule
305
- const files = await this.fetchRepoFiles(path);
306
- const submoduleInfo = {
307
- name,
308
- path,
309
- url,
310
- ref: files[0].sha,
311
- };
312
- submoduleInfos.push(submoduleInfo);
313
- }
314
- return submoduleInfos;
315
- }
316
- /**
317
- * Loads the documents of the given submodule. Uses the same parameters as for the current repository.
318
- * External submodules, i.e. submodules pointing to another GitHub instance, are ignored.
319
- * @param submoduleInfo the info about the submodule to be loaded
320
- */
321
- async loadSubmodule(submoduleInfo) {
322
- if (!submoduleInfo.url.startsWith(this.baseUrl)) {
323
- this.log(`Ignoring external submodule ${submoduleInfo.url}.`);
324
- return [];
325
- }
326
- else if (!submoduleInfo.path.startsWith(this.initialPath)) {
327
- this.log(`Ignoring submodule ${submoduleInfo.url}, as it is not on initial path.`);
328
- return [];
329
- }
330
- else {
331
- this.log(`Accessing submodule ${submoduleInfo.name} (${submoduleInfo.url})...`);
332
- return new GithubRepoLoader(submoduleInfo.url, {
333
- accessToken: this.accessToken,
334
- apiUrl: this.apiUrl,
335
- baseUrl: this.baseUrl,
336
- branch: submoduleInfo.ref,
337
- recursive: this.recursive,
338
- processSubmodules: this.processSubmodules,
339
- unknown: this.unknown,
340
- ignoreFiles: this.ignoreFiles,
341
- ignorePaths: this.ignorePaths,
342
- verbose: this.verbose,
343
- maxConcurrency: this.maxConcurrency,
344
- maxRetries: this.maxRetries,
345
- }).load();
346
- }
347
- }
348
- /**
349
- * Asynchronously processes and streams the contents of a specified submodule in the GitHub repository.
350
- * @param submoduleInfo the info about the submodule to be loaded
351
- * @yields Yields a Promise that resolves to a Document object for each file found in the submodule.
352
- */
353
- async *loadSubmoduleAsStream(submoduleInfo) {
354
- if (!submoduleInfo.url.startsWith(this.baseUrl)) {
355
- this.log(`Ignoring external submodule ${submoduleInfo.url}.`);
356
- yield* [];
357
- }
358
- if (!submoduleInfo.path.startsWith(this.initialPath)) {
359
- this.log(`Ignoring submodule ${submoduleInfo.url}, as it is not on initial path.`);
360
- yield* [];
361
- }
362
- this.log(`Accessing submodule ${submoduleInfo.name} (${submoduleInfo.url})...`);
363
- const submoduleLoader = new GithubRepoLoader(submoduleInfo.url, {
364
- accessToken: this.accessToken,
365
- baseUrl: this.baseUrl,
366
- apiUrl: this.apiUrl,
367
- branch: submoduleInfo.ref,
368
- recursive: this.recursive,
369
- processSubmodules: this.processSubmodules,
370
- unknown: this.unknown,
371
- ignoreFiles: this.ignoreFiles,
372
- ignorePaths: this.ignorePaths,
373
- verbose: this.verbose,
374
- maxConcurrency: this.maxConcurrency,
375
- maxRetries: this.maxRetries,
376
- });
377
- yield* await submoduleLoader.processRepoAsStream(submoduleInfo.path);
378
- }
379
- /**
380
- * Determines whether a file or directory should be ignored based on its
381
- * path and type.
382
- * @param path The path of the file or directory.
383
- * @param fileType The type of the file or directory.
384
- * @returns A boolean indicating whether the file or directory should be ignored.
385
- */
386
- shouldIgnore(path, fileType) {
387
- if (fileType !== "dir" && isBinaryPath(path)) {
388
- return true;
389
- }
390
- if (this.ignore !== undefined) {
391
- return this.ignore.ignores(path);
392
- }
393
- return (fileType !== "dir" &&
394
- this.ignoreFiles.some((pattern) => {
395
- if (typeof pattern === "string") {
396
- return path === pattern;
397
- }
398
- try {
399
- return pattern.test(path);
400
- }
401
- catch {
402
- throw new Error(`Unknown ignore file pattern: ${pattern}`);
403
- }
404
- }));
405
- }
406
- /**
407
- * Takes the file info and wrap it in a promise that will resolve to the file content and metadata
408
- * @param file
409
- * @returns
410
- */
411
- async fetchFileContentWrapper(file) {
412
- const fileContent = await this.fetchFileContent(file).catch((error) => {
413
- this.handleError(`Failed wrap file content: ${file}, ${error}`);
414
- });
415
- return {
416
- contents: fileContent || "",
417
- metadata: {
418
- source: file.path,
419
- repository: `${this.baseUrl}/${this.owner}/${this.repo}`,
420
- branch: this.branch,
421
- },
422
- };
423
- }
424
- /**
425
- * Maps a list of files / directories to a list of promises that will fetch the file / directory contents
426
- */
427
- async getCurrentDirectoryFilePromises(files) {
428
- const currentDirectoryFilePromises = [];
429
- // Directories have nested files / directories, which is why this is a list of promises of promises
430
- const currentDirectoryDirectoryPromises = [];
431
- for (const file of files) {
432
- if (this.shouldIgnore(file.path, file.type)) {
433
- continue;
434
- }
435
- if (file.type === "file" && file.size === 0) {
436
- // this is a submodule. ignoring for the moment. submodule processing is done separately
437
- continue;
438
- }
439
- if (file.type !== "dir") {
440
- try {
441
- currentDirectoryFilePromises.push(this.fetchFileContentWrapper(file));
442
- }
443
- catch (e) {
444
- this.handleError(`Failed to fetch file content: ${file.path}, ${e}`);
445
- }
446
- }
447
- else if (this.recursive) {
448
- currentDirectoryDirectoryPromises.push(this.processDirectory(file.path));
449
- }
450
- }
451
- const curDirDirectories = await Promise.all(currentDirectoryDirectoryPromises);
452
- return [...currentDirectoryFilePromises, ...curDirDirectories.flat()];
453
- }
454
- /**
455
- * Begins the process of fetching the contents of the repository
456
- */
457
- async processRepo() {
458
- try {
459
- // Get the list of file / directory names in the root directory
460
- const files = await this.fetchRepoFiles(this.initialPath);
461
- // Map the file / directory paths to promises that will fetch the file / directory contents
462
- const currentDirectoryFilePromises = await this.getCurrentDirectoryFilePromises(files);
463
- return Promise.all(currentDirectoryFilePromises);
464
- }
465
- catch (error) {
466
- this.handleError(`Failed to process directory: ${this.initialPath}, ${error}`);
467
- return Promise.reject(error);
468
- }
469
- }
470
- /**
471
- * Asynchronously processes the contents of the entire GitHub repository,
472
- * streaming each file as a Document object.
473
- * @param path The path of the directory to process.
474
- * @yields Yields a Promise that resolves to a Document object for each file found in the repository.
475
- */
476
- async *processRepoAsStream(path) {
477
- const files = await this.fetchRepoFiles(path);
478
- for (const file of files) {
479
- if (this.shouldIgnore(file.path, file.type)) {
480
- continue;
481
- }
482
- if (file.type === "file") {
483
- try {
484
- const fileResponse = await this.fetchFileContentWrapper(file);
485
- yield new Document({
486
- pageContent: fileResponse.contents,
487
- metadata: fileResponse.metadata,
488
- });
489
- }
490
- catch (error) {
491
- this.handleError(`Failed to fetch file content: ${file.path}, ${error}`);
492
- }
493
- }
494
- else if (this.recursive) {
495
- yield* await this.processDirectoryAsStream(file.path);
496
- }
497
- }
498
- }
499
- /**
500
- * Fetches the contents of a directory and maps the file / directory paths
501
- * to promises that will fetch the file / directory contents.
502
- * @param path The path of the directory to process.
503
- * @returns A promise that resolves to an array of promises that will fetch the file / directory contents.
504
- */
505
- async processDirectory(path) {
506
- try {
507
- const files = await this.fetchRepoFiles(path);
508
- return this.getCurrentDirectoryFilePromises(files);
509
- }
510
- catch (error) {
511
- this.handleError(`Failed to process directory: ${path}, ${error}`);
512
- return Promise.reject(error);
513
- }
514
- }
515
- /**
516
- * Asynchronously processes the contents of a given directory in the GitHub repository,
517
- * streaming each file as a Document object.
518
- * @param path The path of the directory to process.
519
- * @yields Yields a Promise that resolves to a Document object for each file in the directory.
520
- */
521
- async *processDirectoryAsStream(path) {
522
- const files = await this.fetchRepoFiles(path);
523
- for (const file of files) {
524
- if (this.shouldIgnore(file.path, file.type)) {
525
- continue;
526
- }
527
- if (file.type === "file") {
528
- try {
529
- const fileResponse = await this.fetchFileContentWrapper(file);
530
- yield new Document({
531
- pageContent: fileResponse.contents,
532
- metadata: fileResponse.metadata,
533
- });
534
- }
535
- catch {
536
- this.handleError(`Failed to fetch file content: ${file.path}`);
537
- }
538
- }
539
- else if (this.recursive) {
540
- yield* await this.processDirectoryAsStream(file.path);
541
- }
542
- }
543
- }
544
- /**
545
- * Fetches the files from a GitHub repository.
546
- * If the path denotes a single file, the resulting array contains only one element.
547
- * @param path The path of the repository to fetch the files from.
548
- * @returns A promise that resolves to an array of GithubFile instances.
549
- */
550
- async fetchRepoFiles(path) {
551
- const url = `${this.apiUrl}/repos/${this.owner}/${this.repo}/contents/${path}?ref=${this.branch}`;
552
- return this.caller.call(async () => {
553
- this.log(`Fetching ${url}`);
554
- const response = await fetch(url, { headers: this.headers });
555
- const data = await response.json();
556
- if (!response.ok) {
557
- throw new Error(`Unable to fetch repository files: ${response.status} ${JSON.stringify(data)}`);
558
- }
559
- if (Array.isArray(data)) {
560
- return data;
561
- }
562
- else {
563
- return [data];
564
- }
565
- });
566
- }
567
- /**
568
- * Fetches the content of a file from a GitHub repository.
569
- * @param file The file to fetch the content from.
570
- * @returns A promise that resolves to the content of the file.
571
- */
572
- async fetchFileContent(file) {
573
- return this.caller.call(async () => {
574
- this.log(`Fetching ${file.download_url}`);
575
- const response = await fetch(file.download_url, {
576
- headers: this.headers,
577
- });
578
- return response.text();
579
- });
580
- }
581
- /**
582
- * Handles errors based on the unknown handling option.
583
- * @param message The error message.
584
- * @returns void
585
- */
586
- handleError(message) {
587
- switch (this.unknown) {
588
- case UnknownHandling.Ignore:
589
- break;
590
- case UnknownHandling.Warn:
591
- console.warn(message);
592
- break;
593
- case UnknownHandling.Error:
594
- throw new Error(message);
595
- default:
596
- throw new Error(`Unknown unknown handling: ${this.unknown}`);
597
- }
598
- }
599
- /**
600
- * Logs the given message to the console, if parameter 'verbose' is set to true.
601
- * @param message the message to be logged.
602
- */
603
- log(message) {
604
- if (this.verbose) {
605
- console.log(message);
606
- }
607
- }
608
- }
@@ -1,90 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.HNLoader = void 0;
4
- const documents_1 = require("@langchain/core/documents");
5
- const cheerio_js_1 = require("./cheerio.cjs");
6
- const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
7
- /* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
8
- oldEntrypointName: "document_loaders/web/hn",
9
- newPackageName: "@langchain/community",
10
- });
11
- /**
12
- * @deprecated - Import from "@langchain/community/document_loaders/web/hn" instead. This entrypoint will be removed in 0.3.0.
13
- *
14
- * A class that extends the CheerioWebBaseLoader class. It represents a
15
- * loader for loading web pages from the Hacker News website.
16
- */
17
- class HNLoader extends cheerio_js_1.CheerioWebBaseLoader {
18
- constructor(webPath) {
19
- super(webPath);
20
- Object.defineProperty(this, "webPath", {
21
- enumerable: true,
22
- configurable: true,
23
- writable: true,
24
- value: webPath
25
- });
26
- }
27
- /**
28
- * An asynchronous method that loads the web page. If the webPath includes
29
- * "item", it calls the loadComments() method to load the comments from
30
- * the web page. Otherwise, it calls the loadResults() method to load the
31
- * results from the web page.
32
- * @returns A Promise that resolves to an array of Document instances.
33
- */
34
- async load() {
35
- const $ = await this.scrape();
36
- if (this.webPath.includes("item")) {
37
- return this.loadComments($);
38
- }
39
- return this.loadResults($);
40
- }
41
- /**
42
- * A private method that loads the comments from the web page. It selects
43
- * the elements with the class "athing comtr" using the $ function
44
- * provided by Cheerio. It also extracts the title of the web page from
45
- * the element with the id "pagespace". It creates Document instances for
46
- * each comment, with the comment text as the page content and the source
47
- * and title as metadata.
48
- * @param $ A CheerioAPI instance.
49
- * @returns An array of Document instances.
50
- */
51
- loadComments($) {
52
- const comments = $("tr[class='athing comtr']");
53
- const title = $("tr[id='pagespace']").attr("title");
54
- const documents = [];
55
- comments.each((_index, comment) => {
56
- const text = $(comment).text().trim();
57
- const metadata = { source: this.webPath, title };
58
- documents.push(new documents_1.Document({ pageContent: text, metadata }));
59
- });
60
- return documents;
61
- }
62
- /**
63
- * A private method that loads the results from the web page. It selects
64
- * the elements with the class "athing" using the $ function provided by
65
- * Cheerio. It extracts the ranking, link, title, and other metadata from
66
- * each result item. It creates Document instances for each result item,
67
- * with the title as the page content and the source, title, link, and
68
- * ranking as metadata.
69
- * @param $ A CheerioAPI instance.
70
- * @returns An array of Document instances.
71
- */
72
- loadResults($) {
73
- const items = $("tr[class='athing']");
74
- const documents = [];
75
- items.each((_index, item) => {
76
- const ranking = $(item).find("span[class='rank']").text();
77
- const link = $(item).find("span[class='titleline'] a").attr("href");
78
- const title = $(item).find("span[class='titleline']").text().trim();
79
- const metadata = {
80
- source: this.webPath,
81
- title,
82
- link,
83
- ranking,
84
- };
85
- documents.push(new documents_1.Document({ pageContent: title, metadata }));
86
- });
87
- return documents;
88
- }
89
- }
90
- exports.HNLoader = HNLoader;