langchain 0.2.18 → 0.3.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (338) hide show
  1. package/dist/agents/openai_functions/index.cjs +2 -2
  2. package/dist/agents/openai_functions/index.js +2 -2
  3. package/dist/chains/combine_documents/stuff.cjs +2 -2
  4. package/dist/chains/combine_documents/stuff.js +2 -2
  5. package/dist/chains/openai_functions/openapi.cjs +3 -1
  6. package/dist/chains/openai_functions/openapi.js +3 -1
  7. package/dist/load/import_constants.cjs +2 -39
  8. package/dist/load/import_constants.js +2 -39
  9. package/dist/load/import_map.cjs +2 -3
  10. package/dist/load/import_map.d.ts +0 -1
  11. package/dist/load/import_map.js +0 -1
  12. package/dist/smith/config.d.ts +1 -5
  13. package/package.json +31 -854
  14. package/dist/document_loaders/fs/chatgpt.cjs +0 -90
  15. package/dist/document_loaders/fs/chatgpt.d.ts +0 -8
  16. package/dist/document_loaders/fs/chatgpt.js +0 -86
  17. package/dist/document_loaders/fs/csv.cjs +0 -73
  18. package/dist/document_loaders/fs/csv.d.ts +0 -65
  19. package/dist/document_loaders/fs/csv.js +0 -69
  20. package/dist/document_loaders/fs/docx.cjs +0 -58
  21. package/dist/document_loaders/fs/docx.d.ts +0 -25
  22. package/dist/document_loaders/fs/docx.js +0 -54
  23. package/dist/document_loaders/fs/epub.cjs +0 -103
  24. package/dist/document_loaders/fs/epub.d.ts +0 -33
  25. package/dist/document_loaders/fs/epub.js +0 -99
  26. package/dist/document_loaders/fs/notion.cjs +0 -26
  27. package/dist/document_loaders/fs/notion.d.ts +0 -12
  28. package/dist/document_loaders/fs/notion.js +0 -22
  29. package/dist/document_loaders/fs/obsidian.cjs +0 -247
  30. package/dist/document_loaders/fs/obsidian.d.ts +0 -28
  31. package/dist/document_loaders/fs/obsidian.js +0 -240
  32. package/dist/document_loaders/fs/openai_whisper_audio.cjs +0 -49
  33. package/dist/document_loaders/fs/openai_whisper_audio.d.ts +0 -23
  34. package/dist/document_loaders/fs/openai_whisper_audio.js +0 -45
  35. package/dist/document_loaders/fs/pdf.cjs +0 -148
  36. package/dist/document_loaders/fs/pdf.d.ts +0 -49
  37. package/dist/document_loaders/fs/pdf.js +0 -144
  38. package/dist/document_loaders/fs/pptx.cjs +0 -46
  39. package/dist/document_loaders/fs/pptx.d.ts +0 -25
  40. package/dist/document_loaders/fs/pptx.js +0 -42
  41. package/dist/document_loaders/fs/srt.cjs +0 -57
  42. package/dist/document_loaders/fs/srt.d.ts +0 -32
  43. package/dist/document_loaders/fs/srt.js +0 -50
  44. package/dist/document_loaders/fs/unstructured.cjs +0 -338
  45. package/dist/document_loaders/fs/unstructured.d.ts +0 -125
  46. package/dist/document_loaders/fs/unstructured.js +0 -333
  47. package/dist/document_loaders/web/apify_dataset.cjs +0 -130
  48. package/dist/document_loaders/web/apify_dataset.d.ts +0 -85
  49. package/dist/document_loaders/web/apify_dataset.js +0 -126
  50. package/dist/document_loaders/web/assemblyai.cjs +0 -200
  51. package/dist/document_loaders/web/assemblyai.d.ts +0 -95
  52. package/dist/document_loaders/web/assemblyai.js +0 -193
  53. package/dist/document_loaders/web/azure_blob_storage_container.cjs +0 -73
  54. package/dist/document_loaders/web/azure_blob_storage_container.d.ts +0 -46
  55. package/dist/document_loaders/web/azure_blob_storage_container.js +0 -69
  56. package/dist/document_loaders/web/azure_blob_storage_file.cjs +0 -124
  57. package/dist/document_loaders/web/azure_blob_storage_file.d.ts +0 -53
  58. package/dist/document_loaders/web/azure_blob_storage_file.js +0 -97
  59. package/dist/document_loaders/web/browserbase.cjs +0 -93
  60. package/dist/document_loaders/web/browserbase.d.ts +0 -48
  61. package/dist/document_loaders/web/browserbase.js +0 -86
  62. package/dist/document_loaders/web/cheerio.cjs +0 -118
  63. package/dist/document_loaders/web/cheerio.d.ts +0 -77
  64. package/dist/document_loaders/web/cheerio.js +0 -114
  65. package/dist/document_loaders/web/college_confidential.cjs +0 -41
  66. package/dist/document_loaders/web/college_confidential.d.ts +0 -25
  67. package/dist/document_loaders/web/college_confidential.js +0 -37
  68. package/dist/document_loaders/web/confluence.cjs +0 -190
  69. package/dist/document_loaders/web/confluence.d.ts +0 -114
  70. package/dist/document_loaders/web/confluence.js +0 -186
  71. package/dist/document_loaders/web/couchbase.cjs +0 -95
  72. package/dist/document_loaders/web/couchbase.d.ts +0 -32
  73. package/dist/document_loaders/web/couchbase.js +0 -91
  74. package/dist/document_loaders/web/figma.cjs +0 -102
  75. package/dist/document_loaders/web/figma.d.ts +0 -82
  76. package/dist/document_loaders/web/figma.js +0 -98
  77. package/dist/document_loaders/web/firecrawl.cjs +0 -95
  78. package/dist/document_loaders/web/firecrawl.d.ts +0 -50
  79. package/dist/document_loaders/web/firecrawl.js +0 -88
  80. package/dist/document_loaders/web/gitbook.cjs +0 -110
  81. package/dist/document_loaders/web/gitbook.d.ts +0 -55
  82. package/dist/document_loaders/web/gitbook.js +0 -106
  83. package/dist/document_loaders/web/github.cjs +0 -615
  84. package/dist/document_loaders/web/github.d.ts +0 -203
  85. package/dist/document_loaders/web/github.js +0 -608
  86. package/dist/document_loaders/web/hn.cjs +0 -90
  87. package/dist/document_loaders/web/hn.d.ts +0 -42
  88. package/dist/document_loaders/web/hn.js +0 -86
  89. package/dist/document_loaders/web/imsdb.cjs +0 -44
  90. package/dist/document_loaders/web/imsdb.d.ts +0 -23
  91. package/dist/document_loaders/web/imsdb.js +0 -40
  92. package/dist/document_loaders/web/notionapi.cjs +0 -404
  93. package/dist/document_loaders/web/notionapi.d.ts +0 -133
  94. package/dist/document_loaders/web/notionapi.js +0 -392
  95. package/dist/document_loaders/web/notiondb.cjs +0 -199
  96. package/dist/document_loaders/web/notiondb.d.ts +0 -56
  97. package/dist/document_loaders/web/notiondb.js +0 -195
  98. package/dist/document_loaders/web/pdf.cjs +0 -140
  99. package/dist/document_loaders/web/pdf.d.ts +0 -35
  100. package/dist/document_loaders/web/pdf.js +0 -136
  101. package/dist/document_loaders/web/playwright.cjs +0 -89
  102. package/dist/document_loaders/web/playwright.d.ts +0 -58
  103. package/dist/document_loaders/web/playwright.js +0 -85
  104. package/dist/document_loaders/web/puppeteer.cjs +0 -139
  105. package/dist/document_loaders/web/puppeteer.d.ts +0 -82
  106. package/dist/document_loaders/web/puppeteer.js +0 -135
  107. package/dist/document_loaders/web/recursive_url.cjs +0 -198
  108. package/dist/document_loaders/web/recursive_url.d.ts +0 -33
  109. package/dist/document_loaders/web/recursive_url.js +0 -194
  110. package/dist/document_loaders/web/s3.cjs +0 -164
  111. package/dist/document_loaders/web/s3.d.ts +0 -78
  112. package/dist/document_loaders/web/s3.js +0 -137
  113. package/dist/document_loaders/web/searchapi.cjs +0 -150
  114. package/dist/document_loaders/web/searchapi.d.ts +0 -76
  115. package/dist/document_loaders/web/searchapi.js +0 -146
  116. package/dist/document_loaders/web/serpapi.cjs +0 -127
  117. package/dist/document_loaders/web/serpapi.d.ts +0 -62
  118. package/dist/document_loaders/web/serpapi.js +0 -123
  119. package/dist/document_loaders/web/sitemap.cjs +0 -118
  120. package/dist/document_loaders/web/sitemap.d.ts +0 -41
  121. package/dist/document_loaders/web/sitemap.js +0 -114
  122. package/dist/document_loaders/web/sonix_audio.cjs +0 -68
  123. package/dist/document_loaders/web/sonix_audio.d.ts +0 -36
  124. package/dist/document_loaders/web/sonix_audio.js +0 -64
  125. package/dist/document_loaders/web/sort_xyz_blockchain.cjs +0 -157
  126. package/dist/document_loaders/web/sort_xyz_blockchain.d.ts +0 -78
  127. package/dist/document_loaders/web/sort_xyz_blockchain.js +0 -153
  128. package/dist/document_loaders/web/youtube.cjs +0 -116
  129. package/dist/document_loaders/web/youtube.d.ts +0 -55
  130. package/dist/document_loaders/web/youtube.js +0 -112
  131. package/dist/experimental/tools/pyinterpreter.cjs +0 -248
  132. package/dist/experimental/tools/pyinterpreter.d.ts +0 -18
  133. package/dist/experimental/tools/pyinterpreter.js +0 -244
  134. package/dist/retrievers/self_query/chroma.cjs +0 -48
  135. package/dist/retrievers/self_query/chroma.d.ts +0 -26
  136. package/dist/retrievers/self_query/chroma.js +0 -44
  137. package/dist/retrievers/self_query/pinecone.cjs +0 -47
  138. package/dist/retrievers/self_query/pinecone.d.ts +0 -26
  139. package/dist/retrievers/self_query/pinecone.js +0 -43
  140. package/dist/retrievers/self_query/supabase.cjs +0 -278
  141. package/dist/retrievers/self_query/supabase.d.ts +0 -109
  142. package/dist/retrievers/self_query/supabase.js +0 -274
  143. package/dist/retrievers/self_query/supabase_utils.cjs +0 -264
  144. package/dist/retrievers/self_query/supabase_utils.d.ts +0 -101
  145. package/dist/retrievers/self_query/supabase_utils.js +0 -259
  146. package/dist/retrievers/self_query/vectara.cjs +0 -143
  147. package/dist/retrievers/self_query/vectara.d.ts +0 -42
  148. package/dist/retrievers/self_query/vectara.js +0 -139
  149. package/dist/retrievers/self_query/weaviate.cjs +0 -201
  150. package/dist/retrievers/self_query/weaviate.d.ts +0 -99
  151. package/dist/retrievers/self_query/weaviate.js +0 -197
  152. package/dist/types/assemblyai-types.cjs +0 -2
  153. package/dist/types/assemblyai-types.d.ts +0 -4
  154. package/dist/types/assemblyai-types.js +0 -1
  155. package/document_loaders/fs/chatgpt.cjs +0 -1
  156. package/document_loaders/fs/chatgpt.d.cts +0 -1
  157. package/document_loaders/fs/chatgpt.d.ts +0 -1
  158. package/document_loaders/fs/chatgpt.js +0 -1
  159. package/document_loaders/fs/csv.cjs +0 -1
  160. package/document_loaders/fs/csv.d.cts +0 -1
  161. package/document_loaders/fs/csv.d.ts +0 -1
  162. package/document_loaders/fs/csv.js +0 -1
  163. package/document_loaders/fs/docx.cjs +0 -1
  164. package/document_loaders/fs/docx.d.cts +0 -1
  165. package/document_loaders/fs/docx.d.ts +0 -1
  166. package/document_loaders/fs/docx.js +0 -1
  167. package/document_loaders/fs/epub.cjs +0 -1
  168. package/document_loaders/fs/epub.d.cts +0 -1
  169. package/document_loaders/fs/epub.d.ts +0 -1
  170. package/document_loaders/fs/epub.js +0 -1
  171. package/document_loaders/fs/notion.cjs +0 -1
  172. package/document_loaders/fs/notion.d.cts +0 -1
  173. package/document_loaders/fs/notion.d.ts +0 -1
  174. package/document_loaders/fs/notion.js +0 -1
  175. package/document_loaders/fs/obsidian.cjs +0 -1
  176. package/document_loaders/fs/obsidian.d.cts +0 -1
  177. package/document_loaders/fs/obsidian.d.ts +0 -1
  178. package/document_loaders/fs/obsidian.js +0 -1
  179. package/document_loaders/fs/openai_whisper_audio.cjs +0 -1
  180. package/document_loaders/fs/openai_whisper_audio.d.cts +0 -1
  181. package/document_loaders/fs/openai_whisper_audio.d.ts +0 -1
  182. package/document_loaders/fs/openai_whisper_audio.js +0 -1
  183. package/document_loaders/fs/pdf.cjs +0 -1
  184. package/document_loaders/fs/pdf.d.cts +0 -1
  185. package/document_loaders/fs/pdf.d.ts +0 -1
  186. package/document_loaders/fs/pdf.js +0 -1
  187. package/document_loaders/fs/pptx.cjs +0 -1
  188. package/document_loaders/fs/pptx.d.cts +0 -1
  189. package/document_loaders/fs/pptx.d.ts +0 -1
  190. package/document_loaders/fs/pptx.js +0 -1
  191. package/document_loaders/fs/srt.cjs +0 -1
  192. package/document_loaders/fs/srt.d.cts +0 -1
  193. package/document_loaders/fs/srt.d.ts +0 -1
  194. package/document_loaders/fs/srt.js +0 -1
  195. package/document_loaders/fs/unstructured.cjs +0 -1
  196. package/document_loaders/fs/unstructured.d.cts +0 -1
  197. package/document_loaders/fs/unstructured.d.ts +0 -1
  198. package/document_loaders/fs/unstructured.js +0 -1
  199. package/document_loaders/web/apify_dataset.cjs +0 -1
  200. package/document_loaders/web/apify_dataset.d.cts +0 -1
  201. package/document_loaders/web/apify_dataset.d.ts +0 -1
  202. package/document_loaders/web/apify_dataset.js +0 -1
  203. package/document_loaders/web/assemblyai.cjs +0 -1
  204. package/document_loaders/web/assemblyai.d.cts +0 -1
  205. package/document_loaders/web/assemblyai.d.ts +0 -1
  206. package/document_loaders/web/assemblyai.js +0 -1
  207. package/document_loaders/web/azure_blob_storage_container.cjs +0 -1
  208. package/document_loaders/web/azure_blob_storage_container.d.cts +0 -1
  209. package/document_loaders/web/azure_blob_storage_container.d.ts +0 -1
  210. package/document_loaders/web/azure_blob_storage_container.js +0 -1
  211. package/document_loaders/web/azure_blob_storage_file.cjs +0 -1
  212. package/document_loaders/web/azure_blob_storage_file.d.cts +0 -1
  213. package/document_loaders/web/azure_blob_storage_file.d.ts +0 -1
  214. package/document_loaders/web/azure_blob_storage_file.js +0 -1
  215. package/document_loaders/web/browserbase.cjs +0 -1
  216. package/document_loaders/web/browserbase.d.cts +0 -1
  217. package/document_loaders/web/browserbase.d.ts +0 -1
  218. package/document_loaders/web/browserbase.js +0 -1
  219. package/document_loaders/web/cheerio.cjs +0 -1
  220. package/document_loaders/web/cheerio.d.cts +0 -1
  221. package/document_loaders/web/cheerio.d.ts +0 -1
  222. package/document_loaders/web/cheerio.js +0 -1
  223. package/document_loaders/web/college_confidential.cjs +0 -1
  224. package/document_loaders/web/college_confidential.d.cts +0 -1
  225. package/document_loaders/web/college_confidential.d.ts +0 -1
  226. package/document_loaders/web/college_confidential.js +0 -1
  227. package/document_loaders/web/confluence.cjs +0 -1
  228. package/document_loaders/web/confluence.d.cts +0 -1
  229. package/document_loaders/web/confluence.d.ts +0 -1
  230. package/document_loaders/web/confluence.js +0 -1
  231. package/document_loaders/web/couchbase.cjs +0 -1
  232. package/document_loaders/web/couchbase.d.cts +0 -1
  233. package/document_loaders/web/couchbase.d.ts +0 -1
  234. package/document_loaders/web/couchbase.js +0 -1
  235. package/document_loaders/web/figma.cjs +0 -1
  236. package/document_loaders/web/figma.d.cts +0 -1
  237. package/document_loaders/web/figma.d.ts +0 -1
  238. package/document_loaders/web/figma.js +0 -1
  239. package/document_loaders/web/firecrawl.cjs +0 -1
  240. package/document_loaders/web/firecrawl.d.cts +0 -1
  241. package/document_loaders/web/firecrawl.d.ts +0 -1
  242. package/document_loaders/web/firecrawl.js +0 -1
  243. package/document_loaders/web/gitbook.cjs +0 -1
  244. package/document_loaders/web/gitbook.d.cts +0 -1
  245. package/document_loaders/web/gitbook.d.ts +0 -1
  246. package/document_loaders/web/gitbook.js +0 -1
  247. package/document_loaders/web/github.cjs +0 -1
  248. package/document_loaders/web/github.d.cts +0 -1
  249. package/document_loaders/web/github.d.ts +0 -1
  250. package/document_loaders/web/github.js +0 -1
  251. package/document_loaders/web/hn.cjs +0 -1
  252. package/document_loaders/web/hn.d.cts +0 -1
  253. package/document_loaders/web/hn.d.ts +0 -1
  254. package/document_loaders/web/hn.js +0 -1
  255. package/document_loaders/web/imsdb.cjs +0 -1
  256. package/document_loaders/web/imsdb.d.cts +0 -1
  257. package/document_loaders/web/imsdb.d.ts +0 -1
  258. package/document_loaders/web/imsdb.js +0 -1
  259. package/document_loaders/web/notionapi.cjs +0 -1
  260. package/document_loaders/web/notionapi.d.cts +0 -1
  261. package/document_loaders/web/notionapi.d.ts +0 -1
  262. package/document_loaders/web/notionapi.js +0 -1
  263. package/document_loaders/web/notiondb.cjs +0 -1
  264. package/document_loaders/web/notiondb.d.cts +0 -1
  265. package/document_loaders/web/notiondb.d.ts +0 -1
  266. package/document_loaders/web/notiondb.js +0 -1
  267. package/document_loaders/web/pdf.cjs +0 -1
  268. package/document_loaders/web/pdf.d.cts +0 -1
  269. package/document_loaders/web/pdf.d.ts +0 -1
  270. package/document_loaders/web/pdf.js +0 -1
  271. package/document_loaders/web/playwright.cjs +0 -1
  272. package/document_loaders/web/playwright.d.cts +0 -1
  273. package/document_loaders/web/playwright.d.ts +0 -1
  274. package/document_loaders/web/playwright.js +0 -1
  275. package/document_loaders/web/puppeteer.cjs +0 -1
  276. package/document_loaders/web/puppeteer.d.cts +0 -1
  277. package/document_loaders/web/puppeteer.d.ts +0 -1
  278. package/document_loaders/web/puppeteer.js +0 -1
  279. package/document_loaders/web/recursive_url.cjs +0 -1
  280. package/document_loaders/web/recursive_url.d.cts +0 -1
  281. package/document_loaders/web/recursive_url.d.ts +0 -1
  282. package/document_loaders/web/recursive_url.js +0 -1
  283. package/document_loaders/web/s3.cjs +0 -1
  284. package/document_loaders/web/s3.d.cts +0 -1
  285. package/document_loaders/web/s3.d.ts +0 -1
  286. package/document_loaders/web/s3.js +0 -1
  287. package/document_loaders/web/searchapi.cjs +0 -1
  288. package/document_loaders/web/searchapi.d.cts +0 -1
  289. package/document_loaders/web/searchapi.d.ts +0 -1
  290. package/document_loaders/web/searchapi.js +0 -1
  291. package/document_loaders/web/serpapi.cjs +0 -1
  292. package/document_loaders/web/serpapi.d.cts +0 -1
  293. package/document_loaders/web/serpapi.d.ts +0 -1
  294. package/document_loaders/web/serpapi.js +0 -1
  295. package/document_loaders/web/sitemap.cjs +0 -1
  296. package/document_loaders/web/sitemap.d.cts +0 -1
  297. package/document_loaders/web/sitemap.d.ts +0 -1
  298. package/document_loaders/web/sitemap.js +0 -1
  299. package/document_loaders/web/sonix_audio.cjs +0 -1
  300. package/document_loaders/web/sonix_audio.d.cts +0 -1
  301. package/document_loaders/web/sonix_audio.d.ts +0 -1
  302. package/document_loaders/web/sonix_audio.js +0 -1
  303. package/document_loaders/web/sort_xyz_blockchain.cjs +0 -1
  304. package/document_loaders/web/sort_xyz_blockchain.d.cts +0 -1
  305. package/document_loaders/web/sort_xyz_blockchain.d.ts +0 -1
  306. package/document_loaders/web/sort_xyz_blockchain.js +0 -1
  307. package/document_loaders/web/youtube.cjs +0 -1
  308. package/document_loaders/web/youtube.d.cts +0 -1
  309. package/document_loaders/web/youtube.d.ts +0 -1
  310. package/document_loaders/web/youtube.js +0 -1
  311. package/experimental/tools/pyinterpreter.cjs +0 -1
  312. package/experimental/tools/pyinterpreter.d.cts +0 -1
  313. package/experimental/tools/pyinterpreter.d.ts +0 -1
  314. package/experimental/tools/pyinterpreter.js +0 -1
  315. package/memory/index.cjs +0 -1
  316. package/memory/index.d.cts +0 -1
  317. package/memory/index.d.ts +0 -1
  318. package/memory/index.js +0 -1
  319. package/retrievers/self_query/chroma.cjs +0 -1
  320. package/retrievers/self_query/chroma.d.cts +0 -1
  321. package/retrievers/self_query/chroma.d.ts +0 -1
  322. package/retrievers/self_query/chroma.js +0 -1
  323. package/retrievers/self_query/pinecone.cjs +0 -1
  324. package/retrievers/self_query/pinecone.d.cts +0 -1
  325. package/retrievers/self_query/pinecone.d.ts +0 -1
  326. package/retrievers/self_query/pinecone.js +0 -1
  327. package/retrievers/self_query/supabase.cjs +0 -1
  328. package/retrievers/self_query/supabase.d.cts +0 -1
  329. package/retrievers/self_query/supabase.d.ts +0 -1
  330. package/retrievers/self_query/supabase.js +0 -1
  331. package/retrievers/self_query/vectara.cjs +0 -1
  332. package/retrievers/self_query/vectara.d.cts +0 -1
  333. package/retrievers/self_query/vectara.d.ts +0 -1
  334. package/retrievers/self_query/vectara.js +0 -1
  335. package/retrievers/self_query/weaviate.cjs +0 -1
  336. package/retrievers/self_query/weaviate.d.cts +0 -1
  337. package/retrievers/self_query/weaviate.d.ts +0 -1
  338. package/retrievers/self_query/weaviate.js +0 -1
@@ -1,95 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.FireCrawlLoader = void 0;
7
- const firecrawl_js_1 = __importDefault(require("@mendable/firecrawl-js"));
8
- const documents_1 = require("@langchain/core/documents");
9
- const env_1 = require("@langchain/core/utils/env");
10
- const base_js_1 = require("../base.cjs");
11
- const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
12
- /* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
13
- oldEntrypointName: "document_loaders/web/firecrawl",
14
- newPackageName: "@langchain/community",
15
- });
16
- /**
17
- * @deprecated - Import from "@langchain/community/document_loaders/web/firecrawl" instead. This entrypoint will be removed in 0.3.0.
18
- *
19
- * Class representing a document loader for loading data from
20
- * Firecrawl (firecrawl.dev). It extends the BaseDocumentLoader class.
21
- * @example
22
- * ```typescript
23
- * const loader = new FireCrawlLoader({
24
- * url: "{url}",
25
- * apiKey: "{apiKey}",
26
- * mode: "crawl"
27
- * });
28
- * const docs = await loader.load();
29
- * ```
30
- */
31
- class FireCrawlLoader extends base_js_1.BaseDocumentLoader {
32
- constructor(loaderParams) {
33
- super();
34
- Object.defineProperty(this, "apiKey", {
35
- enumerable: true,
36
- configurable: true,
37
- writable: true,
38
- value: void 0
39
- });
40
- Object.defineProperty(this, "url", {
41
- enumerable: true,
42
- configurable: true,
43
- writable: true,
44
- value: void 0
45
- });
46
- Object.defineProperty(this, "mode", {
47
- enumerable: true,
48
- configurable: true,
49
- writable: true,
50
- value: void 0
51
- });
52
- Object.defineProperty(this, "params", {
53
- enumerable: true,
54
- configurable: true,
55
- writable: true,
56
- value: void 0
57
- });
58
- const { apiKey = (0, env_1.getEnvironmentVariable)("FIRECRAWL_API_KEY"), url, mode = "crawl", params, } = loaderParams;
59
- if (!apiKey) {
60
- throw new Error("Firecrawl API key not set. You can set it as FIRECRAWL_API_KEY in your .env file, or pass it to Firecrawl.");
61
- }
62
- this.apiKey = apiKey;
63
- this.url = url;
64
- this.mode = mode;
65
- this.params = params;
66
- }
67
- /**
68
- * Loads the data from the Firecrawl.
69
- * @returns An array of Documents representing the retrieved data.
70
- * @throws An error if the data could not be loaded.
71
- */
72
- async load() {
73
- const app = new firecrawl_js_1.default({ apiKey: this.apiKey });
74
- let firecrawlDocs;
75
- if (this.mode === "scrape") {
76
- const response = await app.scrapeUrl(this.url, this.params);
77
- if (!response.success) {
78
- throw new Error(`Firecrawl: Failed to scrape URL. Error: ${response.error}`);
79
- }
80
- firecrawlDocs = [response.data];
81
- }
82
- else if (this.mode === "crawl") {
83
- const response = await app.crawlUrl(this.url, this.params, true);
84
- firecrawlDocs = response;
85
- }
86
- else {
87
- throw new Error(`Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape'.`);
88
- }
89
- return firecrawlDocs.map((doc) => new documents_1.Document({
90
- pageContent: doc.markdown || "",
91
- metadata: doc.metadata || {},
92
- }));
93
- }
94
- }
95
- exports.FireCrawlLoader = FireCrawlLoader;
@@ -1,50 +0,0 @@
1
- import { type DocumentInterface } from "@langchain/core/documents";
2
- import { BaseDocumentLoader } from "../base.js";
3
- /**
4
- * Interface representing the parameters for the Firecrawl loader. It
5
- * includes properties such as the URL to scrape or crawl and the API key.
6
- */
7
- interface FirecrawlLoaderParameters {
8
- /**
9
- * URL to scrape or crawl
10
- */
11
- url: string;
12
- /**
13
- * API key for Firecrawl. If not provided, the default value is the value of the FIRECRAWL_API_KEY environment variable.
14
- */
15
- apiKey?: string;
16
- /**
17
- * Mode of operation. Can be either "crawl" or "scrape". If not provided, the default value is "crawl".
18
- */
19
- mode?: "crawl" | "scrape";
20
- params?: Record<string, unknown>;
21
- }
22
- /**
23
- * @deprecated - Import from "@langchain/community/document_loaders/web/firecrawl" instead. This entrypoint will be removed in 0.3.0.
24
- *
25
- * Class representing a document loader for loading data from
26
- * Firecrawl (firecrawl.dev). It extends the BaseDocumentLoader class.
27
- * @example
28
- * ```typescript
29
- * const loader = new FireCrawlLoader({
30
- * url: "{url}",
31
- * apiKey: "{apiKey}",
32
- * mode: "crawl"
33
- * });
34
- * const docs = await loader.load();
35
- * ```
36
- */
37
- export declare class FireCrawlLoader extends BaseDocumentLoader {
38
- private apiKey;
39
- private url;
40
- private mode;
41
- private params?;
42
- constructor(loaderParams: FirecrawlLoaderParameters);
43
- /**
44
- * Loads the data from the Firecrawl.
45
- * @returns An array of Documents representing the retrieved data.
46
- * @throws An error if the data could not be loaded.
47
- */
48
- load(): Promise<DocumentInterface[]>;
49
- }
50
- export {};
@@ -1,88 +0,0 @@
1
- import FirecrawlApp from "@mendable/firecrawl-js";
2
- import { Document } from "@langchain/core/documents";
3
- import { getEnvironmentVariable } from "@langchain/core/utils/env";
4
- import { BaseDocumentLoader } from "../base.js";
5
- import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
6
- /* #__PURE__ */ logVersion020MigrationWarning({
7
- oldEntrypointName: "document_loaders/web/firecrawl",
8
- newPackageName: "@langchain/community",
9
- });
10
- /**
11
- * @deprecated - Import from "@langchain/community/document_loaders/web/firecrawl" instead. This entrypoint will be removed in 0.3.0.
12
- *
13
- * Class representing a document loader for loading data from
14
- * Firecrawl (firecrawl.dev). It extends the BaseDocumentLoader class.
15
- * @example
16
- * ```typescript
17
- * const loader = new FireCrawlLoader({
18
- * url: "{url}",
19
- * apiKey: "{apiKey}",
20
- * mode: "crawl"
21
- * });
22
- * const docs = await loader.load();
23
- * ```
24
- */
25
- export class FireCrawlLoader extends BaseDocumentLoader {
26
- constructor(loaderParams) {
27
- super();
28
- Object.defineProperty(this, "apiKey", {
29
- enumerable: true,
30
- configurable: true,
31
- writable: true,
32
- value: void 0
33
- });
34
- Object.defineProperty(this, "url", {
35
- enumerable: true,
36
- configurable: true,
37
- writable: true,
38
- value: void 0
39
- });
40
- Object.defineProperty(this, "mode", {
41
- enumerable: true,
42
- configurable: true,
43
- writable: true,
44
- value: void 0
45
- });
46
- Object.defineProperty(this, "params", {
47
- enumerable: true,
48
- configurable: true,
49
- writable: true,
50
- value: void 0
51
- });
52
- const { apiKey = getEnvironmentVariable("FIRECRAWL_API_KEY"), url, mode = "crawl", params, } = loaderParams;
53
- if (!apiKey) {
54
- throw new Error("Firecrawl API key not set. You can set it as FIRECRAWL_API_KEY in your .env file, or pass it to Firecrawl.");
55
- }
56
- this.apiKey = apiKey;
57
- this.url = url;
58
- this.mode = mode;
59
- this.params = params;
60
- }
61
- /**
62
- * Loads the data from the Firecrawl.
63
- * @returns An array of Documents representing the retrieved data.
64
- * @throws An error if the data could not be loaded.
65
- */
66
- async load() {
67
- const app = new FirecrawlApp({ apiKey: this.apiKey });
68
- let firecrawlDocs;
69
- if (this.mode === "scrape") {
70
- const response = await app.scrapeUrl(this.url, this.params);
71
- if (!response.success) {
72
- throw new Error(`Firecrawl: Failed to scrape URL. Error: ${response.error}`);
73
- }
74
- firecrawlDocs = [response.data];
75
- }
76
- else if (this.mode === "crawl") {
77
- const response = await app.crawlUrl(this.url, this.params, true);
78
- firecrawlDocs = response;
79
- }
80
- else {
81
- throw new Error(`Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape'.`);
82
- }
83
- return firecrawlDocs.map((doc) => new Document({
84
- pageContent: doc.markdown || "",
85
- metadata: doc.metadata || {},
86
- }));
87
- }
88
- }
@@ -1,110 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.GitbookLoader = void 0;
4
- const documents_1 = require("@langchain/core/documents");
5
- const cheerio_js_1 = require("./cheerio.cjs");
6
- const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
7
- /* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
8
- oldEntrypointName: "document_loaders/web/gitbook",
9
- newPackageName: "@langchain/community",
10
- });
11
- /**
12
- * @deprecated - Import from "@langchain/community/document_loaders/web/gitbook" instead. This entrypoint will be removed in 0.3.0.
13
- *
14
- * Class representing a document loader specifically designed for loading
15
- * documents from Gitbook. It extends the CheerioWebBaseLoader.
16
- */
17
- class GitbookLoader extends cheerio_js_1.CheerioWebBaseLoader {
18
- constructor(webPath, params = {}) {
19
- const path = params.shouldLoadAllPaths === true ? `${webPath}/sitemap.xml` : webPath;
20
- super(path);
21
- Object.defineProperty(this, "webPath", {
22
- enumerable: true,
23
- configurable: true,
24
- writable: true,
25
- value: webPath
26
- });
27
- Object.defineProperty(this, "shouldLoadAllPaths", {
28
- enumerable: true,
29
- configurable: true,
30
- writable: true,
31
- value: false
32
- });
33
- Object.defineProperty(this, "baseUrl", {
34
- enumerable: true,
35
- configurable: true,
36
- writable: true,
37
- value: void 0
38
- });
39
- this.baseUrl = webPath;
40
- this.webPath = path;
41
- this.shouldLoadAllPaths =
42
- params.shouldLoadAllPaths ?? this.shouldLoadAllPaths;
43
- }
44
- /**
45
- * Method that scrapes the web document using Cheerio and loads the
46
- * content based on the value of shouldLoadAllPaths. If shouldLoadAllPaths
47
- * is true, it calls the loadAllPaths() method to load all paths.
48
- * Otherwise, it calls the loadPath() method to load a single path.
49
- * @returns Promise resolving to an array of Document instances.
50
- */
51
- async load() {
52
- const $ = await this.scrape();
53
- if (this.shouldLoadAllPaths === true) {
54
- return this.loadAllPaths($);
55
- }
56
- return this.loadPath($);
57
- }
58
- /**
59
- * Private method that loads the content of a single path from the Gitbook
60
- * web document. It extracts the page content by selecting all elements
61
- * inside the "main" element, filters out empty text nodes, and joins the
62
- * remaining text nodes with line breaks. It extracts the title by
63
- * selecting the first "h1" element inside the "main" element. It creates
64
- * a Document instance with the extracted page content and metadata
65
- * containing the source URL and title.
66
- * @param $ CheerioAPI instance representing the loaded web document.
67
- * @param url Optional string representing the URL of the web document.
68
- * @returns Array of Document instances.
69
- */
70
- loadPath($, url) {
71
- const pageContent = $("main *")
72
- .contents()
73
- .toArray()
74
- .map((element) => element.type === "text" ? $(element).text().trim() : null)
75
- .filter((text) => text)
76
- .join("\n");
77
- const title = $("main h1").first().text().trim();
78
- return [
79
- new documents_1.Document({
80
- pageContent,
81
- metadata: { source: url ?? this.webPath, title },
82
- }),
83
- ];
84
- }
85
- /**
86
- * Private method that loads the content of all paths from the Gitbook web
87
- * document. It extracts the URLs of all paths from the "loc" elements in
88
- * the sitemap.xml. It iterates over each URL, scrapes the web document
89
- * using the _scrape() method, and calls the loadPath() method to load the
90
- * content of each path. It collects all the loaded documents and returns
91
- * them as an array.
92
- * @param $ CheerioAPI instance representing the loaded web document.
93
- * @returns Promise resolving to an array of Document instances.
94
- */
95
- async loadAllPaths($) {
96
- const urls = $("loc")
97
- .toArray()
98
- .map((element) => $(element).text());
99
- const documents = [];
100
- for (const url of urls) {
101
- const buildUrl = url.includes(this.baseUrl) ? url : this.baseUrl + url;
102
- console.log(`Fetching text from ${buildUrl}`);
103
- const html = await GitbookLoader._scrape(buildUrl, this.caller, this.timeout);
104
- documents.push(...this.loadPath(html, buildUrl));
105
- }
106
- console.log(`Fetched ${documents.length} documents.`);
107
- return documents;
108
- }
109
- }
110
- exports.GitbookLoader = GitbookLoader;
@@ -1,55 +0,0 @@
1
- import { Document } from "@langchain/core/documents";
2
- import { CheerioWebBaseLoader } from "./cheerio.js";
3
- /**
4
- * Interface representing the parameters for configuring the
5
- * GitbookLoader. It has an optional property shouldLoadAllPaths, which
6
- * indicates whether all paths should be loaded.
7
- */
8
- interface GitbookLoaderParams {
9
- shouldLoadAllPaths?: boolean;
10
- }
11
- /**
12
- * @deprecated - Import from "@langchain/community/document_loaders/web/gitbook" instead. This entrypoint will be removed in 0.3.0.
13
- *
14
- * Class representing a document loader specifically designed for loading
15
- * documents from Gitbook. It extends the CheerioWebBaseLoader.
16
- */
17
- export declare class GitbookLoader extends CheerioWebBaseLoader {
18
- webPath: string;
19
- shouldLoadAllPaths: boolean;
20
- private readonly baseUrl;
21
- constructor(webPath: string, params?: GitbookLoaderParams);
22
- /**
23
- * Method that scrapes the web document using Cheerio and loads the
24
- * content based on the value of shouldLoadAllPaths. If shouldLoadAllPaths
25
- * is true, it calls the loadAllPaths() method to load all paths.
26
- * Otherwise, it calls the loadPath() method to load a single path.
27
- * @returns Promise resolving to an array of Document instances.
28
- */
29
- load(): Promise<Document[]>;
30
- /**
31
- * Private method that loads the content of a single path from the Gitbook
32
- * web document. It extracts the page content by selecting all elements
33
- * inside the "main" element, filters out empty text nodes, and joins the
34
- * remaining text nodes with line breaks. It extracts the title by
35
- * selecting the first "h1" element inside the "main" element. It creates
36
- * a Document instance with the extracted page content and metadata
37
- * containing the source URL and title.
38
- * @param $ CheerioAPI instance representing the loaded web document.
39
- * @param url Optional string representing the URL of the web document.
40
- * @returns Array of Document instances.
41
- */
42
- private loadPath;
43
- /**
44
- * Private method that loads the content of all paths from the Gitbook web
45
- * document. It extracts the URLs of all paths from the "loc" elements in
46
- * the sitemap.xml. It iterates over each URL, scrapes the web document
47
- * using the _scrape() method, and calls the loadPath() method to load the
48
- * content of each path. It collects all the loaded documents and returns
49
- * them as an array.
50
- * @param $ CheerioAPI instance representing the loaded web document.
51
- * @returns Promise resolving to an array of Document instances.
52
- */
53
- private loadAllPaths;
54
- }
55
- export {};
@@ -1,106 +0,0 @@
1
- import { Document } from "@langchain/core/documents";
2
- import { CheerioWebBaseLoader } from "./cheerio.js";
3
- import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
4
- /* #__PURE__ */ logVersion020MigrationWarning({
5
- oldEntrypointName: "document_loaders/web/gitbook",
6
- newPackageName: "@langchain/community",
7
- });
8
- /**
9
- * @deprecated - Import from "@langchain/community/document_loaders/web/gitbook" instead. This entrypoint will be removed in 0.3.0.
10
- *
11
- * Class representing a document loader specifically designed for loading
12
- * documents from Gitbook. It extends the CheerioWebBaseLoader.
13
- */
14
- export class GitbookLoader extends CheerioWebBaseLoader {
15
- constructor(webPath, params = {}) {
16
- const path = params.shouldLoadAllPaths === true ? `${webPath}/sitemap.xml` : webPath;
17
- super(path);
18
- Object.defineProperty(this, "webPath", {
19
- enumerable: true,
20
- configurable: true,
21
- writable: true,
22
- value: webPath
23
- });
24
- Object.defineProperty(this, "shouldLoadAllPaths", {
25
- enumerable: true,
26
- configurable: true,
27
- writable: true,
28
- value: false
29
- });
30
- Object.defineProperty(this, "baseUrl", {
31
- enumerable: true,
32
- configurable: true,
33
- writable: true,
34
- value: void 0
35
- });
36
- this.baseUrl = webPath;
37
- this.webPath = path;
38
- this.shouldLoadAllPaths =
39
- params.shouldLoadAllPaths ?? this.shouldLoadAllPaths;
40
- }
41
- /**
42
- * Method that scrapes the web document using Cheerio and loads the
43
- * content based on the value of shouldLoadAllPaths. If shouldLoadAllPaths
44
- * is true, it calls the loadAllPaths() method to load all paths.
45
- * Otherwise, it calls the loadPath() method to load a single path.
46
- * @returns Promise resolving to an array of Document instances.
47
- */
48
- async load() {
49
- const $ = await this.scrape();
50
- if (this.shouldLoadAllPaths === true) {
51
- return this.loadAllPaths($);
52
- }
53
- return this.loadPath($);
54
- }
55
- /**
56
- * Private method that loads the content of a single path from the Gitbook
57
- * web document. It extracts the page content by selecting all elements
58
- * inside the "main" element, filters out empty text nodes, and joins the
59
- * remaining text nodes with line breaks. It extracts the title by
60
- * selecting the first "h1" element inside the "main" element. It creates
61
- * a Document instance with the extracted page content and metadata
62
- * containing the source URL and title.
63
- * @param $ CheerioAPI instance representing the loaded web document.
64
- * @param url Optional string representing the URL of the web document.
65
- * @returns Array of Document instances.
66
- */
67
- loadPath($, url) {
68
- const pageContent = $("main *")
69
- .contents()
70
- .toArray()
71
- .map((element) => element.type === "text" ? $(element).text().trim() : null)
72
- .filter((text) => text)
73
- .join("\n");
74
- const title = $("main h1").first().text().trim();
75
- return [
76
- new Document({
77
- pageContent,
78
- metadata: { source: url ?? this.webPath, title },
79
- }),
80
- ];
81
- }
82
- /**
83
- * Private method that loads the content of all paths from the Gitbook web
84
- * document. It extracts the URLs of all paths from the "loc" elements in
85
- * the sitemap.xml. It iterates over each URL, scrapes the web document
86
- * using the _scrape() method, and calls the loadPath() method to load the
87
- * content of each path. It collects all the loaded documents and returns
88
- * them as an array.
89
- * @param $ CheerioAPI instance representing the loaded web document.
90
- * @returns Promise resolving to an array of Document instances.
91
- */
92
- async loadAllPaths($) {
93
- const urls = $("loc")
94
- .toArray()
95
- .map((element) => $(element).text());
96
- const documents = [];
97
- for (const url of urls) {
98
- const buildUrl = url.includes(this.baseUrl) ? url : this.baseUrl + url;
99
- console.log(`Fetching text from ${buildUrl}`);
100
- const html = await GitbookLoader._scrape(buildUrl, this.caller, this.timeout);
101
- documents.push(...this.loadPath(html, buildUrl));
102
- }
103
- console.log(`Fetched ${documents.length} documents.`);
104
- return documents;
105
- }
106
- }