langchain 0.2.17 → 0.3.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. package/dist/agents/executor.cjs +11 -0
  2. package/dist/agents/executor.d.ts +2 -0
  3. package/dist/agents/executor.js +11 -0
  4. package/dist/agents/openai_functions/index.cjs +2 -2
  5. package/dist/agents/openai_functions/index.js +2 -2
  6. package/dist/agents/react/output_parser.cjs +3 -2
  7. package/dist/agents/react/output_parser.js +3 -2
  8. package/dist/chains/combine_documents/stuff.cjs +2 -2
  9. package/dist/chains/combine_documents/stuff.js +2 -2
  10. package/dist/chains/conversational_retrieval_chain.cjs +1 -1
  11. package/dist/chains/conversational_retrieval_chain.d.ts +1 -1
  12. package/dist/chains/conversational_retrieval_chain.js +1 -1
  13. package/dist/chains/llm_chain.cjs +1 -1
  14. package/dist/chains/llm_chain.d.ts +1 -1
  15. package/dist/chains/llm_chain.js +1 -1
  16. package/dist/chains/openai_functions/openapi.cjs +3 -1
  17. package/dist/chains/openai_functions/openapi.js +3 -1
  18. package/dist/chains/retrieval_qa.cjs +1 -1
  19. package/dist/chains/retrieval_qa.d.ts +1 -1
  20. package/dist/chains/retrieval_qa.js +1 -1
  21. package/dist/load/import_constants.cjs +2 -39
  22. package/dist/load/import_constants.js +2 -39
  23. package/dist/load/import_map.cjs +2 -3
  24. package/dist/load/import_map.d.ts +0 -1
  25. package/dist/load/import_map.js +0 -1
  26. package/dist/smith/config.d.ts +1 -5
  27. package/package.json +35 -858
  28. package/dist/document_loaders/fs/chatgpt.cjs +0 -90
  29. package/dist/document_loaders/fs/chatgpt.d.ts +0 -8
  30. package/dist/document_loaders/fs/chatgpt.js +0 -86
  31. package/dist/document_loaders/fs/csv.cjs +0 -73
  32. package/dist/document_loaders/fs/csv.d.ts +0 -65
  33. package/dist/document_loaders/fs/csv.js +0 -69
  34. package/dist/document_loaders/fs/docx.cjs +0 -58
  35. package/dist/document_loaders/fs/docx.d.ts +0 -25
  36. package/dist/document_loaders/fs/docx.js +0 -54
  37. package/dist/document_loaders/fs/epub.cjs +0 -103
  38. package/dist/document_loaders/fs/epub.d.ts +0 -33
  39. package/dist/document_loaders/fs/epub.js +0 -99
  40. package/dist/document_loaders/fs/notion.cjs +0 -26
  41. package/dist/document_loaders/fs/notion.d.ts +0 -12
  42. package/dist/document_loaders/fs/notion.js +0 -22
  43. package/dist/document_loaders/fs/obsidian.cjs +0 -247
  44. package/dist/document_loaders/fs/obsidian.d.ts +0 -28
  45. package/dist/document_loaders/fs/obsidian.js +0 -240
  46. package/dist/document_loaders/fs/openai_whisper_audio.cjs +0 -49
  47. package/dist/document_loaders/fs/openai_whisper_audio.d.ts +0 -23
  48. package/dist/document_loaders/fs/openai_whisper_audio.js +0 -45
  49. package/dist/document_loaders/fs/pdf.cjs +0 -148
  50. package/dist/document_loaders/fs/pdf.d.ts +0 -49
  51. package/dist/document_loaders/fs/pdf.js +0 -144
  52. package/dist/document_loaders/fs/pptx.cjs +0 -46
  53. package/dist/document_loaders/fs/pptx.d.ts +0 -25
  54. package/dist/document_loaders/fs/pptx.js +0 -42
  55. package/dist/document_loaders/fs/srt.cjs +0 -57
  56. package/dist/document_loaders/fs/srt.d.ts +0 -32
  57. package/dist/document_loaders/fs/srt.js +0 -50
  58. package/dist/document_loaders/fs/unstructured.cjs +0 -338
  59. package/dist/document_loaders/fs/unstructured.d.ts +0 -125
  60. package/dist/document_loaders/fs/unstructured.js +0 -333
  61. package/dist/document_loaders/web/apify_dataset.cjs +0 -130
  62. package/dist/document_loaders/web/apify_dataset.d.ts +0 -85
  63. package/dist/document_loaders/web/apify_dataset.js +0 -126
  64. package/dist/document_loaders/web/assemblyai.cjs +0 -200
  65. package/dist/document_loaders/web/assemblyai.d.ts +0 -95
  66. package/dist/document_loaders/web/assemblyai.js +0 -193
  67. package/dist/document_loaders/web/azure_blob_storage_container.cjs +0 -73
  68. package/dist/document_loaders/web/azure_blob_storage_container.d.ts +0 -46
  69. package/dist/document_loaders/web/azure_blob_storage_container.js +0 -69
  70. package/dist/document_loaders/web/azure_blob_storage_file.cjs +0 -124
  71. package/dist/document_loaders/web/azure_blob_storage_file.d.ts +0 -53
  72. package/dist/document_loaders/web/azure_blob_storage_file.js +0 -97
  73. package/dist/document_loaders/web/browserbase.cjs +0 -93
  74. package/dist/document_loaders/web/browserbase.d.ts +0 -48
  75. package/dist/document_loaders/web/browserbase.js +0 -86
  76. package/dist/document_loaders/web/cheerio.cjs +0 -118
  77. package/dist/document_loaders/web/cheerio.d.ts +0 -77
  78. package/dist/document_loaders/web/cheerio.js +0 -114
  79. package/dist/document_loaders/web/college_confidential.cjs +0 -41
  80. package/dist/document_loaders/web/college_confidential.d.ts +0 -25
  81. package/dist/document_loaders/web/college_confidential.js +0 -37
  82. package/dist/document_loaders/web/confluence.cjs +0 -190
  83. package/dist/document_loaders/web/confluence.d.ts +0 -114
  84. package/dist/document_loaders/web/confluence.js +0 -186
  85. package/dist/document_loaders/web/couchbase.cjs +0 -95
  86. package/dist/document_loaders/web/couchbase.d.ts +0 -32
  87. package/dist/document_loaders/web/couchbase.js +0 -91
  88. package/dist/document_loaders/web/figma.cjs +0 -102
  89. package/dist/document_loaders/web/figma.d.ts +0 -82
  90. package/dist/document_loaders/web/figma.js +0 -98
  91. package/dist/document_loaders/web/firecrawl.cjs +0 -95
  92. package/dist/document_loaders/web/firecrawl.d.ts +0 -50
  93. package/dist/document_loaders/web/firecrawl.js +0 -88
  94. package/dist/document_loaders/web/gitbook.cjs +0 -110
  95. package/dist/document_loaders/web/gitbook.d.ts +0 -55
  96. package/dist/document_loaders/web/gitbook.js +0 -106
  97. package/dist/document_loaders/web/github.cjs +0 -615
  98. package/dist/document_loaders/web/github.d.ts +0 -203
  99. package/dist/document_loaders/web/github.js +0 -608
  100. package/dist/document_loaders/web/hn.cjs +0 -90
  101. package/dist/document_loaders/web/hn.d.ts +0 -42
  102. package/dist/document_loaders/web/hn.js +0 -86
  103. package/dist/document_loaders/web/imsdb.cjs +0 -44
  104. package/dist/document_loaders/web/imsdb.d.ts +0 -23
  105. package/dist/document_loaders/web/imsdb.js +0 -40
  106. package/dist/document_loaders/web/notionapi.cjs +0 -404
  107. package/dist/document_loaders/web/notionapi.d.ts +0 -133
  108. package/dist/document_loaders/web/notionapi.js +0 -392
  109. package/dist/document_loaders/web/notiondb.cjs +0 -199
  110. package/dist/document_loaders/web/notiondb.d.ts +0 -56
  111. package/dist/document_loaders/web/notiondb.js +0 -195
  112. package/dist/document_loaders/web/pdf.cjs +0 -140
  113. package/dist/document_loaders/web/pdf.d.ts +0 -35
  114. package/dist/document_loaders/web/pdf.js +0 -136
  115. package/dist/document_loaders/web/playwright.cjs +0 -89
  116. package/dist/document_loaders/web/playwright.d.ts +0 -58
  117. package/dist/document_loaders/web/playwright.js +0 -85
  118. package/dist/document_loaders/web/puppeteer.cjs +0 -139
  119. package/dist/document_loaders/web/puppeteer.d.ts +0 -82
  120. package/dist/document_loaders/web/puppeteer.js +0 -135
  121. package/dist/document_loaders/web/recursive_url.cjs +0 -198
  122. package/dist/document_loaders/web/recursive_url.d.ts +0 -33
  123. package/dist/document_loaders/web/recursive_url.js +0 -194
  124. package/dist/document_loaders/web/s3.cjs +0 -164
  125. package/dist/document_loaders/web/s3.d.ts +0 -78
  126. package/dist/document_loaders/web/s3.js +0 -137
  127. package/dist/document_loaders/web/searchapi.cjs +0 -150
  128. package/dist/document_loaders/web/searchapi.d.ts +0 -76
  129. package/dist/document_loaders/web/searchapi.js +0 -146
  130. package/dist/document_loaders/web/serpapi.cjs +0 -127
  131. package/dist/document_loaders/web/serpapi.d.ts +0 -62
  132. package/dist/document_loaders/web/serpapi.js +0 -123
  133. package/dist/document_loaders/web/sitemap.cjs +0 -118
  134. package/dist/document_loaders/web/sitemap.d.ts +0 -41
  135. package/dist/document_loaders/web/sitemap.js +0 -114
  136. package/dist/document_loaders/web/sonix_audio.cjs +0 -68
  137. package/dist/document_loaders/web/sonix_audio.d.ts +0 -36
  138. package/dist/document_loaders/web/sonix_audio.js +0 -64
  139. package/dist/document_loaders/web/sort_xyz_blockchain.cjs +0 -157
  140. package/dist/document_loaders/web/sort_xyz_blockchain.d.ts +0 -78
  141. package/dist/document_loaders/web/sort_xyz_blockchain.js +0 -153
  142. package/dist/document_loaders/web/youtube.cjs +0 -116
  143. package/dist/document_loaders/web/youtube.d.ts +0 -55
  144. package/dist/document_loaders/web/youtube.js +0 -112
  145. package/dist/experimental/chrome_ai/app/dist/bundle.cjs +0 -1250
  146. package/dist/experimental/chrome_ai/app/dist/bundle.d.ts +0 -1
  147. package/dist/experimental/chrome_ai/app/dist/bundle.js +0 -1249
  148. package/dist/experimental/tools/pyinterpreter.cjs +0 -248
  149. package/dist/experimental/tools/pyinterpreter.d.ts +0 -18
  150. package/dist/experimental/tools/pyinterpreter.js +0 -244
  151. package/dist/retrievers/self_query/chroma.cjs +0 -48
  152. package/dist/retrievers/self_query/chroma.d.ts +0 -26
  153. package/dist/retrievers/self_query/chroma.js +0 -44
  154. package/dist/retrievers/self_query/pinecone.cjs +0 -47
  155. package/dist/retrievers/self_query/pinecone.d.ts +0 -26
  156. package/dist/retrievers/self_query/pinecone.js +0 -43
  157. package/dist/retrievers/self_query/supabase.cjs +0 -278
  158. package/dist/retrievers/self_query/supabase.d.ts +0 -109
  159. package/dist/retrievers/self_query/supabase.js +0 -274
  160. package/dist/retrievers/self_query/supabase_utils.cjs +0 -264
  161. package/dist/retrievers/self_query/supabase_utils.d.ts +0 -101
  162. package/dist/retrievers/self_query/supabase_utils.js +0 -259
  163. package/dist/retrievers/self_query/vectara.cjs +0 -143
  164. package/dist/retrievers/self_query/vectara.d.ts +0 -42
  165. package/dist/retrievers/self_query/vectara.js +0 -139
  166. package/dist/retrievers/self_query/weaviate.cjs +0 -201
  167. package/dist/retrievers/self_query/weaviate.d.ts +0 -99
  168. package/dist/retrievers/self_query/weaviate.js +0 -197
  169. package/dist/types/assemblyai-types.cjs +0 -2
  170. package/dist/types/assemblyai-types.d.ts +0 -4
  171. package/dist/types/assemblyai-types.js +0 -1
  172. package/document_loaders/fs/chatgpt.cjs +0 -1
  173. package/document_loaders/fs/chatgpt.d.cts +0 -1
  174. package/document_loaders/fs/chatgpt.d.ts +0 -1
  175. package/document_loaders/fs/chatgpt.js +0 -1
  176. package/document_loaders/fs/csv.cjs +0 -1
  177. package/document_loaders/fs/csv.d.cts +0 -1
  178. package/document_loaders/fs/csv.d.ts +0 -1
  179. package/document_loaders/fs/csv.js +0 -1
  180. package/document_loaders/fs/docx.cjs +0 -1
  181. package/document_loaders/fs/docx.d.cts +0 -1
  182. package/document_loaders/fs/docx.d.ts +0 -1
  183. package/document_loaders/fs/docx.js +0 -1
  184. package/document_loaders/fs/epub.cjs +0 -1
  185. package/document_loaders/fs/epub.d.cts +0 -1
  186. package/document_loaders/fs/epub.d.ts +0 -1
  187. package/document_loaders/fs/epub.js +0 -1
  188. package/document_loaders/fs/notion.cjs +0 -1
  189. package/document_loaders/fs/notion.d.cts +0 -1
  190. package/document_loaders/fs/notion.d.ts +0 -1
  191. package/document_loaders/fs/notion.js +0 -1
  192. package/document_loaders/fs/obsidian.cjs +0 -1
  193. package/document_loaders/fs/obsidian.d.cts +0 -1
  194. package/document_loaders/fs/obsidian.d.ts +0 -1
  195. package/document_loaders/fs/obsidian.js +0 -1
  196. package/document_loaders/fs/openai_whisper_audio.cjs +0 -1
  197. package/document_loaders/fs/openai_whisper_audio.d.cts +0 -1
  198. package/document_loaders/fs/openai_whisper_audio.d.ts +0 -1
  199. package/document_loaders/fs/openai_whisper_audio.js +0 -1
  200. package/document_loaders/fs/pdf.cjs +0 -1
  201. package/document_loaders/fs/pdf.d.cts +0 -1
  202. package/document_loaders/fs/pdf.d.ts +0 -1
  203. package/document_loaders/fs/pdf.js +0 -1
  204. package/document_loaders/fs/pptx.cjs +0 -1
  205. package/document_loaders/fs/pptx.d.cts +0 -1
  206. package/document_loaders/fs/pptx.d.ts +0 -1
  207. package/document_loaders/fs/pptx.js +0 -1
  208. package/document_loaders/fs/srt.cjs +0 -1
  209. package/document_loaders/fs/srt.d.cts +0 -1
  210. package/document_loaders/fs/srt.d.ts +0 -1
  211. package/document_loaders/fs/srt.js +0 -1
  212. package/document_loaders/fs/unstructured.cjs +0 -1
  213. package/document_loaders/fs/unstructured.d.cts +0 -1
  214. package/document_loaders/fs/unstructured.d.ts +0 -1
  215. package/document_loaders/fs/unstructured.js +0 -1
  216. package/document_loaders/web/apify_dataset.cjs +0 -1
  217. package/document_loaders/web/apify_dataset.d.cts +0 -1
  218. package/document_loaders/web/apify_dataset.d.ts +0 -1
  219. package/document_loaders/web/apify_dataset.js +0 -1
  220. package/document_loaders/web/assemblyai.cjs +0 -1
  221. package/document_loaders/web/assemblyai.d.cts +0 -1
  222. package/document_loaders/web/assemblyai.d.ts +0 -1
  223. package/document_loaders/web/assemblyai.js +0 -1
  224. package/document_loaders/web/azure_blob_storage_container.cjs +0 -1
  225. package/document_loaders/web/azure_blob_storage_container.d.cts +0 -1
  226. package/document_loaders/web/azure_blob_storage_container.d.ts +0 -1
  227. package/document_loaders/web/azure_blob_storage_container.js +0 -1
  228. package/document_loaders/web/azure_blob_storage_file.cjs +0 -1
  229. package/document_loaders/web/azure_blob_storage_file.d.cts +0 -1
  230. package/document_loaders/web/azure_blob_storage_file.d.ts +0 -1
  231. package/document_loaders/web/azure_blob_storage_file.js +0 -1
  232. package/document_loaders/web/browserbase.cjs +0 -1
  233. package/document_loaders/web/browserbase.d.cts +0 -1
  234. package/document_loaders/web/browserbase.d.ts +0 -1
  235. package/document_loaders/web/browserbase.js +0 -1
  236. package/document_loaders/web/cheerio.cjs +0 -1
  237. package/document_loaders/web/cheerio.d.cts +0 -1
  238. package/document_loaders/web/cheerio.d.ts +0 -1
  239. package/document_loaders/web/cheerio.js +0 -1
  240. package/document_loaders/web/college_confidential.cjs +0 -1
  241. package/document_loaders/web/college_confidential.d.cts +0 -1
  242. package/document_loaders/web/college_confidential.d.ts +0 -1
  243. package/document_loaders/web/college_confidential.js +0 -1
  244. package/document_loaders/web/confluence.cjs +0 -1
  245. package/document_loaders/web/confluence.d.cts +0 -1
  246. package/document_loaders/web/confluence.d.ts +0 -1
  247. package/document_loaders/web/confluence.js +0 -1
  248. package/document_loaders/web/couchbase.cjs +0 -1
  249. package/document_loaders/web/couchbase.d.cts +0 -1
  250. package/document_loaders/web/couchbase.d.ts +0 -1
  251. package/document_loaders/web/couchbase.js +0 -1
  252. package/document_loaders/web/figma.cjs +0 -1
  253. package/document_loaders/web/figma.d.cts +0 -1
  254. package/document_loaders/web/figma.d.ts +0 -1
  255. package/document_loaders/web/figma.js +0 -1
  256. package/document_loaders/web/firecrawl.cjs +0 -1
  257. package/document_loaders/web/firecrawl.d.cts +0 -1
  258. package/document_loaders/web/firecrawl.d.ts +0 -1
  259. package/document_loaders/web/firecrawl.js +0 -1
  260. package/document_loaders/web/gitbook.cjs +0 -1
  261. package/document_loaders/web/gitbook.d.cts +0 -1
  262. package/document_loaders/web/gitbook.d.ts +0 -1
  263. package/document_loaders/web/gitbook.js +0 -1
  264. package/document_loaders/web/github.cjs +0 -1
  265. package/document_loaders/web/github.d.cts +0 -1
  266. package/document_loaders/web/github.d.ts +0 -1
  267. package/document_loaders/web/github.js +0 -1
  268. package/document_loaders/web/hn.cjs +0 -1
  269. package/document_loaders/web/hn.d.cts +0 -1
  270. package/document_loaders/web/hn.d.ts +0 -1
  271. package/document_loaders/web/hn.js +0 -1
  272. package/document_loaders/web/imsdb.cjs +0 -1
  273. package/document_loaders/web/imsdb.d.cts +0 -1
  274. package/document_loaders/web/imsdb.d.ts +0 -1
  275. package/document_loaders/web/imsdb.js +0 -1
  276. package/document_loaders/web/notionapi.cjs +0 -1
  277. package/document_loaders/web/notionapi.d.cts +0 -1
  278. package/document_loaders/web/notionapi.d.ts +0 -1
  279. package/document_loaders/web/notionapi.js +0 -1
  280. package/document_loaders/web/notiondb.cjs +0 -1
  281. package/document_loaders/web/notiondb.d.cts +0 -1
  282. package/document_loaders/web/notiondb.d.ts +0 -1
  283. package/document_loaders/web/notiondb.js +0 -1
  284. package/document_loaders/web/pdf.cjs +0 -1
  285. package/document_loaders/web/pdf.d.cts +0 -1
  286. package/document_loaders/web/pdf.d.ts +0 -1
  287. package/document_loaders/web/pdf.js +0 -1
  288. package/document_loaders/web/playwright.cjs +0 -1
  289. package/document_loaders/web/playwright.d.cts +0 -1
  290. package/document_loaders/web/playwright.d.ts +0 -1
  291. package/document_loaders/web/playwright.js +0 -1
  292. package/document_loaders/web/puppeteer.cjs +0 -1
  293. package/document_loaders/web/puppeteer.d.cts +0 -1
  294. package/document_loaders/web/puppeteer.d.ts +0 -1
  295. package/document_loaders/web/puppeteer.js +0 -1
  296. package/document_loaders/web/recursive_url.cjs +0 -1
  297. package/document_loaders/web/recursive_url.d.cts +0 -1
  298. package/document_loaders/web/recursive_url.d.ts +0 -1
  299. package/document_loaders/web/recursive_url.js +0 -1
  300. package/document_loaders/web/s3.cjs +0 -1
  301. package/document_loaders/web/s3.d.cts +0 -1
  302. package/document_loaders/web/s3.d.ts +0 -1
  303. package/document_loaders/web/s3.js +0 -1
  304. package/document_loaders/web/searchapi.cjs +0 -1
  305. package/document_loaders/web/searchapi.d.cts +0 -1
  306. package/document_loaders/web/searchapi.d.ts +0 -1
  307. package/document_loaders/web/searchapi.js +0 -1
  308. package/document_loaders/web/serpapi.cjs +0 -1
  309. package/document_loaders/web/serpapi.d.cts +0 -1
  310. package/document_loaders/web/serpapi.d.ts +0 -1
  311. package/document_loaders/web/serpapi.js +0 -1
  312. package/document_loaders/web/sitemap.cjs +0 -1
  313. package/document_loaders/web/sitemap.d.cts +0 -1
  314. package/document_loaders/web/sitemap.d.ts +0 -1
  315. package/document_loaders/web/sitemap.js +0 -1
  316. package/document_loaders/web/sonix_audio.cjs +0 -1
  317. package/document_loaders/web/sonix_audio.d.cts +0 -1
  318. package/document_loaders/web/sonix_audio.d.ts +0 -1
  319. package/document_loaders/web/sonix_audio.js +0 -1
  320. package/document_loaders/web/sort_xyz_blockchain.cjs +0 -1
  321. package/document_loaders/web/sort_xyz_blockchain.d.cts +0 -1
  322. package/document_loaders/web/sort_xyz_blockchain.d.ts +0 -1
  323. package/document_loaders/web/sort_xyz_blockchain.js +0 -1
  324. package/document_loaders/web/youtube.cjs +0 -1
  325. package/document_loaders/web/youtube.d.cts +0 -1
  326. package/document_loaders/web/youtube.d.ts +0 -1
  327. package/document_loaders/web/youtube.js +0 -1
  328. package/experimental/tools/pyinterpreter.cjs +0 -1
  329. package/experimental/tools/pyinterpreter.d.cts +0 -1
  330. package/experimental/tools/pyinterpreter.d.ts +0 -1
  331. package/experimental/tools/pyinterpreter.js +0 -1
  332. package/memory/index.cjs +0 -1
  333. package/memory/index.d.cts +0 -1
  334. package/memory/index.d.ts +0 -1
  335. package/memory/index.js +0 -1
  336. package/retrievers/self_query/chroma.cjs +0 -1
  337. package/retrievers/self_query/chroma.d.cts +0 -1
  338. package/retrievers/self_query/chroma.d.ts +0 -1
  339. package/retrievers/self_query/chroma.js +0 -1
  340. package/retrievers/self_query/pinecone.cjs +0 -1
  341. package/retrievers/self_query/pinecone.d.cts +0 -1
  342. package/retrievers/self_query/pinecone.d.ts +0 -1
  343. package/retrievers/self_query/pinecone.js +0 -1
  344. package/retrievers/self_query/supabase.cjs +0 -1
  345. package/retrievers/self_query/supabase.d.cts +0 -1
  346. package/retrievers/self_query/supabase.d.ts +0 -1
  347. package/retrievers/self_query/supabase.js +0 -1
  348. package/retrievers/self_query/vectara.cjs +0 -1
  349. package/retrievers/self_query/vectara.d.cts +0 -1
  350. package/retrievers/self_query/vectara.d.ts +0 -1
  351. package/retrievers/self_query/vectara.js +0 -1
  352. package/retrievers/self_query/weaviate.cjs +0 -1
  353. package/retrievers/self_query/weaviate.d.cts +0 -1
  354. package/retrievers/self_query/weaviate.d.ts +0 -1
  355. package/retrievers/self_query/weaviate.js +0 -1
@@ -1,333 +0,0 @@
1
- import { Document } from "@langchain/core/documents";
2
- import { getEnv } from "@langchain/core/utils/env";
3
- import { DirectoryLoader, UnknownHandling, } from "./directory.js";
4
- import { BaseDocumentLoader } from "../base.js";
5
- import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
6
- /* #__PURE__ */ logVersion020MigrationWarning({
7
- oldEntrypointName: "document_loaders/fs/unstructured",
8
- newPackageName: "@langchain/community",
9
- });
10
- const UNSTRUCTURED_API_FILETYPES = [
11
- ".txt",
12
- ".text",
13
- ".pdf",
14
- ".docx",
15
- ".doc",
16
- ".jpg",
17
- ".jpeg",
18
- ".eml",
19
- ".html",
20
- ".htm",
21
- ".md",
22
- ".pptx",
23
- ".ppt",
24
- ".msg",
25
- ".rtf",
26
- ".xlsx",
27
- ".xls",
28
- ".odt",
29
- ".epub",
30
- ];
31
- /**
32
- * @deprecated - Import from "@langchain/community/document_loaders/fs/unstructured" instead. This entrypoint will be removed in 0.3.0.
33
- *
34
- * A document loader that uses the Unstructured API to load unstructured
35
- * documents. It supports both the new syntax with options object and the
36
- * legacy syntax for backward compatibility. The load() method sends a
37
- * partitioning request to the Unstructured API and retrieves the
38
- * partitioned elements. It creates a Document instance for each element
39
- * and returns an array of Document instances.
40
- */
41
- export class UnstructuredLoader extends BaseDocumentLoader {
42
- constructor(filePathOrLegacyApiUrlOrMemoryBuffer, optionsOrLegacyFilePath = {}) {
43
- super();
44
- Object.defineProperty(this, "filePath", {
45
- enumerable: true,
46
- configurable: true,
47
- writable: true,
48
- value: void 0
49
- });
50
- Object.defineProperty(this, "buffer", {
51
- enumerable: true,
52
- configurable: true,
53
- writable: true,
54
- value: void 0
55
- });
56
- Object.defineProperty(this, "fileName", {
57
- enumerable: true,
58
- configurable: true,
59
- writable: true,
60
- value: void 0
61
- });
62
- Object.defineProperty(this, "apiUrl", {
63
- enumerable: true,
64
- configurable: true,
65
- writable: true,
66
- value: "https://api.unstructured.io/general/v0/general"
67
- });
68
- Object.defineProperty(this, "apiKey", {
69
- enumerable: true,
70
- configurable: true,
71
- writable: true,
72
- value: void 0
73
- });
74
- Object.defineProperty(this, "strategy", {
75
- enumerable: true,
76
- configurable: true,
77
- writable: true,
78
- value: "hi_res"
79
- });
80
- Object.defineProperty(this, "encoding", {
81
- enumerable: true,
82
- configurable: true,
83
- writable: true,
84
- value: void 0
85
- });
86
- Object.defineProperty(this, "ocrLanguages", {
87
- enumerable: true,
88
- configurable: true,
89
- writable: true,
90
- value: []
91
- });
92
- Object.defineProperty(this, "coordinates", {
93
- enumerable: true,
94
- configurable: true,
95
- writable: true,
96
- value: void 0
97
- });
98
- Object.defineProperty(this, "pdfInferTableStructure", {
99
- enumerable: true,
100
- configurable: true,
101
- writable: true,
102
- value: void 0
103
- });
104
- Object.defineProperty(this, "xmlKeepTags", {
105
- enumerable: true,
106
- configurable: true,
107
- writable: true,
108
- value: void 0
109
- });
110
- Object.defineProperty(this, "skipInferTableTypes", {
111
- enumerable: true,
112
- configurable: true,
113
- writable: true,
114
- value: void 0
115
- });
116
- Object.defineProperty(this, "hiResModelName", {
117
- enumerable: true,
118
- configurable: true,
119
- writable: true,
120
- value: void 0
121
- });
122
- Object.defineProperty(this, "includePageBreaks", {
123
- enumerable: true,
124
- configurable: true,
125
- writable: true,
126
- value: void 0
127
- });
128
- Object.defineProperty(this, "chunkingStrategy", {
129
- enumerable: true,
130
- configurable: true,
131
- writable: true,
132
- value: void 0
133
- });
134
- Object.defineProperty(this, "multiPageSections", {
135
- enumerable: true,
136
- configurable: true,
137
- writable: true,
138
- value: void 0
139
- });
140
- Object.defineProperty(this, "combineUnderNChars", {
141
- enumerable: true,
142
- configurable: true,
143
- writable: true,
144
- value: void 0
145
- });
146
- Object.defineProperty(this, "newAfterNChars", {
147
- enumerable: true,
148
- configurable: true,
149
- writable: true,
150
- value: void 0
151
- });
152
- Object.defineProperty(this, "maxCharacters", {
153
- enumerable: true,
154
- configurable: true,
155
- writable: true,
156
- value: void 0
157
- });
158
- // Temporary shim to avoid breaking existing users
159
- // Remove when API keys are enforced by Unstructured and existing code will break anyway
160
- const isLegacySyntax = typeof optionsOrLegacyFilePath === "string";
161
- const isMemorySyntax = typeof filePathOrLegacyApiUrlOrMemoryBuffer === "object";
162
- if (isMemorySyntax) {
163
- this.buffer = filePathOrLegacyApiUrlOrMemoryBuffer.buffer;
164
- this.fileName = filePathOrLegacyApiUrlOrMemoryBuffer.fileName;
165
- }
166
- else if (isLegacySyntax) {
167
- this.filePath = optionsOrLegacyFilePath;
168
- this.apiUrl = filePathOrLegacyApiUrlOrMemoryBuffer;
169
- }
170
- else {
171
- this.filePath = filePathOrLegacyApiUrlOrMemoryBuffer;
172
- }
173
- if (!isLegacySyntax) {
174
- const options = optionsOrLegacyFilePath;
175
- this.apiKey = options.apiKey;
176
- this.apiUrl = options.apiUrl ?? this.apiUrl;
177
- this.strategy = options.strategy ?? this.strategy;
178
- this.encoding = options.encoding;
179
- this.ocrLanguages = options.ocrLanguages ?? this.ocrLanguages;
180
- this.coordinates = options.coordinates;
181
- this.pdfInferTableStructure = options.pdfInferTableStructure;
182
- this.xmlKeepTags = options.xmlKeepTags;
183
- this.skipInferTableTypes = options.skipInferTableTypes;
184
- this.hiResModelName = options.hiResModelName;
185
- this.includePageBreaks = options.includePageBreaks;
186
- this.chunkingStrategy = options.chunkingStrategy;
187
- this.multiPageSections = options.multiPageSections;
188
- this.combineUnderNChars = options.combineUnderNChars;
189
- this.newAfterNChars = options.newAfterNChars;
190
- this.maxCharacters = options.maxCharacters;
191
- }
192
- }
193
- async _partition() {
194
- let { buffer } = this;
195
- let { fileName } = this;
196
- if (!buffer) {
197
- const { readFile, basename } = await this.imports();
198
- buffer = await readFile(this.filePath);
199
- fileName = basename(this.filePath);
200
- // I'm aware this reads the file into memory first, but we have lots of work
201
- // to do on then consuming Documents in a streaming fashion anyway, so not
202
- // worried about this for now.
203
- }
204
- const formData = new FormData();
205
- formData.append("files", new Blob([buffer]), fileName);
206
- formData.append("strategy", this.strategy);
207
- this.ocrLanguages.forEach((language) => {
208
- formData.append("ocr_languages", language);
209
- });
210
- if (this.encoding) {
211
- formData.append("encoding", this.encoding);
212
- }
213
- if (this.coordinates === true) {
214
- formData.append("coordinates", "true");
215
- }
216
- if (this.pdfInferTableStructure === true) {
217
- formData.append("pdf_infer_table_structure", "true");
218
- }
219
- if (this.xmlKeepTags === true) {
220
- formData.append("xml_keep_tags", "true");
221
- }
222
- if (this.skipInferTableTypes) {
223
- formData.append("skip_infer_table_types", JSON.stringify(this.skipInferTableTypes));
224
- }
225
- if (this.hiResModelName) {
226
- formData.append("hi_res_model_name", this.hiResModelName);
227
- }
228
- if (this.includePageBreaks) {
229
- formData.append("include_page_breaks", "true");
230
- }
231
- if (this.chunkingStrategy) {
232
- formData.append("chunking_strategy", this.chunkingStrategy);
233
- }
234
- if (this.multiPageSections !== undefined) {
235
- formData.append("multipage_sections", this.multiPageSections ? "true" : "false");
236
- }
237
- if (this.combineUnderNChars !== undefined) {
238
- formData.append("combine_under_n_chars", String(this.combineUnderNChars));
239
- }
240
- if (this.newAfterNChars !== undefined) {
241
- formData.append("new_after_n_chars", String(this.newAfterNChars));
242
- }
243
- if (this.maxCharacters !== undefined) {
244
- formData.append("max_characters", String(this.maxCharacters));
245
- }
246
- const headers = {
247
- "UNSTRUCTURED-API-KEY": this.apiKey ?? "",
248
- };
249
- const response = await fetch(this.apiUrl, {
250
- method: "POST",
251
- body: formData,
252
- headers,
253
- });
254
- if (!response.ok) {
255
- throw new Error(`Failed to partition file ${this.filePath} with error ${response.status} and message ${await response.text()}`);
256
- }
257
- const elements = await response.json();
258
- if (!Array.isArray(elements)) {
259
- throw new Error(`Expected partitioning request to return an array, but got ${elements}`);
260
- }
261
- return elements.filter((el) => typeof el.text === "string");
262
- }
263
- async load() {
264
- const elements = await this._partition();
265
- const documents = [];
266
- for (const element of elements) {
267
- const { metadata, text } = element;
268
- if (typeof text === "string" && text !== "") {
269
- documents.push(new Document({
270
- pageContent: text,
271
- metadata: {
272
- ...metadata,
273
- category: element.type,
274
- },
275
- }));
276
- }
277
- }
278
- return documents;
279
- }
280
- async imports() {
281
- try {
282
- const { readFile } = await import("node:fs/promises");
283
- const { basename } = await import("node:path");
284
- return { readFile, basename };
285
- }
286
- catch (e) {
287
- console.error(e);
288
- throw new Error(`Failed to load fs/promises. TextLoader available only on environment 'node'. It appears you are running environment '${getEnv()}'. See https://<link to docs> for alternatives.`);
289
- }
290
- }
291
- }
292
- /**
293
- * A document loader that loads unstructured documents from a directory
294
- * using the UnstructuredLoader. It creates a UnstructuredLoader instance
295
- * for each supported file type and passes it to the DirectoryLoader
296
- * constructor.
297
- * @example
298
- * ```typescript
299
- * const loader = new UnstructuredDirectoryLoader("path/to/directory", {
300
- * apiKey: "MY_API_KEY",
301
- * });
302
- * const docs = await loader.load();
303
- * ```
304
- */
305
- export class UnstructuredDirectoryLoader extends DirectoryLoader {
306
- constructor(directoryPathOrLegacyApiUrl, optionsOrLegacyDirectoryPath, legacyOptionRecursive = true, legacyOptionUnknown = UnknownHandling.Warn) {
307
- let directoryPath;
308
- let options;
309
- // Temporary shim to avoid breaking existing users
310
- // Remove when API keys are enforced by Unstructured and existing code will break anyway
311
- const isLegacySyntax = typeof optionsOrLegacyDirectoryPath === "string";
312
- if (isLegacySyntax) {
313
- directoryPath = optionsOrLegacyDirectoryPath;
314
- options = {
315
- apiUrl: directoryPathOrLegacyApiUrl,
316
- recursive: legacyOptionRecursive,
317
- unknown: legacyOptionUnknown,
318
- };
319
- }
320
- else {
321
- directoryPath = directoryPathOrLegacyApiUrl;
322
- options = optionsOrLegacyDirectoryPath;
323
- }
324
- const loader = (p) => new UnstructuredLoader(p, options);
325
- const loaders = UNSTRUCTURED_API_FILETYPES.reduce((loadersObject, filetype) => {
326
- // eslint-disable-next-line no-param-reassign
327
- loadersObject[filetype] = loader;
328
- return loadersObject;
329
- }, {});
330
- super(directoryPath, loaders, options.recursive, options.unknown);
331
- }
332
- }
333
- export { UnknownHandling };
@@ -1,130 +0,0 @@
1
- "use strict";
2
- /* eslint-disable @typescript-eslint/no-explicit-any */
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.ApifyDatasetLoader = void 0;
5
- const apify_client_1 = require("apify-client");
6
- const async_caller_1 = require("@langchain/core/utils/async_caller");
7
- const env_1 = require("@langchain/core/utils/env");
8
- const base_js_1 = require("../base.cjs");
9
- const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
10
- /* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
11
- oldEntrypointName: "document_loaders/web/apify_dataset",
12
- newPackageName: "@langchain/community",
13
- });
14
- /**
15
- * @deprecated - Import from "@langchain/community/document_loaders/web/apify_dataset" instead. This entrypoint will be removed in 0.3.0.
16
- * A class that extends the BaseDocumentLoader and implements the
17
- * DocumentLoader interface. It represents a document loader that loads
18
- * documents from an Apify dataset.
19
- * @example
20
- * ```typescript
21
- * const loader = new ApifyDatasetLoader("your-dataset-id", {
22
- * datasetMappingFunction: (item) =>
23
- * new Document({
24
- * pageContent: item.text || "",
25
- * metadata: { source: item.url },
26
- * }),
27
- * clientOptions: {
28
- * token: "your-apify-token",
29
- * },
30
- * });
31
- *
32
- * const docs = await loader.load();
33
- *
34
- * const chain = new RetrievalQAChain();
35
- * const res = await chain.invoke({ query: "What is LangChain?" });
36
- *
37
- * console.log(res.text);
38
- * console.log(res.sourceDocuments.map((d) => d.metadata.source));
39
- * ```
40
- */
41
- class ApifyDatasetLoader extends base_js_1.BaseDocumentLoader {
42
- constructor(datasetId, config) {
43
- super();
44
- Object.defineProperty(this, "apifyClient", {
45
- enumerable: true,
46
- configurable: true,
47
- writable: true,
48
- value: void 0
49
- });
50
- Object.defineProperty(this, "datasetId", {
51
- enumerable: true,
52
- configurable: true,
53
- writable: true,
54
- value: void 0
55
- });
56
- Object.defineProperty(this, "datasetMappingFunction", {
57
- enumerable: true,
58
- configurable: true,
59
- writable: true,
60
- value: void 0
61
- });
62
- Object.defineProperty(this, "caller", {
63
- enumerable: true,
64
- configurable: true,
65
- writable: true,
66
- value: void 0
67
- });
68
- const { clientOptions, datasetMappingFunction, ...asyncCallerParams } = config;
69
- const token = ApifyDatasetLoader._getApifyApiToken(clientOptions);
70
- this.apifyClient = new apify_client_1.ApifyClient({ ...clientOptions, token });
71
- this.datasetId = datasetId;
72
- this.datasetMappingFunction = datasetMappingFunction;
73
- this.caller = new async_caller_1.AsyncCaller(asyncCallerParams);
74
- }
75
- static _getApifyApiToken(config) {
76
- return config?.token ?? (0, env_1.getEnvironmentVariable)("APIFY_API_TOKEN");
77
- }
78
- /**
79
- * Retrieves the dataset items from the Apify platform and applies the
80
- * datasetMappingFunction to each item to create an array of Document
81
- * instances.
82
- * @returns An array of Document instances.
83
- */
84
- async load() {
85
- const dataset = await this.apifyClient
86
- .dataset(this.datasetId)
87
- .listItems({ clean: true });
88
- const documentList = await Promise.all(dataset.items.map((item) => this.caller.call(async () => this.datasetMappingFunction(item))));
89
- return documentList.flat();
90
- }
91
- /**
92
- * Create an ApifyDatasetLoader by calling an Actor on the Apify platform and waiting for its results to be ready.
93
- * @param actorId The ID or name of the Actor on the Apify platform.
94
- * @param input The input object of the Actor that you're trying to run.
95
- * @param options Options specifying settings for the Actor run.
96
- * @param options.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
97
- * @returns An instance of `ApifyDatasetLoader` with the results from the Actor run.
98
- */
99
- static async fromActorCall(actorId, input, config) {
100
- const apifyApiToken = ApifyDatasetLoader._getApifyApiToken(config.clientOptions);
101
- const apifyClient = new apify_client_1.ApifyClient({ token: apifyApiToken });
102
- const actorCall = await apifyClient
103
- .actor(actorId)
104
- .call(input, config.callOptions ?? {});
105
- return new ApifyDatasetLoader(actorCall.defaultDatasetId, {
106
- datasetMappingFunction: config.datasetMappingFunction,
107
- clientOptions: { ...config.clientOptions, token: apifyApiToken },
108
- });
109
- }
110
- /**
111
- * Create an ApifyDatasetLoader by calling a saved Actor task on the Apify platform and waiting for its results to be ready.
112
- * @param taskId The ID or name of the task on the Apify platform.
113
- * @param input The input object of the task that you're trying to run. Overrides the task's saved input.
114
- * @param options Options specifying settings for the task run.
115
- * @param options.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
116
- * @returns An instance of `ApifyDatasetLoader` with the results from the task's run.
117
- */
118
- static async fromActorTaskCall(taskId, input, config) {
119
- const apifyApiToken = ApifyDatasetLoader._getApifyApiToken(config.clientOptions);
120
- const apifyClient = new apify_client_1.ApifyClient({ token: apifyApiToken });
121
- const taskCall = await apifyClient
122
- .task(taskId)
123
- .call(input, config.callOptions ?? {});
124
- return new ApifyDatasetLoader(taskCall.defaultDatasetId, {
125
- datasetMappingFunction: config.datasetMappingFunction,
126
- clientOptions: { ...config.clientOptions, token: apifyApiToken },
127
- });
128
- }
129
- }
130
- exports.ApifyDatasetLoader = ApifyDatasetLoader;
@@ -1,85 +0,0 @@
1
- import { ActorCallOptions, ApifyClient, ApifyClientOptions, TaskCallOptions } from "apify-client";
2
- import { Document } from "@langchain/core/documents";
3
- import { AsyncCaller, AsyncCallerParams } from "@langchain/core/utils/async_caller";
4
- import { BaseDocumentLoader, DocumentLoader } from "../base.js";
5
- /**
6
- * @deprecated - Import from "@langchain/community/document_loaders/web/apify_dataset" instead. This entrypoint will be removed in 0.3.0.
7
- * A type that represents a function that takes a single object (an Apify
8
- * dataset item) and converts it to an instance of the Document class.
9
- *
10
- * Change function signature to only be asynchronous for simplicity in v0.1.0
11
- * https://github.com/langchain-ai/langchainjs/pull/3262
12
- */
13
- export type ApifyDatasetMappingFunction<Metadata extends Record<string, any>> = (item: Record<string | number, unknown>) => Document<Metadata> | Array<Document<Metadata>> | Promise<Document<Metadata> | Array<Document<Metadata>>>;
14
- export interface ApifyDatasetLoaderConfig<Metadata extends Record<string, any>> extends AsyncCallerParams {
15
- datasetMappingFunction: ApifyDatasetMappingFunction<Metadata>;
16
- clientOptions?: ApifyClientOptions;
17
- }
18
- /**
19
- * @deprecated - Import from "@langchain/community/document_loaders/web/apify_dataset" instead. This entrypoint will be removed in 0.3.0.
20
- * A class that extends the BaseDocumentLoader and implements the
21
- * DocumentLoader interface. It represents a document loader that loads
22
- * documents from an Apify dataset.
23
- * @example
24
- * ```typescript
25
- * const loader = new ApifyDatasetLoader("your-dataset-id", {
26
- * datasetMappingFunction: (item) =>
27
- * new Document({
28
- * pageContent: item.text || "",
29
- * metadata: { source: item.url },
30
- * }),
31
- * clientOptions: {
32
- * token: "your-apify-token",
33
- * },
34
- * });
35
- *
36
- * const docs = await loader.load();
37
- *
38
- * const chain = new RetrievalQAChain();
39
- * const res = await chain.invoke({ query: "What is LangChain?" });
40
- *
41
- * console.log(res.text);
42
- * console.log(res.sourceDocuments.map((d) => d.metadata.source));
43
- * ```
44
- */
45
- export declare class ApifyDatasetLoader<Metadata extends Record<string, any>> extends BaseDocumentLoader implements DocumentLoader {
46
- protected apifyClient: ApifyClient;
47
- protected datasetId: string;
48
- protected datasetMappingFunction: ApifyDatasetMappingFunction<Metadata>;
49
- protected caller: AsyncCaller;
50
- constructor(datasetId: string, config: ApifyDatasetLoaderConfig<Metadata>);
51
- private static _getApifyApiToken;
52
- /**
53
- * Retrieves the dataset items from the Apify platform and applies the
54
- * datasetMappingFunction to each item to create an array of Document
55
- * instances.
56
- * @returns An array of Document instances.
57
- */
58
- load(): Promise<Document<Metadata>[]>;
59
- /**
60
- * Create an ApifyDatasetLoader by calling an Actor on the Apify platform and waiting for its results to be ready.
61
- * @param actorId The ID or name of the Actor on the Apify platform.
62
- * @param input The input object of the Actor that you're trying to run.
63
- * @param options Options specifying settings for the Actor run.
64
- * @param options.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
65
- * @returns An instance of `ApifyDatasetLoader` with the results from the Actor run.
66
- */
67
- static fromActorCall<Metadata extends Record<string, any>>(actorId: string, input: Record<string | number, unknown>, config: {
68
- callOptions?: ActorCallOptions;
69
- clientOptions?: ApifyClientOptions;
70
- datasetMappingFunction: ApifyDatasetMappingFunction<Metadata>;
71
- }): Promise<ApifyDatasetLoader<Metadata>>;
72
- /**
73
- * Create an ApifyDatasetLoader by calling a saved Actor task on the Apify platform and waiting for its results to be ready.
74
- * @param taskId The ID or name of the task on the Apify platform.
75
- * @param input The input object of the task that you're trying to run. Overrides the task's saved input.
76
- * @param options Options specifying settings for the task run.
77
- * @param options.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
78
- * @returns An instance of `ApifyDatasetLoader` with the results from the task's run.
79
- */
80
- static fromActorTaskCall<Metadata extends Record<string, any>>(taskId: string, input: Record<string | number, unknown>, config: {
81
- callOptions?: TaskCallOptions;
82
- clientOptions?: ApifyClientOptions;
83
- datasetMappingFunction: ApifyDatasetMappingFunction<Metadata>;
84
- }): Promise<ApifyDatasetLoader<Metadata>>;
85
- }
@@ -1,126 +0,0 @@
1
- /* eslint-disable @typescript-eslint/no-explicit-any */
2
- import { ApifyClient, } from "apify-client";
3
- import { AsyncCaller, } from "@langchain/core/utils/async_caller";
4
- import { getEnvironmentVariable } from "@langchain/core/utils/env";
5
- import { BaseDocumentLoader } from "../base.js";
6
- import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
7
- /* #__PURE__ */ logVersion020MigrationWarning({
8
- oldEntrypointName: "document_loaders/web/apify_dataset",
9
- newPackageName: "@langchain/community",
10
- });
11
- /**
12
- * @deprecated - Import from "@langchain/community/document_loaders/web/apify_dataset" instead. This entrypoint will be removed in 0.3.0.
13
- * A class that extends the BaseDocumentLoader and implements the
14
- * DocumentLoader interface. It represents a document loader that loads
15
- * documents from an Apify dataset.
16
- * @example
17
- * ```typescript
18
- * const loader = new ApifyDatasetLoader("your-dataset-id", {
19
- * datasetMappingFunction: (item) =>
20
- * new Document({
21
- * pageContent: item.text || "",
22
- * metadata: { source: item.url },
23
- * }),
24
- * clientOptions: {
25
- * token: "your-apify-token",
26
- * },
27
- * });
28
- *
29
- * const docs = await loader.load();
30
- *
31
- * const chain = new RetrievalQAChain();
32
- * const res = await chain.invoke({ query: "What is LangChain?" });
33
- *
34
- * console.log(res.text);
35
- * console.log(res.sourceDocuments.map((d) => d.metadata.source));
36
- * ```
37
- */
38
- export class ApifyDatasetLoader extends BaseDocumentLoader {
39
- constructor(datasetId, config) {
40
- super();
41
- Object.defineProperty(this, "apifyClient", {
42
- enumerable: true,
43
- configurable: true,
44
- writable: true,
45
- value: void 0
46
- });
47
- Object.defineProperty(this, "datasetId", {
48
- enumerable: true,
49
- configurable: true,
50
- writable: true,
51
- value: void 0
52
- });
53
- Object.defineProperty(this, "datasetMappingFunction", {
54
- enumerable: true,
55
- configurable: true,
56
- writable: true,
57
- value: void 0
58
- });
59
- Object.defineProperty(this, "caller", {
60
- enumerable: true,
61
- configurable: true,
62
- writable: true,
63
- value: void 0
64
- });
65
- const { clientOptions, datasetMappingFunction, ...asyncCallerParams } = config;
66
- const token = ApifyDatasetLoader._getApifyApiToken(clientOptions);
67
- this.apifyClient = new ApifyClient({ ...clientOptions, token });
68
- this.datasetId = datasetId;
69
- this.datasetMappingFunction = datasetMappingFunction;
70
- this.caller = new AsyncCaller(asyncCallerParams);
71
- }
72
- static _getApifyApiToken(config) {
73
- return config?.token ?? getEnvironmentVariable("APIFY_API_TOKEN");
74
- }
75
- /**
76
- * Retrieves the dataset items from the Apify platform and applies the
77
- * datasetMappingFunction to each item to create an array of Document
78
- * instances.
79
- * @returns An array of Document instances.
80
- */
81
- async load() {
82
- const dataset = await this.apifyClient
83
- .dataset(this.datasetId)
84
- .listItems({ clean: true });
85
- const documentList = await Promise.all(dataset.items.map((item) => this.caller.call(async () => this.datasetMappingFunction(item))));
86
- return documentList.flat();
87
- }
88
- /**
89
- * Create an ApifyDatasetLoader by calling an Actor on the Apify platform and waiting for its results to be ready.
90
- * @param actorId The ID or name of the Actor on the Apify platform.
91
- * @param input The input object of the Actor that you're trying to run.
92
- * @param options Options specifying settings for the Actor run.
93
- * @param options.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
94
- * @returns An instance of `ApifyDatasetLoader` with the results from the Actor run.
95
- */
96
- static async fromActorCall(actorId, input, config) {
97
- const apifyApiToken = ApifyDatasetLoader._getApifyApiToken(config.clientOptions);
98
- const apifyClient = new ApifyClient({ token: apifyApiToken });
99
- const actorCall = await apifyClient
100
- .actor(actorId)
101
- .call(input, config.callOptions ?? {});
102
- return new ApifyDatasetLoader(actorCall.defaultDatasetId, {
103
- datasetMappingFunction: config.datasetMappingFunction,
104
- clientOptions: { ...config.clientOptions, token: apifyApiToken },
105
- });
106
- }
107
- /**
108
- * Create an ApifyDatasetLoader by calling a saved Actor task on the Apify platform and waiting for its results to be ready.
109
- * @param taskId The ID or name of the task on the Apify platform.
110
- * @param input The input object of the task that you're trying to run. Overrides the task's saved input.
111
- * @param options Options specifying settings for the task run.
112
- * @param options.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
113
- * @returns An instance of `ApifyDatasetLoader` with the results from the task's run.
114
- */
115
- static async fromActorTaskCall(taskId, input, config) {
116
- const apifyApiToken = ApifyDatasetLoader._getApifyApiToken(config.clientOptions);
117
- const apifyClient = new ApifyClient({ token: apifyApiToken });
118
- const taskCall = await apifyClient
119
- .task(taskId)
120
- .call(input, config.callOptions ?? {});
121
- return new ApifyDatasetLoader(taskCall.defaultDatasetId, {
122
- datasetMappingFunction: config.datasetMappingFunction,
123
- clientOptions: { ...config.clientOptions, token: apifyApiToken },
124
- });
125
- }
126
- }