langchain 0.2.18 → 0.3.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (338) hide show
  1. package/dist/agents/openai_functions/index.cjs +2 -2
  2. package/dist/agents/openai_functions/index.js +2 -2
  3. package/dist/chains/combine_documents/stuff.cjs +2 -2
  4. package/dist/chains/combine_documents/stuff.js +2 -2
  5. package/dist/chains/openai_functions/openapi.cjs +3 -1
  6. package/dist/chains/openai_functions/openapi.js +3 -1
  7. package/dist/load/import_constants.cjs +2 -39
  8. package/dist/load/import_constants.js +2 -39
  9. package/dist/load/import_map.cjs +2 -3
  10. package/dist/load/import_map.d.ts +0 -1
  11. package/dist/load/import_map.js +0 -1
  12. package/dist/smith/config.d.ts +1 -5
  13. package/package.json +31 -854
  14. package/dist/document_loaders/fs/chatgpt.cjs +0 -90
  15. package/dist/document_loaders/fs/chatgpt.d.ts +0 -8
  16. package/dist/document_loaders/fs/chatgpt.js +0 -86
  17. package/dist/document_loaders/fs/csv.cjs +0 -73
  18. package/dist/document_loaders/fs/csv.d.ts +0 -65
  19. package/dist/document_loaders/fs/csv.js +0 -69
  20. package/dist/document_loaders/fs/docx.cjs +0 -58
  21. package/dist/document_loaders/fs/docx.d.ts +0 -25
  22. package/dist/document_loaders/fs/docx.js +0 -54
  23. package/dist/document_loaders/fs/epub.cjs +0 -103
  24. package/dist/document_loaders/fs/epub.d.ts +0 -33
  25. package/dist/document_loaders/fs/epub.js +0 -99
  26. package/dist/document_loaders/fs/notion.cjs +0 -26
  27. package/dist/document_loaders/fs/notion.d.ts +0 -12
  28. package/dist/document_loaders/fs/notion.js +0 -22
  29. package/dist/document_loaders/fs/obsidian.cjs +0 -247
  30. package/dist/document_loaders/fs/obsidian.d.ts +0 -28
  31. package/dist/document_loaders/fs/obsidian.js +0 -240
  32. package/dist/document_loaders/fs/openai_whisper_audio.cjs +0 -49
  33. package/dist/document_loaders/fs/openai_whisper_audio.d.ts +0 -23
  34. package/dist/document_loaders/fs/openai_whisper_audio.js +0 -45
  35. package/dist/document_loaders/fs/pdf.cjs +0 -148
  36. package/dist/document_loaders/fs/pdf.d.ts +0 -49
  37. package/dist/document_loaders/fs/pdf.js +0 -144
  38. package/dist/document_loaders/fs/pptx.cjs +0 -46
  39. package/dist/document_loaders/fs/pptx.d.ts +0 -25
  40. package/dist/document_loaders/fs/pptx.js +0 -42
  41. package/dist/document_loaders/fs/srt.cjs +0 -57
  42. package/dist/document_loaders/fs/srt.d.ts +0 -32
  43. package/dist/document_loaders/fs/srt.js +0 -50
  44. package/dist/document_loaders/fs/unstructured.cjs +0 -338
  45. package/dist/document_loaders/fs/unstructured.d.ts +0 -125
  46. package/dist/document_loaders/fs/unstructured.js +0 -333
  47. package/dist/document_loaders/web/apify_dataset.cjs +0 -130
  48. package/dist/document_loaders/web/apify_dataset.d.ts +0 -85
  49. package/dist/document_loaders/web/apify_dataset.js +0 -126
  50. package/dist/document_loaders/web/assemblyai.cjs +0 -200
  51. package/dist/document_loaders/web/assemblyai.d.ts +0 -95
  52. package/dist/document_loaders/web/assemblyai.js +0 -193
  53. package/dist/document_loaders/web/azure_blob_storage_container.cjs +0 -73
  54. package/dist/document_loaders/web/azure_blob_storage_container.d.ts +0 -46
  55. package/dist/document_loaders/web/azure_blob_storage_container.js +0 -69
  56. package/dist/document_loaders/web/azure_blob_storage_file.cjs +0 -124
  57. package/dist/document_loaders/web/azure_blob_storage_file.d.ts +0 -53
  58. package/dist/document_loaders/web/azure_blob_storage_file.js +0 -97
  59. package/dist/document_loaders/web/browserbase.cjs +0 -93
  60. package/dist/document_loaders/web/browserbase.d.ts +0 -48
  61. package/dist/document_loaders/web/browserbase.js +0 -86
  62. package/dist/document_loaders/web/cheerio.cjs +0 -118
  63. package/dist/document_loaders/web/cheerio.d.ts +0 -77
  64. package/dist/document_loaders/web/cheerio.js +0 -114
  65. package/dist/document_loaders/web/college_confidential.cjs +0 -41
  66. package/dist/document_loaders/web/college_confidential.d.ts +0 -25
  67. package/dist/document_loaders/web/college_confidential.js +0 -37
  68. package/dist/document_loaders/web/confluence.cjs +0 -190
  69. package/dist/document_loaders/web/confluence.d.ts +0 -114
  70. package/dist/document_loaders/web/confluence.js +0 -186
  71. package/dist/document_loaders/web/couchbase.cjs +0 -95
  72. package/dist/document_loaders/web/couchbase.d.ts +0 -32
  73. package/dist/document_loaders/web/couchbase.js +0 -91
  74. package/dist/document_loaders/web/figma.cjs +0 -102
  75. package/dist/document_loaders/web/figma.d.ts +0 -82
  76. package/dist/document_loaders/web/figma.js +0 -98
  77. package/dist/document_loaders/web/firecrawl.cjs +0 -95
  78. package/dist/document_loaders/web/firecrawl.d.ts +0 -50
  79. package/dist/document_loaders/web/firecrawl.js +0 -88
  80. package/dist/document_loaders/web/gitbook.cjs +0 -110
  81. package/dist/document_loaders/web/gitbook.d.ts +0 -55
  82. package/dist/document_loaders/web/gitbook.js +0 -106
  83. package/dist/document_loaders/web/github.cjs +0 -615
  84. package/dist/document_loaders/web/github.d.ts +0 -203
  85. package/dist/document_loaders/web/github.js +0 -608
  86. package/dist/document_loaders/web/hn.cjs +0 -90
  87. package/dist/document_loaders/web/hn.d.ts +0 -42
  88. package/dist/document_loaders/web/hn.js +0 -86
  89. package/dist/document_loaders/web/imsdb.cjs +0 -44
  90. package/dist/document_loaders/web/imsdb.d.ts +0 -23
  91. package/dist/document_loaders/web/imsdb.js +0 -40
  92. package/dist/document_loaders/web/notionapi.cjs +0 -404
  93. package/dist/document_loaders/web/notionapi.d.ts +0 -133
  94. package/dist/document_loaders/web/notionapi.js +0 -392
  95. package/dist/document_loaders/web/notiondb.cjs +0 -199
  96. package/dist/document_loaders/web/notiondb.d.ts +0 -56
  97. package/dist/document_loaders/web/notiondb.js +0 -195
  98. package/dist/document_loaders/web/pdf.cjs +0 -140
  99. package/dist/document_loaders/web/pdf.d.ts +0 -35
  100. package/dist/document_loaders/web/pdf.js +0 -136
  101. package/dist/document_loaders/web/playwright.cjs +0 -89
  102. package/dist/document_loaders/web/playwright.d.ts +0 -58
  103. package/dist/document_loaders/web/playwright.js +0 -85
  104. package/dist/document_loaders/web/puppeteer.cjs +0 -139
  105. package/dist/document_loaders/web/puppeteer.d.ts +0 -82
  106. package/dist/document_loaders/web/puppeteer.js +0 -135
  107. package/dist/document_loaders/web/recursive_url.cjs +0 -198
  108. package/dist/document_loaders/web/recursive_url.d.ts +0 -33
  109. package/dist/document_loaders/web/recursive_url.js +0 -194
  110. package/dist/document_loaders/web/s3.cjs +0 -164
  111. package/dist/document_loaders/web/s3.d.ts +0 -78
  112. package/dist/document_loaders/web/s3.js +0 -137
  113. package/dist/document_loaders/web/searchapi.cjs +0 -150
  114. package/dist/document_loaders/web/searchapi.d.ts +0 -76
  115. package/dist/document_loaders/web/searchapi.js +0 -146
  116. package/dist/document_loaders/web/serpapi.cjs +0 -127
  117. package/dist/document_loaders/web/serpapi.d.ts +0 -62
  118. package/dist/document_loaders/web/serpapi.js +0 -123
  119. package/dist/document_loaders/web/sitemap.cjs +0 -118
  120. package/dist/document_loaders/web/sitemap.d.ts +0 -41
  121. package/dist/document_loaders/web/sitemap.js +0 -114
  122. package/dist/document_loaders/web/sonix_audio.cjs +0 -68
  123. package/dist/document_loaders/web/sonix_audio.d.ts +0 -36
  124. package/dist/document_loaders/web/sonix_audio.js +0 -64
  125. package/dist/document_loaders/web/sort_xyz_blockchain.cjs +0 -157
  126. package/dist/document_loaders/web/sort_xyz_blockchain.d.ts +0 -78
  127. package/dist/document_loaders/web/sort_xyz_blockchain.js +0 -153
  128. package/dist/document_loaders/web/youtube.cjs +0 -116
  129. package/dist/document_loaders/web/youtube.d.ts +0 -55
  130. package/dist/document_loaders/web/youtube.js +0 -112
  131. package/dist/experimental/tools/pyinterpreter.cjs +0 -248
  132. package/dist/experimental/tools/pyinterpreter.d.ts +0 -18
  133. package/dist/experimental/tools/pyinterpreter.js +0 -244
  134. package/dist/retrievers/self_query/chroma.cjs +0 -48
  135. package/dist/retrievers/self_query/chroma.d.ts +0 -26
  136. package/dist/retrievers/self_query/chroma.js +0 -44
  137. package/dist/retrievers/self_query/pinecone.cjs +0 -47
  138. package/dist/retrievers/self_query/pinecone.d.ts +0 -26
  139. package/dist/retrievers/self_query/pinecone.js +0 -43
  140. package/dist/retrievers/self_query/supabase.cjs +0 -278
  141. package/dist/retrievers/self_query/supabase.d.ts +0 -109
  142. package/dist/retrievers/self_query/supabase.js +0 -274
  143. package/dist/retrievers/self_query/supabase_utils.cjs +0 -264
  144. package/dist/retrievers/self_query/supabase_utils.d.ts +0 -101
  145. package/dist/retrievers/self_query/supabase_utils.js +0 -259
  146. package/dist/retrievers/self_query/vectara.cjs +0 -143
  147. package/dist/retrievers/self_query/vectara.d.ts +0 -42
  148. package/dist/retrievers/self_query/vectara.js +0 -139
  149. package/dist/retrievers/self_query/weaviate.cjs +0 -201
  150. package/dist/retrievers/self_query/weaviate.d.ts +0 -99
  151. package/dist/retrievers/self_query/weaviate.js +0 -197
  152. package/dist/types/assemblyai-types.cjs +0 -2
  153. package/dist/types/assemblyai-types.d.ts +0 -4
  154. package/dist/types/assemblyai-types.js +0 -1
  155. package/document_loaders/fs/chatgpt.cjs +0 -1
  156. package/document_loaders/fs/chatgpt.d.cts +0 -1
  157. package/document_loaders/fs/chatgpt.d.ts +0 -1
  158. package/document_loaders/fs/chatgpt.js +0 -1
  159. package/document_loaders/fs/csv.cjs +0 -1
  160. package/document_loaders/fs/csv.d.cts +0 -1
  161. package/document_loaders/fs/csv.d.ts +0 -1
  162. package/document_loaders/fs/csv.js +0 -1
  163. package/document_loaders/fs/docx.cjs +0 -1
  164. package/document_loaders/fs/docx.d.cts +0 -1
  165. package/document_loaders/fs/docx.d.ts +0 -1
  166. package/document_loaders/fs/docx.js +0 -1
  167. package/document_loaders/fs/epub.cjs +0 -1
  168. package/document_loaders/fs/epub.d.cts +0 -1
  169. package/document_loaders/fs/epub.d.ts +0 -1
  170. package/document_loaders/fs/epub.js +0 -1
  171. package/document_loaders/fs/notion.cjs +0 -1
  172. package/document_loaders/fs/notion.d.cts +0 -1
  173. package/document_loaders/fs/notion.d.ts +0 -1
  174. package/document_loaders/fs/notion.js +0 -1
  175. package/document_loaders/fs/obsidian.cjs +0 -1
  176. package/document_loaders/fs/obsidian.d.cts +0 -1
  177. package/document_loaders/fs/obsidian.d.ts +0 -1
  178. package/document_loaders/fs/obsidian.js +0 -1
  179. package/document_loaders/fs/openai_whisper_audio.cjs +0 -1
  180. package/document_loaders/fs/openai_whisper_audio.d.cts +0 -1
  181. package/document_loaders/fs/openai_whisper_audio.d.ts +0 -1
  182. package/document_loaders/fs/openai_whisper_audio.js +0 -1
  183. package/document_loaders/fs/pdf.cjs +0 -1
  184. package/document_loaders/fs/pdf.d.cts +0 -1
  185. package/document_loaders/fs/pdf.d.ts +0 -1
  186. package/document_loaders/fs/pdf.js +0 -1
  187. package/document_loaders/fs/pptx.cjs +0 -1
  188. package/document_loaders/fs/pptx.d.cts +0 -1
  189. package/document_loaders/fs/pptx.d.ts +0 -1
  190. package/document_loaders/fs/pptx.js +0 -1
  191. package/document_loaders/fs/srt.cjs +0 -1
  192. package/document_loaders/fs/srt.d.cts +0 -1
  193. package/document_loaders/fs/srt.d.ts +0 -1
  194. package/document_loaders/fs/srt.js +0 -1
  195. package/document_loaders/fs/unstructured.cjs +0 -1
  196. package/document_loaders/fs/unstructured.d.cts +0 -1
  197. package/document_loaders/fs/unstructured.d.ts +0 -1
  198. package/document_loaders/fs/unstructured.js +0 -1
  199. package/document_loaders/web/apify_dataset.cjs +0 -1
  200. package/document_loaders/web/apify_dataset.d.cts +0 -1
  201. package/document_loaders/web/apify_dataset.d.ts +0 -1
  202. package/document_loaders/web/apify_dataset.js +0 -1
  203. package/document_loaders/web/assemblyai.cjs +0 -1
  204. package/document_loaders/web/assemblyai.d.cts +0 -1
  205. package/document_loaders/web/assemblyai.d.ts +0 -1
  206. package/document_loaders/web/assemblyai.js +0 -1
  207. package/document_loaders/web/azure_blob_storage_container.cjs +0 -1
  208. package/document_loaders/web/azure_blob_storage_container.d.cts +0 -1
  209. package/document_loaders/web/azure_blob_storage_container.d.ts +0 -1
  210. package/document_loaders/web/azure_blob_storage_container.js +0 -1
  211. package/document_loaders/web/azure_blob_storage_file.cjs +0 -1
  212. package/document_loaders/web/azure_blob_storage_file.d.cts +0 -1
  213. package/document_loaders/web/azure_blob_storage_file.d.ts +0 -1
  214. package/document_loaders/web/azure_blob_storage_file.js +0 -1
  215. package/document_loaders/web/browserbase.cjs +0 -1
  216. package/document_loaders/web/browserbase.d.cts +0 -1
  217. package/document_loaders/web/browserbase.d.ts +0 -1
  218. package/document_loaders/web/browserbase.js +0 -1
  219. package/document_loaders/web/cheerio.cjs +0 -1
  220. package/document_loaders/web/cheerio.d.cts +0 -1
  221. package/document_loaders/web/cheerio.d.ts +0 -1
  222. package/document_loaders/web/cheerio.js +0 -1
  223. package/document_loaders/web/college_confidential.cjs +0 -1
  224. package/document_loaders/web/college_confidential.d.cts +0 -1
  225. package/document_loaders/web/college_confidential.d.ts +0 -1
  226. package/document_loaders/web/college_confidential.js +0 -1
  227. package/document_loaders/web/confluence.cjs +0 -1
  228. package/document_loaders/web/confluence.d.cts +0 -1
  229. package/document_loaders/web/confluence.d.ts +0 -1
  230. package/document_loaders/web/confluence.js +0 -1
  231. package/document_loaders/web/couchbase.cjs +0 -1
  232. package/document_loaders/web/couchbase.d.cts +0 -1
  233. package/document_loaders/web/couchbase.d.ts +0 -1
  234. package/document_loaders/web/couchbase.js +0 -1
  235. package/document_loaders/web/figma.cjs +0 -1
  236. package/document_loaders/web/figma.d.cts +0 -1
  237. package/document_loaders/web/figma.d.ts +0 -1
  238. package/document_loaders/web/figma.js +0 -1
  239. package/document_loaders/web/firecrawl.cjs +0 -1
  240. package/document_loaders/web/firecrawl.d.cts +0 -1
  241. package/document_loaders/web/firecrawl.d.ts +0 -1
  242. package/document_loaders/web/firecrawl.js +0 -1
  243. package/document_loaders/web/gitbook.cjs +0 -1
  244. package/document_loaders/web/gitbook.d.cts +0 -1
  245. package/document_loaders/web/gitbook.d.ts +0 -1
  246. package/document_loaders/web/gitbook.js +0 -1
  247. package/document_loaders/web/github.cjs +0 -1
  248. package/document_loaders/web/github.d.cts +0 -1
  249. package/document_loaders/web/github.d.ts +0 -1
  250. package/document_loaders/web/github.js +0 -1
  251. package/document_loaders/web/hn.cjs +0 -1
  252. package/document_loaders/web/hn.d.cts +0 -1
  253. package/document_loaders/web/hn.d.ts +0 -1
  254. package/document_loaders/web/hn.js +0 -1
  255. package/document_loaders/web/imsdb.cjs +0 -1
  256. package/document_loaders/web/imsdb.d.cts +0 -1
  257. package/document_loaders/web/imsdb.d.ts +0 -1
  258. package/document_loaders/web/imsdb.js +0 -1
  259. package/document_loaders/web/notionapi.cjs +0 -1
  260. package/document_loaders/web/notionapi.d.cts +0 -1
  261. package/document_loaders/web/notionapi.d.ts +0 -1
  262. package/document_loaders/web/notionapi.js +0 -1
  263. package/document_loaders/web/notiondb.cjs +0 -1
  264. package/document_loaders/web/notiondb.d.cts +0 -1
  265. package/document_loaders/web/notiondb.d.ts +0 -1
  266. package/document_loaders/web/notiondb.js +0 -1
  267. package/document_loaders/web/pdf.cjs +0 -1
  268. package/document_loaders/web/pdf.d.cts +0 -1
  269. package/document_loaders/web/pdf.d.ts +0 -1
  270. package/document_loaders/web/pdf.js +0 -1
  271. package/document_loaders/web/playwright.cjs +0 -1
  272. package/document_loaders/web/playwright.d.cts +0 -1
  273. package/document_loaders/web/playwright.d.ts +0 -1
  274. package/document_loaders/web/playwright.js +0 -1
  275. package/document_loaders/web/puppeteer.cjs +0 -1
  276. package/document_loaders/web/puppeteer.d.cts +0 -1
  277. package/document_loaders/web/puppeteer.d.ts +0 -1
  278. package/document_loaders/web/puppeteer.js +0 -1
  279. package/document_loaders/web/recursive_url.cjs +0 -1
  280. package/document_loaders/web/recursive_url.d.cts +0 -1
  281. package/document_loaders/web/recursive_url.d.ts +0 -1
  282. package/document_loaders/web/recursive_url.js +0 -1
  283. package/document_loaders/web/s3.cjs +0 -1
  284. package/document_loaders/web/s3.d.cts +0 -1
  285. package/document_loaders/web/s3.d.ts +0 -1
  286. package/document_loaders/web/s3.js +0 -1
  287. package/document_loaders/web/searchapi.cjs +0 -1
  288. package/document_loaders/web/searchapi.d.cts +0 -1
  289. package/document_loaders/web/searchapi.d.ts +0 -1
  290. package/document_loaders/web/searchapi.js +0 -1
  291. package/document_loaders/web/serpapi.cjs +0 -1
  292. package/document_loaders/web/serpapi.d.cts +0 -1
  293. package/document_loaders/web/serpapi.d.ts +0 -1
  294. package/document_loaders/web/serpapi.js +0 -1
  295. package/document_loaders/web/sitemap.cjs +0 -1
  296. package/document_loaders/web/sitemap.d.cts +0 -1
  297. package/document_loaders/web/sitemap.d.ts +0 -1
  298. package/document_loaders/web/sitemap.js +0 -1
  299. package/document_loaders/web/sonix_audio.cjs +0 -1
  300. package/document_loaders/web/sonix_audio.d.cts +0 -1
  301. package/document_loaders/web/sonix_audio.d.ts +0 -1
  302. package/document_loaders/web/sonix_audio.js +0 -1
  303. package/document_loaders/web/sort_xyz_blockchain.cjs +0 -1
  304. package/document_loaders/web/sort_xyz_blockchain.d.cts +0 -1
  305. package/document_loaders/web/sort_xyz_blockchain.d.ts +0 -1
  306. package/document_loaders/web/sort_xyz_blockchain.js +0 -1
  307. package/document_loaders/web/youtube.cjs +0 -1
  308. package/document_loaders/web/youtube.d.cts +0 -1
  309. package/document_loaders/web/youtube.d.ts +0 -1
  310. package/document_loaders/web/youtube.js +0 -1
  311. package/experimental/tools/pyinterpreter.cjs +0 -1
  312. package/experimental/tools/pyinterpreter.d.cts +0 -1
  313. package/experimental/tools/pyinterpreter.d.ts +0 -1
  314. package/experimental/tools/pyinterpreter.js +0 -1
  315. package/memory/index.cjs +0 -1
  316. package/memory/index.d.cts +0 -1
  317. package/memory/index.d.ts +0 -1
  318. package/memory/index.js +0 -1
  319. package/retrievers/self_query/chroma.cjs +0 -1
  320. package/retrievers/self_query/chroma.d.cts +0 -1
  321. package/retrievers/self_query/chroma.d.ts +0 -1
  322. package/retrievers/self_query/chroma.js +0 -1
  323. package/retrievers/self_query/pinecone.cjs +0 -1
  324. package/retrievers/self_query/pinecone.d.cts +0 -1
  325. package/retrievers/self_query/pinecone.d.ts +0 -1
  326. package/retrievers/self_query/pinecone.js +0 -1
  327. package/retrievers/self_query/supabase.cjs +0 -1
  328. package/retrievers/self_query/supabase.d.cts +0 -1
  329. package/retrievers/self_query/supabase.d.ts +0 -1
  330. package/retrievers/self_query/supabase.js +0 -1
  331. package/retrievers/self_query/vectara.cjs +0 -1
  332. package/retrievers/self_query/vectara.d.cts +0 -1
  333. package/retrievers/self_query/vectara.d.ts +0 -1
  334. package/retrievers/self_query/vectara.js +0 -1
  335. package/retrievers/self_query/weaviate.cjs +0 -1
  336. package/retrievers/self_query/weaviate.d.cts +0 -1
  337. package/retrievers/self_query/weaviate.d.ts +0 -1
  338. package/retrievers/self_query/weaviate.js +0 -1
@@ -1,333 +0,0 @@
1
- import { Document } from "@langchain/core/documents";
2
- import { getEnv } from "@langchain/core/utils/env";
3
- import { DirectoryLoader, UnknownHandling, } from "./directory.js";
4
- import { BaseDocumentLoader } from "../base.js";
5
- import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
6
- /* #__PURE__ */ logVersion020MigrationWarning({
7
- oldEntrypointName: "document_loaders/fs/unstructured",
8
- newPackageName: "@langchain/community",
9
- });
10
- const UNSTRUCTURED_API_FILETYPES = [
11
- ".txt",
12
- ".text",
13
- ".pdf",
14
- ".docx",
15
- ".doc",
16
- ".jpg",
17
- ".jpeg",
18
- ".eml",
19
- ".html",
20
- ".htm",
21
- ".md",
22
- ".pptx",
23
- ".ppt",
24
- ".msg",
25
- ".rtf",
26
- ".xlsx",
27
- ".xls",
28
- ".odt",
29
- ".epub",
30
- ];
31
- /**
32
- * @deprecated - Import from "@langchain/community/document_loaders/fs/unstructured" instead. This entrypoint will be removed in 0.3.0.
33
- *
34
- * A document loader that uses the Unstructured API to load unstructured
35
- * documents. It supports both the new syntax with options object and the
36
- * legacy syntax for backward compatibility. The load() method sends a
37
- * partitioning request to the Unstructured API and retrieves the
38
- * partitioned elements. It creates a Document instance for each element
39
- * and returns an array of Document instances.
40
- */
41
- export class UnstructuredLoader extends BaseDocumentLoader {
42
- constructor(filePathOrLegacyApiUrlOrMemoryBuffer, optionsOrLegacyFilePath = {}) {
43
- super();
44
- Object.defineProperty(this, "filePath", {
45
- enumerable: true,
46
- configurable: true,
47
- writable: true,
48
- value: void 0
49
- });
50
- Object.defineProperty(this, "buffer", {
51
- enumerable: true,
52
- configurable: true,
53
- writable: true,
54
- value: void 0
55
- });
56
- Object.defineProperty(this, "fileName", {
57
- enumerable: true,
58
- configurable: true,
59
- writable: true,
60
- value: void 0
61
- });
62
- Object.defineProperty(this, "apiUrl", {
63
- enumerable: true,
64
- configurable: true,
65
- writable: true,
66
- value: "https://api.unstructured.io/general/v0/general"
67
- });
68
- Object.defineProperty(this, "apiKey", {
69
- enumerable: true,
70
- configurable: true,
71
- writable: true,
72
- value: void 0
73
- });
74
- Object.defineProperty(this, "strategy", {
75
- enumerable: true,
76
- configurable: true,
77
- writable: true,
78
- value: "hi_res"
79
- });
80
- Object.defineProperty(this, "encoding", {
81
- enumerable: true,
82
- configurable: true,
83
- writable: true,
84
- value: void 0
85
- });
86
- Object.defineProperty(this, "ocrLanguages", {
87
- enumerable: true,
88
- configurable: true,
89
- writable: true,
90
- value: []
91
- });
92
- Object.defineProperty(this, "coordinates", {
93
- enumerable: true,
94
- configurable: true,
95
- writable: true,
96
- value: void 0
97
- });
98
- Object.defineProperty(this, "pdfInferTableStructure", {
99
- enumerable: true,
100
- configurable: true,
101
- writable: true,
102
- value: void 0
103
- });
104
- Object.defineProperty(this, "xmlKeepTags", {
105
- enumerable: true,
106
- configurable: true,
107
- writable: true,
108
- value: void 0
109
- });
110
- Object.defineProperty(this, "skipInferTableTypes", {
111
- enumerable: true,
112
- configurable: true,
113
- writable: true,
114
- value: void 0
115
- });
116
- Object.defineProperty(this, "hiResModelName", {
117
- enumerable: true,
118
- configurable: true,
119
- writable: true,
120
- value: void 0
121
- });
122
- Object.defineProperty(this, "includePageBreaks", {
123
- enumerable: true,
124
- configurable: true,
125
- writable: true,
126
- value: void 0
127
- });
128
- Object.defineProperty(this, "chunkingStrategy", {
129
- enumerable: true,
130
- configurable: true,
131
- writable: true,
132
- value: void 0
133
- });
134
- Object.defineProperty(this, "multiPageSections", {
135
- enumerable: true,
136
- configurable: true,
137
- writable: true,
138
- value: void 0
139
- });
140
- Object.defineProperty(this, "combineUnderNChars", {
141
- enumerable: true,
142
- configurable: true,
143
- writable: true,
144
- value: void 0
145
- });
146
- Object.defineProperty(this, "newAfterNChars", {
147
- enumerable: true,
148
- configurable: true,
149
- writable: true,
150
- value: void 0
151
- });
152
- Object.defineProperty(this, "maxCharacters", {
153
- enumerable: true,
154
- configurable: true,
155
- writable: true,
156
- value: void 0
157
- });
158
- // Temporary shim to avoid breaking existing users
159
- // Remove when API keys are enforced by Unstructured and existing code will break anyway
160
- const isLegacySyntax = typeof optionsOrLegacyFilePath === "string";
161
- const isMemorySyntax = typeof filePathOrLegacyApiUrlOrMemoryBuffer === "object";
162
- if (isMemorySyntax) {
163
- this.buffer = filePathOrLegacyApiUrlOrMemoryBuffer.buffer;
164
- this.fileName = filePathOrLegacyApiUrlOrMemoryBuffer.fileName;
165
- }
166
- else if (isLegacySyntax) {
167
- this.filePath = optionsOrLegacyFilePath;
168
- this.apiUrl = filePathOrLegacyApiUrlOrMemoryBuffer;
169
- }
170
- else {
171
- this.filePath = filePathOrLegacyApiUrlOrMemoryBuffer;
172
- }
173
- if (!isLegacySyntax) {
174
- const options = optionsOrLegacyFilePath;
175
- this.apiKey = options.apiKey;
176
- this.apiUrl = options.apiUrl ?? this.apiUrl;
177
- this.strategy = options.strategy ?? this.strategy;
178
- this.encoding = options.encoding;
179
- this.ocrLanguages = options.ocrLanguages ?? this.ocrLanguages;
180
- this.coordinates = options.coordinates;
181
- this.pdfInferTableStructure = options.pdfInferTableStructure;
182
- this.xmlKeepTags = options.xmlKeepTags;
183
- this.skipInferTableTypes = options.skipInferTableTypes;
184
- this.hiResModelName = options.hiResModelName;
185
- this.includePageBreaks = options.includePageBreaks;
186
- this.chunkingStrategy = options.chunkingStrategy;
187
- this.multiPageSections = options.multiPageSections;
188
- this.combineUnderNChars = options.combineUnderNChars;
189
- this.newAfterNChars = options.newAfterNChars;
190
- this.maxCharacters = options.maxCharacters;
191
- }
192
- }
193
- async _partition() {
194
- let { buffer } = this;
195
- let { fileName } = this;
196
- if (!buffer) {
197
- const { readFile, basename } = await this.imports();
198
- buffer = await readFile(this.filePath);
199
- fileName = basename(this.filePath);
200
- // I'm aware this reads the file into memory first, but we have lots of work
201
- // to do on then consuming Documents in a streaming fashion anyway, so not
202
- // worried about this for now.
203
- }
204
- const formData = new FormData();
205
- formData.append("files", new Blob([buffer]), fileName);
206
- formData.append("strategy", this.strategy);
207
- this.ocrLanguages.forEach((language) => {
208
- formData.append("ocr_languages", language);
209
- });
210
- if (this.encoding) {
211
- formData.append("encoding", this.encoding);
212
- }
213
- if (this.coordinates === true) {
214
- formData.append("coordinates", "true");
215
- }
216
- if (this.pdfInferTableStructure === true) {
217
- formData.append("pdf_infer_table_structure", "true");
218
- }
219
- if (this.xmlKeepTags === true) {
220
- formData.append("xml_keep_tags", "true");
221
- }
222
- if (this.skipInferTableTypes) {
223
- formData.append("skip_infer_table_types", JSON.stringify(this.skipInferTableTypes));
224
- }
225
- if (this.hiResModelName) {
226
- formData.append("hi_res_model_name", this.hiResModelName);
227
- }
228
- if (this.includePageBreaks) {
229
- formData.append("include_page_breaks", "true");
230
- }
231
- if (this.chunkingStrategy) {
232
- formData.append("chunking_strategy", this.chunkingStrategy);
233
- }
234
- if (this.multiPageSections !== undefined) {
235
- formData.append("multipage_sections", this.multiPageSections ? "true" : "false");
236
- }
237
- if (this.combineUnderNChars !== undefined) {
238
- formData.append("combine_under_n_chars", String(this.combineUnderNChars));
239
- }
240
- if (this.newAfterNChars !== undefined) {
241
- formData.append("new_after_n_chars", String(this.newAfterNChars));
242
- }
243
- if (this.maxCharacters !== undefined) {
244
- formData.append("max_characters", String(this.maxCharacters));
245
- }
246
- const headers = {
247
- "UNSTRUCTURED-API-KEY": this.apiKey ?? "",
248
- };
249
- const response = await fetch(this.apiUrl, {
250
- method: "POST",
251
- body: formData,
252
- headers,
253
- });
254
- if (!response.ok) {
255
- throw new Error(`Failed to partition file ${this.filePath} with error ${response.status} and message ${await response.text()}`);
256
- }
257
- const elements = await response.json();
258
- if (!Array.isArray(elements)) {
259
- throw new Error(`Expected partitioning request to return an array, but got ${elements}`);
260
- }
261
- return elements.filter((el) => typeof el.text === "string");
262
- }
263
- async load() {
264
- const elements = await this._partition();
265
- const documents = [];
266
- for (const element of elements) {
267
- const { metadata, text } = element;
268
- if (typeof text === "string" && text !== "") {
269
- documents.push(new Document({
270
- pageContent: text,
271
- metadata: {
272
- ...metadata,
273
- category: element.type,
274
- },
275
- }));
276
- }
277
- }
278
- return documents;
279
- }
280
- async imports() {
281
- try {
282
- const { readFile } = await import("node:fs/promises");
283
- const { basename } = await import("node:path");
284
- return { readFile, basename };
285
- }
286
- catch (e) {
287
- console.error(e);
288
- throw new Error(`Failed to load fs/promises. TextLoader available only on environment 'node'. It appears you are running environment '${getEnv()}'. See https://<link to docs> for alternatives.`);
289
- }
290
- }
291
- }
292
- /**
293
- * A document loader that loads unstructured documents from a directory
294
- * using the UnstructuredLoader. It creates a UnstructuredLoader instance
295
- * for each supported file type and passes it to the DirectoryLoader
296
- * constructor.
297
- * @example
298
- * ```typescript
299
- * const loader = new UnstructuredDirectoryLoader("path/to/directory", {
300
- * apiKey: "MY_API_KEY",
301
- * });
302
- * const docs = await loader.load();
303
- * ```
304
- */
305
- export class UnstructuredDirectoryLoader extends DirectoryLoader {
306
- constructor(directoryPathOrLegacyApiUrl, optionsOrLegacyDirectoryPath, legacyOptionRecursive = true, legacyOptionUnknown = UnknownHandling.Warn) {
307
- let directoryPath;
308
- let options;
309
- // Temporary shim to avoid breaking existing users
310
- // Remove when API keys are enforced by Unstructured and existing code will break anyway
311
- const isLegacySyntax = typeof optionsOrLegacyDirectoryPath === "string";
312
- if (isLegacySyntax) {
313
- directoryPath = optionsOrLegacyDirectoryPath;
314
- options = {
315
- apiUrl: directoryPathOrLegacyApiUrl,
316
- recursive: legacyOptionRecursive,
317
- unknown: legacyOptionUnknown,
318
- };
319
- }
320
- else {
321
- directoryPath = directoryPathOrLegacyApiUrl;
322
- options = optionsOrLegacyDirectoryPath;
323
- }
324
- const loader = (p) => new UnstructuredLoader(p, options);
325
- const loaders = UNSTRUCTURED_API_FILETYPES.reduce((loadersObject, filetype) => {
326
- // eslint-disable-next-line no-param-reassign
327
- loadersObject[filetype] = loader;
328
- return loadersObject;
329
- }, {});
330
- super(directoryPath, loaders, options.recursive, options.unknown);
331
- }
332
- }
333
- export { UnknownHandling };
@@ -1,130 +0,0 @@
1
- "use strict";
2
- /* eslint-disable @typescript-eslint/no-explicit-any */
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.ApifyDatasetLoader = void 0;
5
- const apify_client_1 = require("apify-client");
6
- const async_caller_1 = require("@langchain/core/utils/async_caller");
7
- const env_1 = require("@langchain/core/utils/env");
8
- const base_js_1 = require("../base.cjs");
9
- const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
10
- /* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
11
- oldEntrypointName: "document_loaders/web/apify_dataset",
12
- newPackageName: "@langchain/community",
13
- });
14
- /**
15
- * @deprecated - Import from "@langchain/community/document_loaders/web/apify_dataset" instead. This entrypoint will be removed in 0.3.0.
16
- * A class that extends the BaseDocumentLoader and implements the
17
- * DocumentLoader interface. It represents a document loader that loads
18
- * documents from an Apify dataset.
19
- * @example
20
- * ```typescript
21
- * const loader = new ApifyDatasetLoader("your-dataset-id", {
22
- * datasetMappingFunction: (item) =>
23
- * new Document({
24
- * pageContent: item.text || "",
25
- * metadata: { source: item.url },
26
- * }),
27
- * clientOptions: {
28
- * token: "your-apify-token",
29
- * },
30
- * });
31
- *
32
- * const docs = await loader.load();
33
- *
34
- * const chain = new RetrievalQAChain();
35
- * const res = await chain.invoke({ query: "What is LangChain?" });
36
- *
37
- * console.log(res.text);
38
- * console.log(res.sourceDocuments.map((d) => d.metadata.source));
39
- * ```
40
- */
41
- class ApifyDatasetLoader extends base_js_1.BaseDocumentLoader {
42
- constructor(datasetId, config) {
43
- super();
44
- Object.defineProperty(this, "apifyClient", {
45
- enumerable: true,
46
- configurable: true,
47
- writable: true,
48
- value: void 0
49
- });
50
- Object.defineProperty(this, "datasetId", {
51
- enumerable: true,
52
- configurable: true,
53
- writable: true,
54
- value: void 0
55
- });
56
- Object.defineProperty(this, "datasetMappingFunction", {
57
- enumerable: true,
58
- configurable: true,
59
- writable: true,
60
- value: void 0
61
- });
62
- Object.defineProperty(this, "caller", {
63
- enumerable: true,
64
- configurable: true,
65
- writable: true,
66
- value: void 0
67
- });
68
- const { clientOptions, datasetMappingFunction, ...asyncCallerParams } = config;
69
- const token = ApifyDatasetLoader._getApifyApiToken(clientOptions);
70
- this.apifyClient = new apify_client_1.ApifyClient({ ...clientOptions, token });
71
- this.datasetId = datasetId;
72
- this.datasetMappingFunction = datasetMappingFunction;
73
- this.caller = new async_caller_1.AsyncCaller(asyncCallerParams);
74
- }
75
- static _getApifyApiToken(config) {
76
- return config?.token ?? (0, env_1.getEnvironmentVariable)("APIFY_API_TOKEN");
77
- }
78
- /**
79
- * Retrieves the dataset items from the Apify platform and applies the
80
- * datasetMappingFunction to each item to create an array of Document
81
- * instances.
82
- * @returns An array of Document instances.
83
- */
84
- async load() {
85
- const dataset = await this.apifyClient
86
- .dataset(this.datasetId)
87
- .listItems({ clean: true });
88
- const documentList = await Promise.all(dataset.items.map((item) => this.caller.call(async () => this.datasetMappingFunction(item))));
89
- return documentList.flat();
90
- }
91
- /**
92
- * Create an ApifyDatasetLoader by calling an Actor on the Apify platform and waiting for its results to be ready.
93
- * @param actorId The ID or name of the Actor on the Apify platform.
94
- * @param input The input object of the Actor that you're trying to run.
95
- * @param options Options specifying settings for the Actor run.
96
- * @param options.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
97
- * @returns An instance of `ApifyDatasetLoader` with the results from the Actor run.
98
- */
99
- static async fromActorCall(actorId, input, config) {
100
- const apifyApiToken = ApifyDatasetLoader._getApifyApiToken(config.clientOptions);
101
- const apifyClient = new apify_client_1.ApifyClient({ token: apifyApiToken });
102
- const actorCall = await apifyClient
103
- .actor(actorId)
104
- .call(input, config.callOptions ?? {});
105
- return new ApifyDatasetLoader(actorCall.defaultDatasetId, {
106
- datasetMappingFunction: config.datasetMappingFunction,
107
- clientOptions: { ...config.clientOptions, token: apifyApiToken },
108
- });
109
- }
110
- /**
111
- * Create an ApifyDatasetLoader by calling a saved Actor task on the Apify platform and waiting for its results to be ready.
112
- * @param taskId The ID or name of the task on the Apify platform.
113
- * @param input The input object of the task that you're trying to run. Overrides the task's saved input.
114
- * @param options Options specifying settings for the task run.
115
- * @param options.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
116
- * @returns An instance of `ApifyDatasetLoader` with the results from the task's run.
117
- */
118
- static async fromActorTaskCall(taskId, input, config) {
119
- const apifyApiToken = ApifyDatasetLoader._getApifyApiToken(config.clientOptions);
120
- const apifyClient = new apify_client_1.ApifyClient({ token: apifyApiToken });
121
- const taskCall = await apifyClient
122
- .task(taskId)
123
- .call(input, config.callOptions ?? {});
124
- return new ApifyDatasetLoader(taskCall.defaultDatasetId, {
125
- datasetMappingFunction: config.datasetMappingFunction,
126
- clientOptions: { ...config.clientOptions, token: apifyApiToken },
127
- });
128
- }
129
- }
130
- exports.ApifyDatasetLoader = ApifyDatasetLoader;
@@ -1,85 +0,0 @@
1
- import { ActorCallOptions, ApifyClient, ApifyClientOptions, TaskCallOptions } from "apify-client";
2
- import { Document } from "@langchain/core/documents";
3
- import { AsyncCaller, AsyncCallerParams } from "@langchain/core/utils/async_caller";
4
- import { BaseDocumentLoader, DocumentLoader } from "../base.js";
5
- /**
6
- * @deprecated - Import from "@langchain/community/document_loaders/web/apify_dataset" instead. This entrypoint will be removed in 0.3.0.
7
- * A type that represents a function that takes a single object (an Apify
8
- * dataset item) and converts it to an instance of the Document class.
9
- *
10
- * Change function signature to only be asynchronous for simplicity in v0.1.0
11
- * https://github.com/langchain-ai/langchainjs/pull/3262
12
- */
13
- export type ApifyDatasetMappingFunction<Metadata extends Record<string, any>> = (item: Record<string | number, unknown>) => Document<Metadata> | Array<Document<Metadata>> | Promise<Document<Metadata> | Array<Document<Metadata>>>;
14
- export interface ApifyDatasetLoaderConfig<Metadata extends Record<string, any>> extends AsyncCallerParams {
15
- datasetMappingFunction: ApifyDatasetMappingFunction<Metadata>;
16
- clientOptions?: ApifyClientOptions;
17
- }
18
- /**
19
- * @deprecated - Import from "@langchain/community/document_loaders/web/apify_dataset" instead. This entrypoint will be removed in 0.3.0.
20
- * A class that extends the BaseDocumentLoader and implements the
21
- * DocumentLoader interface. It represents a document loader that loads
22
- * documents from an Apify dataset.
23
- * @example
24
- * ```typescript
25
- * const loader = new ApifyDatasetLoader("your-dataset-id", {
26
- * datasetMappingFunction: (item) =>
27
- * new Document({
28
- * pageContent: item.text || "",
29
- * metadata: { source: item.url },
30
- * }),
31
- * clientOptions: {
32
- * token: "your-apify-token",
33
- * },
34
- * });
35
- *
36
- * const docs = await loader.load();
37
- *
38
- * const chain = new RetrievalQAChain();
39
- * const res = await chain.invoke({ query: "What is LangChain?" });
40
- *
41
- * console.log(res.text);
42
- * console.log(res.sourceDocuments.map((d) => d.metadata.source));
43
- * ```
44
- */
45
- export declare class ApifyDatasetLoader<Metadata extends Record<string, any>> extends BaseDocumentLoader implements DocumentLoader {
46
- protected apifyClient: ApifyClient;
47
- protected datasetId: string;
48
- protected datasetMappingFunction: ApifyDatasetMappingFunction<Metadata>;
49
- protected caller: AsyncCaller;
50
- constructor(datasetId: string, config: ApifyDatasetLoaderConfig<Metadata>);
51
- private static _getApifyApiToken;
52
- /**
53
- * Retrieves the dataset items from the Apify platform and applies the
54
- * datasetMappingFunction to each item to create an array of Document
55
- * instances.
56
- * @returns An array of Document instances.
57
- */
58
- load(): Promise<Document<Metadata>[]>;
59
- /**
60
- * Create an ApifyDatasetLoader by calling an Actor on the Apify platform and waiting for its results to be ready.
61
- * @param actorId The ID or name of the Actor on the Apify platform.
62
- * @param input The input object of the Actor that you're trying to run.
63
- * @param options Options specifying settings for the Actor run.
64
- * @param options.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
65
- * @returns An instance of `ApifyDatasetLoader` with the results from the Actor run.
66
- */
67
- static fromActorCall<Metadata extends Record<string, any>>(actorId: string, input: Record<string | number, unknown>, config: {
68
- callOptions?: ActorCallOptions;
69
- clientOptions?: ApifyClientOptions;
70
- datasetMappingFunction: ApifyDatasetMappingFunction<Metadata>;
71
- }): Promise<ApifyDatasetLoader<Metadata>>;
72
- /**
73
- * Create an ApifyDatasetLoader by calling a saved Actor task on the Apify platform and waiting for its results to be ready.
74
- * @param taskId The ID or name of the task on the Apify platform.
75
- * @param input The input object of the task that you're trying to run. Overrides the task's saved input.
76
- * @param options Options specifying settings for the task run.
77
- * @param options.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
78
- * @returns An instance of `ApifyDatasetLoader` with the results from the task's run.
79
- */
80
- static fromActorTaskCall<Metadata extends Record<string, any>>(taskId: string, input: Record<string | number, unknown>, config: {
81
- callOptions?: TaskCallOptions;
82
- clientOptions?: ApifyClientOptions;
83
- datasetMappingFunction: ApifyDatasetMappingFunction<Metadata>;
84
- }): Promise<ApifyDatasetLoader<Metadata>>;
85
- }
@@ -1,126 +0,0 @@
1
- /* eslint-disable @typescript-eslint/no-explicit-any */
2
- import { ApifyClient, } from "apify-client";
3
- import { AsyncCaller, } from "@langchain/core/utils/async_caller";
4
- import { getEnvironmentVariable } from "@langchain/core/utils/env";
5
- import { BaseDocumentLoader } from "../base.js";
6
- import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
7
- /* #__PURE__ */ logVersion020MigrationWarning({
8
- oldEntrypointName: "document_loaders/web/apify_dataset",
9
- newPackageName: "@langchain/community",
10
- });
11
- /**
12
- * @deprecated - Import from "@langchain/community/document_loaders/web/apify_dataset" instead. This entrypoint will be removed in 0.3.0.
13
- * A class that extends the BaseDocumentLoader and implements the
14
- * DocumentLoader interface. It represents a document loader that loads
15
- * documents from an Apify dataset.
16
- * @example
17
- * ```typescript
18
- * const loader = new ApifyDatasetLoader("your-dataset-id", {
19
- * datasetMappingFunction: (item) =>
20
- * new Document({
21
- * pageContent: item.text || "",
22
- * metadata: { source: item.url },
23
- * }),
24
- * clientOptions: {
25
- * token: "your-apify-token",
26
- * },
27
- * });
28
- *
29
- * const docs = await loader.load();
30
- *
31
- * const chain = new RetrievalQAChain();
32
- * const res = await chain.invoke({ query: "What is LangChain?" });
33
- *
34
- * console.log(res.text);
35
- * console.log(res.sourceDocuments.map((d) => d.metadata.source));
36
- * ```
37
- */
38
- export class ApifyDatasetLoader extends BaseDocumentLoader {
39
- constructor(datasetId, config) {
40
- super();
41
- Object.defineProperty(this, "apifyClient", {
42
- enumerable: true,
43
- configurable: true,
44
- writable: true,
45
- value: void 0
46
- });
47
- Object.defineProperty(this, "datasetId", {
48
- enumerable: true,
49
- configurable: true,
50
- writable: true,
51
- value: void 0
52
- });
53
- Object.defineProperty(this, "datasetMappingFunction", {
54
- enumerable: true,
55
- configurable: true,
56
- writable: true,
57
- value: void 0
58
- });
59
- Object.defineProperty(this, "caller", {
60
- enumerable: true,
61
- configurable: true,
62
- writable: true,
63
- value: void 0
64
- });
65
- const { clientOptions, datasetMappingFunction, ...asyncCallerParams } = config;
66
- const token = ApifyDatasetLoader._getApifyApiToken(clientOptions);
67
- this.apifyClient = new ApifyClient({ ...clientOptions, token });
68
- this.datasetId = datasetId;
69
- this.datasetMappingFunction = datasetMappingFunction;
70
- this.caller = new AsyncCaller(asyncCallerParams);
71
- }
72
- static _getApifyApiToken(config) {
73
- return config?.token ?? getEnvironmentVariable("APIFY_API_TOKEN");
74
- }
75
- /**
76
- * Retrieves the dataset items from the Apify platform and applies the
77
- * datasetMappingFunction to each item to create an array of Document
78
- * instances.
79
- * @returns An array of Document instances.
80
- */
81
- async load() {
82
- const dataset = await this.apifyClient
83
- .dataset(this.datasetId)
84
- .listItems({ clean: true });
85
- const documentList = await Promise.all(dataset.items.map((item) => this.caller.call(async () => this.datasetMappingFunction(item))));
86
- return documentList.flat();
87
- }
88
- /**
89
- * Create an ApifyDatasetLoader by calling an Actor on the Apify platform and waiting for its results to be ready.
90
- * @param actorId The ID or name of the Actor on the Apify platform.
91
- * @param input The input object of the Actor that you're trying to run.
92
- * @param options Options specifying settings for the Actor run.
93
- * @param options.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
94
- * @returns An instance of `ApifyDatasetLoader` with the results from the Actor run.
95
- */
96
- static async fromActorCall(actorId, input, config) {
97
- const apifyApiToken = ApifyDatasetLoader._getApifyApiToken(config.clientOptions);
98
- const apifyClient = new ApifyClient({ token: apifyApiToken });
99
- const actorCall = await apifyClient
100
- .actor(actorId)
101
- .call(input, config.callOptions ?? {});
102
- return new ApifyDatasetLoader(actorCall.defaultDatasetId, {
103
- datasetMappingFunction: config.datasetMappingFunction,
104
- clientOptions: { ...config.clientOptions, token: apifyApiToken },
105
- });
106
- }
107
- /**
108
- * Create an ApifyDatasetLoader by calling a saved Actor task on the Apify platform and waiting for its results to be ready.
109
- * @param taskId The ID or name of the task on the Apify platform.
110
- * @param input The input object of the task that you're trying to run. Overrides the task's saved input.
111
- * @param options Options specifying settings for the task run.
112
- * @param options.datasetMappingFunction A function that takes a single object (an Apify dataset item) and converts it to an instance of the Document class.
113
- * @returns An instance of `ApifyDatasetLoader` with the results from the task's run.
114
- */
115
- static async fromActorTaskCall(taskId, input, config) {
116
- const apifyApiToken = ApifyDatasetLoader._getApifyApiToken(config.clientOptions);
117
- const apifyClient = new ApifyClient({ token: apifyApiToken });
118
- const taskCall = await apifyClient
119
- .task(taskId)
120
- .call(input, config.callOptions ?? {});
121
- return new ApifyDatasetLoader(taskCall.defaultDatasetId, {
122
- datasetMappingFunction: config.datasetMappingFunction,
123
- clientOptions: { ...config.clientOptions, token: apifyApiToken },
124
- });
125
- }
126
- }