langchain 0.2.19 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/dist/agents/chat/index.cjs +1 -1
- package/dist/agents/chat/index.d.ts +1 -1
- package/dist/agents/chat/index.js +1 -1
- package/dist/agents/chat_convo/index.cjs +1 -1
- package/dist/agents/chat_convo/index.d.ts +1 -1
- package/dist/agents/chat_convo/index.js +1 -1
- package/dist/agents/mrkl/index.cjs +1 -1
- package/dist/agents/mrkl/index.d.ts +1 -1
- package/dist/agents/mrkl/index.js +1 -1
- package/dist/agents/openai_functions/index.cjs +3 -3
- package/dist/agents/openai_functions/index.d.ts +1 -1
- package/dist/agents/openai_functions/index.js +3 -3
- package/dist/agents/structured_chat/index.cjs +1 -1
- package/dist/agents/structured_chat/index.d.ts +1 -1
- package/dist/agents/structured_chat/index.js +1 -1
- package/dist/agents/xml/index.cjs +1 -1
- package/dist/agents/xml/index.d.ts +1 -1
- package/dist/agents/xml/index.js +1 -1
- package/dist/chains/combine_documents/stuff.cjs +2 -2
- package/dist/chains/combine_documents/stuff.js +2 -2
- package/dist/chains/load.cjs +1 -1
- package/dist/chains/load.d.ts +1 -1
- package/dist/chains/load.js +1 -1
- package/dist/chains/openai_functions/structured_output.cjs +2 -2
- package/dist/chains/openai_functions/structured_output.d.ts +2 -2
- package/dist/chains/openai_functions/structured_output.js +2 -2
- package/dist/chains/serde.d.ts +12 -12
- package/dist/experimental/plan_and_execute/agent_executor.cjs +1 -1
- package/dist/experimental/plan_and_execute/agent_executor.js +1 -1
- package/dist/load/import_constants.cjs +2 -39
- package/dist/load/import_constants.js +2 -39
- package/dist/load/import_map.cjs +2 -3
- package/dist/load/import_map.d.ts +0 -1
- package/dist/load/import_map.js +0 -1
- package/dist/output_parsers/openai_tools.cjs +2 -4
- package/dist/output_parsers/openai_tools.d.ts +8 -3
- package/dist/output_parsers/openai_tools.js +2 -4
- package/dist/retrievers/multi_query.d.ts +1 -0
- package/dist/smith/config.d.ts +1 -5
- package/package.json +31 -854
- package/dist/document_loaders/fs/chatgpt.cjs +0 -90
- package/dist/document_loaders/fs/chatgpt.d.ts +0 -8
- package/dist/document_loaders/fs/chatgpt.js +0 -86
- package/dist/document_loaders/fs/csv.cjs +0 -73
- package/dist/document_loaders/fs/csv.d.ts +0 -65
- package/dist/document_loaders/fs/csv.js +0 -69
- package/dist/document_loaders/fs/docx.cjs +0 -58
- package/dist/document_loaders/fs/docx.d.ts +0 -25
- package/dist/document_loaders/fs/docx.js +0 -54
- package/dist/document_loaders/fs/epub.cjs +0 -103
- package/dist/document_loaders/fs/epub.d.ts +0 -33
- package/dist/document_loaders/fs/epub.js +0 -99
- package/dist/document_loaders/fs/notion.cjs +0 -26
- package/dist/document_loaders/fs/notion.d.ts +0 -12
- package/dist/document_loaders/fs/notion.js +0 -22
- package/dist/document_loaders/fs/obsidian.cjs +0 -247
- package/dist/document_loaders/fs/obsidian.d.ts +0 -28
- package/dist/document_loaders/fs/obsidian.js +0 -240
- package/dist/document_loaders/fs/openai_whisper_audio.cjs +0 -49
- package/dist/document_loaders/fs/openai_whisper_audio.d.ts +0 -23
- package/dist/document_loaders/fs/openai_whisper_audio.js +0 -45
- package/dist/document_loaders/fs/pdf.cjs +0 -148
- package/dist/document_loaders/fs/pdf.d.ts +0 -49
- package/dist/document_loaders/fs/pdf.js +0 -144
- package/dist/document_loaders/fs/pptx.cjs +0 -46
- package/dist/document_loaders/fs/pptx.d.ts +0 -25
- package/dist/document_loaders/fs/pptx.js +0 -42
- package/dist/document_loaders/fs/srt.cjs +0 -57
- package/dist/document_loaders/fs/srt.d.ts +0 -32
- package/dist/document_loaders/fs/srt.js +0 -50
- package/dist/document_loaders/fs/unstructured.cjs +0 -338
- package/dist/document_loaders/fs/unstructured.d.ts +0 -125
- package/dist/document_loaders/fs/unstructured.js +0 -333
- package/dist/document_loaders/web/apify_dataset.cjs +0 -130
- package/dist/document_loaders/web/apify_dataset.d.ts +0 -85
- package/dist/document_loaders/web/apify_dataset.js +0 -126
- package/dist/document_loaders/web/assemblyai.cjs +0 -200
- package/dist/document_loaders/web/assemblyai.d.ts +0 -95
- package/dist/document_loaders/web/assemblyai.js +0 -193
- package/dist/document_loaders/web/azure_blob_storage_container.cjs +0 -73
- package/dist/document_loaders/web/azure_blob_storage_container.d.ts +0 -46
- package/dist/document_loaders/web/azure_blob_storage_container.js +0 -69
- package/dist/document_loaders/web/azure_blob_storage_file.cjs +0 -124
- package/dist/document_loaders/web/azure_blob_storage_file.d.ts +0 -53
- package/dist/document_loaders/web/azure_blob_storage_file.js +0 -97
- package/dist/document_loaders/web/browserbase.cjs +0 -93
- package/dist/document_loaders/web/browserbase.d.ts +0 -48
- package/dist/document_loaders/web/browserbase.js +0 -86
- package/dist/document_loaders/web/cheerio.cjs +0 -118
- package/dist/document_loaders/web/cheerio.d.ts +0 -77
- package/dist/document_loaders/web/cheerio.js +0 -114
- package/dist/document_loaders/web/college_confidential.cjs +0 -41
- package/dist/document_loaders/web/college_confidential.d.ts +0 -25
- package/dist/document_loaders/web/college_confidential.js +0 -37
- package/dist/document_loaders/web/confluence.cjs +0 -190
- package/dist/document_loaders/web/confluence.d.ts +0 -114
- package/dist/document_loaders/web/confluence.js +0 -186
- package/dist/document_loaders/web/couchbase.cjs +0 -95
- package/dist/document_loaders/web/couchbase.d.ts +0 -32
- package/dist/document_loaders/web/couchbase.js +0 -91
- package/dist/document_loaders/web/figma.cjs +0 -102
- package/dist/document_loaders/web/figma.d.ts +0 -82
- package/dist/document_loaders/web/figma.js +0 -98
- package/dist/document_loaders/web/firecrawl.cjs +0 -95
- package/dist/document_loaders/web/firecrawl.d.ts +0 -50
- package/dist/document_loaders/web/firecrawl.js +0 -88
- package/dist/document_loaders/web/gitbook.cjs +0 -110
- package/dist/document_loaders/web/gitbook.d.ts +0 -55
- package/dist/document_loaders/web/gitbook.js +0 -106
- package/dist/document_loaders/web/github.cjs +0 -615
- package/dist/document_loaders/web/github.d.ts +0 -203
- package/dist/document_loaders/web/github.js +0 -608
- package/dist/document_loaders/web/hn.cjs +0 -90
- package/dist/document_loaders/web/hn.d.ts +0 -42
- package/dist/document_loaders/web/hn.js +0 -86
- package/dist/document_loaders/web/imsdb.cjs +0 -44
- package/dist/document_loaders/web/imsdb.d.ts +0 -23
- package/dist/document_loaders/web/imsdb.js +0 -40
- package/dist/document_loaders/web/notionapi.cjs +0 -404
- package/dist/document_loaders/web/notionapi.d.ts +0 -133
- package/dist/document_loaders/web/notionapi.js +0 -392
- package/dist/document_loaders/web/notiondb.cjs +0 -199
- package/dist/document_loaders/web/notiondb.d.ts +0 -56
- package/dist/document_loaders/web/notiondb.js +0 -195
- package/dist/document_loaders/web/pdf.cjs +0 -140
- package/dist/document_loaders/web/pdf.d.ts +0 -35
- package/dist/document_loaders/web/pdf.js +0 -136
- package/dist/document_loaders/web/playwright.cjs +0 -89
- package/dist/document_loaders/web/playwright.d.ts +0 -58
- package/dist/document_loaders/web/playwright.js +0 -85
- package/dist/document_loaders/web/puppeteer.cjs +0 -139
- package/dist/document_loaders/web/puppeteer.d.ts +0 -82
- package/dist/document_loaders/web/puppeteer.js +0 -135
- package/dist/document_loaders/web/recursive_url.cjs +0 -198
- package/dist/document_loaders/web/recursive_url.d.ts +0 -33
- package/dist/document_loaders/web/recursive_url.js +0 -194
- package/dist/document_loaders/web/s3.cjs +0 -164
- package/dist/document_loaders/web/s3.d.ts +0 -78
- package/dist/document_loaders/web/s3.js +0 -137
- package/dist/document_loaders/web/searchapi.cjs +0 -150
- package/dist/document_loaders/web/searchapi.d.ts +0 -76
- package/dist/document_loaders/web/searchapi.js +0 -146
- package/dist/document_loaders/web/serpapi.cjs +0 -127
- package/dist/document_loaders/web/serpapi.d.ts +0 -62
- package/dist/document_loaders/web/serpapi.js +0 -123
- package/dist/document_loaders/web/sitemap.cjs +0 -118
- package/dist/document_loaders/web/sitemap.d.ts +0 -41
- package/dist/document_loaders/web/sitemap.js +0 -114
- package/dist/document_loaders/web/sonix_audio.cjs +0 -68
- package/dist/document_loaders/web/sonix_audio.d.ts +0 -36
- package/dist/document_loaders/web/sonix_audio.js +0 -64
- package/dist/document_loaders/web/sort_xyz_blockchain.cjs +0 -157
- package/dist/document_loaders/web/sort_xyz_blockchain.d.ts +0 -78
- package/dist/document_loaders/web/sort_xyz_blockchain.js +0 -153
- package/dist/document_loaders/web/youtube.cjs +0 -116
- package/dist/document_loaders/web/youtube.d.ts +0 -55
- package/dist/document_loaders/web/youtube.js +0 -112
- package/dist/experimental/tools/pyinterpreter.cjs +0 -248
- package/dist/experimental/tools/pyinterpreter.d.ts +0 -18
- package/dist/experimental/tools/pyinterpreter.js +0 -244
- package/dist/retrievers/self_query/chroma.cjs +0 -48
- package/dist/retrievers/self_query/chroma.d.ts +0 -26
- package/dist/retrievers/self_query/chroma.js +0 -44
- package/dist/retrievers/self_query/pinecone.cjs +0 -47
- package/dist/retrievers/self_query/pinecone.d.ts +0 -26
- package/dist/retrievers/self_query/pinecone.js +0 -43
- package/dist/retrievers/self_query/supabase.cjs +0 -278
- package/dist/retrievers/self_query/supabase.d.ts +0 -109
- package/dist/retrievers/self_query/supabase.js +0 -274
- package/dist/retrievers/self_query/supabase_utils.cjs +0 -264
- package/dist/retrievers/self_query/supabase_utils.d.ts +0 -101
- package/dist/retrievers/self_query/supabase_utils.js +0 -259
- package/dist/retrievers/self_query/vectara.cjs +0 -143
- package/dist/retrievers/self_query/vectara.d.ts +0 -42
- package/dist/retrievers/self_query/vectara.js +0 -139
- package/dist/retrievers/self_query/weaviate.cjs +0 -201
- package/dist/retrievers/self_query/weaviate.d.ts +0 -99
- package/dist/retrievers/self_query/weaviate.js +0 -197
- package/dist/types/assemblyai-types.cjs +0 -2
- package/dist/types/assemblyai-types.d.ts +0 -4
- package/dist/types/assemblyai-types.js +0 -1
- package/document_loaders/fs/chatgpt.cjs +0 -1
- package/document_loaders/fs/chatgpt.d.cts +0 -1
- package/document_loaders/fs/chatgpt.d.ts +0 -1
- package/document_loaders/fs/chatgpt.js +0 -1
- package/document_loaders/fs/csv.cjs +0 -1
- package/document_loaders/fs/csv.d.cts +0 -1
- package/document_loaders/fs/csv.d.ts +0 -1
- package/document_loaders/fs/csv.js +0 -1
- package/document_loaders/fs/docx.cjs +0 -1
- package/document_loaders/fs/docx.d.cts +0 -1
- package/document_loaders/fs/docx.d.ts +0 -1
- package/document_loaders/fs/docx.js +0 -1
- package/document_loaders/fs/epub.cjs +0 -1
- package/document_loaders/fs/epub.d.cts +0 -1
- package/document_loaders/fs/epub.d.ts +0 -1
- package/document_loaders/fs/epub.js +0 -1
- package/document_loaders/fs/notion.cjs +0 -1
- package/document_loaders/fs/notion.d.cts +0 -1
- package/document_loaders/fs/notion.d.ts +0 -1
- package/document_loaders/fs/notion.js +0 -1
- package/document_loaders/fs/obsidian.cjs +0 -1
- package/document_loaders/fs/obsidian.d.cts +0 -1
- package/document_loaders/fs/obsidian.d.ts +0 -1
- package/document_loaders/fs/obsidian.js +0 -1
- package/document_loaders/fs/openai_whisper_audio.cjs +0 -1
- package/document_loaders/fs/openai_whisper_audio.d.cts +0 -1
- package/document_loaders/fs/openai_whisper_audio.d.ts +0 -1
- package/document_loaders/fs/openai_whisper_audio.js +0 -1
- package/document_loaders/fs/pdf.cjs +0 -1
- package/document_loaders/fs/pdf.d.cts +0 -1
- package/document_loaders/fs/pdf.d.ts +0 -1
- package/document_loaders/fs/pdf.js +0 -1
- package/document_loaders/fs/pptx.cjs +0 -1
- package/document_loaders/fs/pptx.d.cts +0 -1
- package/document_loaders/fs/pptx.d.ts +0 -1
- package/document_loaders/fs/pptx.js +0 -1
- package/document_loaders/fs/srt.cjs +0 -1
- package/document_loaders/fs/srt.d.cts +0 -1
- package/document_loaders/fs/srt.d.ts +0 -1
- package/document_loaders/fs/srt.js +0 -1
- package/document_loaders/fs/unstructured.cjs +0 -1
- package/document_loaders/fs/unstructured.d.cts +0 -1
- package/document_loaders/fs/unstructured.d.ts +0 -1
- package/document_loaders/fs/unstructured.js +0 -1
- package/document_loaders/web/apify_dataset.cjs +0 -1
- package/document_loaders/web/apify_dataset.d.cts +0 -1
- package/document_loaders/web/apify_dataset.d.ts +0 -1
- package/document_loaders/web/apify_dataset.js +0 -1
- package/document_loaders/web/assemblyai.cjs +0 -1
- package/document_loaders/web/assemblyai.d.cts +0 -1
- package/document_loaders/web/assemblyai.d.ts +0 -1
- package/document_loaders/web/assemblyai.js +0 -1
- package/document_loaders/web/azure_blob_storage_container.cjs +0 -1
- package/document_loaders/web/azure_blob_storage_container.d.cts +0 -1
- package/document_loaders/web/azure_blob_storage_container.d.ts +0 -1
- package/document_loaders/web/azure_blob_storage_container.js +0 -1
- package/document_loaders/web/azure_blob_storage_file.cjs +0 -1
- package/document_loaders/web/azure_blob_storage_file.d.cts +0 -1
- package/document_loaders/web/azure_blob_storage_file.d.ts +0 -1
- package/document_loaders/web/azure_blob_storage_file.js +0 -1
- package/document_loaders/web/browserbase.cjs +0 -1
- package/document_loaders/web/browserbase.d.cts +0 -1
- package/document_loaders/web/browserbase.d.ts +0 -1
- package/document_loaders/web/browserbase.js +0 -1
- package/document_loaders/web/cheerio.cjs +0 -1
- package/document_loaders/web/cheerio.d.cts +0 -1
- package/document_loaders/web/cheerio.d.ts +0 -1
- package/document_loaders/web/cheerio.js +0 -1
- package/document_loaders/web/college_confidential.cjs +0 -1
- package/document_loaders/web/college_confidential.d.cts +0 -1
- package/document_loaders/web/college_confidential.d.ts +0 -1
- package/document_loaders/web/college_confidential.js +0 -1
- package/document_loaders/web/confluence.cjs +0 -1
- package/document_loaders/web/confluence.d.cts +0 -1
- package/document_loaders/web/confluence.d.ts +0 -1
- package/document_loaders/web/confluence.js +0 -1
- package/document_loaders/web/couchbase.cjs +0 -1
- package/document_loaders/web/couchbase.d.cts +0 -1
- package/document_loaders/web/couchbase.d.ts +0 -1
- package/document_loaders/web/couchbase.js +0 -1
- package/document_loaders/web/figma.cjs +0 -1
- package/document_loaders/web/figma.d.cts +0 -1
- package/document_loaders/web/figma.d.ts +0 -1
- package/document_loaders/web/figma.js +0 -1
- package/document_loaders/web/firecrawl.cjs +0 -1
- package/document_loaders/web/firecrawl.d.cts +0 -1
- package/document_loaders/web/firecrawl.d.ts +0 -1
- package/document_loaders/web/firecrawl.js +0 -1
- package/document_loaders/web/gitbook.cjs +0 -1
- package/document_loaders/web/gitbook.d.cts +0 -1
- package/document_loaders/web/gitbook.d.ts +0 -1
- package/document_loaders/web/gitbook.js +0 -1
- package/document_loaders/web/github.cjs +0 -1
- package/document_loaders/web/github.d.cts +0 -1
- package/document_loaders/web/github.d.ts +0 -1
- package/document_loaders/web/github.js +0 -1
- package/document_loaders/web/hn.cjs +0 -1
- package/document_loaders/web/hn.d.cts +0 -1
- package/document_loaders/web/hn.d.ts +0 -1
- package/document_loaders/web/hn.js +0 -1
- package/document_loaders/web/imsdb.cjs +0 -1
- package/document_loaders/web/imsdb.d.cts +0 -1
- package/document_loaders/web/imsdb.d.ts +0 -1
- package/document_loaders/web/imsdb.js +0 -1
- package/document_loaders/web/notionapi.cjs +0 -1
- package/document_loaders/web/notionapi.d.cts +0 -1
- package/document_loaders/web/notionapi.d.ts +0 -1
- package/document_loaders/web/notionapi.js +0 -1
- package/document_loaders/web/notiondb.cjs +0 -1
- package/document_loaders/web/notiondb.d.cts +0 -1
- package/document_loaders/web/notiondb.d.ts +0 -1
- package/document_loaders/web/notiondb.js +0 -1
- package/document_loaders/web/pdf.cjs +0 -1
- package/document_loaders/web/pdf.d.cts +0 -1
- package/document_loaders/web/pdf.d.ts +0 -1
- package/document_loaders/web/pdf.js +0 -1
- package/document_loaders/web/playwright.cjs +0 -1
- package/document_loaders/web/playwright.d.cts +0 -1
- package/document_loaders/web/playwright.d.ts +0 -1
- package/document_loaders/web/playwright.js +0 -1
- package/document_loaders/web/puppeteer.cjs +0 -1
- package/document_loaders/web/puppeteer.d.cts +0 -1
- package/document_loaders/web/puppeteer.d.ts +0 -1
- package/document_loaders/web/puppeteer.js +0 -1
- package/document_loaders/web/recursive_url.cjs +0 -1
- package/document_loaders/web/recursive_url.d.cts +0 -1
- package/document_loaders/web/recursive_url.d.ts +0 -1
- package/document_loaders/web/recursive_url.js +0 -1
- package/document_loaders/web/s3.cjs +0 -1
- package/document_loaders/web/s3.d.cts +0 -1
- package/document_loaders/web/s3.d.ts +0 -1
- package/document_loaders/web/s3.js +0 -1
- package/document_loaders/web/searchapi.cjs +0 -1
- package/document_loaders/web/searchapi.d.cts +0 -1
- package/document_loaders/web/searchapi.d.ts +0 -1
- package/document_loaders/web/searchapi.js +0 -1
- package/document_loaders/web/serpapi.cjs +0 -1
- package/document_loaders/web/serpapi.d.cts +0 -1
- package/document_loaders/web/serpapi.d.ts +0 -1
- package/document_loaders/web/serpapi.js +0 -1
- package/document_loaders/web/sitemap.cjs +0 -1
- package/document_loaders/web/sitemap.d.cts +0 -1
- package/document_loaders/web/sitemap.d.ts +0 -1
- package/document_loaders/web/sitemap.js +0 -1
- package/document_loaders/web/sonix_audio.cjs +0 -1
- package/document_loaders/web/sonix_audio.d.cts +0 -1
- package/document_loaders/web/sonix_audio.d.ts +0 -1
- package/document_loaders/web/sonix_audio.js +0 -1
- package/document_loaders/web/sort_xyz_blockchain.cjs +0 -1
- package/document_loaders/web/sort_xyz_blockchain.d.cts +0 -1
- package/document_loaders/web/sort_xyz_blockchain.d.ts +0 -1
- package/document_loaders/web/sort_xyz_blockchain.js +0 -1
- package/document_loaders/web/youtube.cjs +0 -1
- package/document_loaders/web/youtube.d.cts +0 -1
- package/document_loaders/web/youtube.d.ts +0 -1
- package/document_loaders/web/youtube.js +0 -1
- package/experimental/tools/pyinterpreter.cjs +0 -1
- package/experimental/tools/pyinterpreter.d.cts +0 -1
- package/experimental/tools/pyinterpreter.d.ts +0 -1
- package/experimental/tools/pyinterpreter.js +0 -1
- package/memory/index.cjs +0 -1
- package/memory/index.d.cts +0 -1
- package/memory/index.d.ts +0 -1
- package/memory/index.js +0 -1
- package/retrievers/self_query/chroma.cjs +0 -1
- package/retrievers/self_query/chroma.d.cts +0 -1
- package/retrievers/self_query/chroma.d.ts +0 -1
- package/retrievers/self_query/chroma.js +0 -1
- package/retrievers/self_query/pinecone.cjs +0 -1
- package/retrievers/self_query/pinecone.d.cts +0 -1
- package/retrievers/self_query/pinecone.d.ts +0 -1
- package/retrievers/self_query/pinecone.js +0 -1
- package/retrievers/self_query/supabase.cjs +0 -1
- package/retrievers/self_query/supabase.d.cts +0 -1
- package/retrievers/self_query/supabase.d.ts +0 -1
- package/retrievers/self_query/supabase.js +0 -1
- package/retrievers/self_query/vectara.cjs +0 -1
- package/retrievers/self_query/vectara.d.cts +0 -1
- package/retrievers/self_query/vectara.d.ts +0 -1
- package/retrievers/self_query/vectara.js +0 -1
- package/retrievers/self_query/weaviate.cjs +0 -1
- package/retrievers/self_query/weaviate.d.cts +0 -1
- package/retrievers/self_query/weaviate.d.ts +0 -1
- package/retrievers/self_query/weaviate.js +0 -1
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
-
if (k2 === undefined) k2 = k;
|
|
4
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
-
}
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || function (mod) {
|
|
19
|
-
if (mod && mod.__esModule) return mod;
|
|
20
|
-
var result = {};
|
|
21
|
-
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
22
|
-
__setModuleDefault(result, mod);
|
|
23
|
-
return result;
|
|
24
|
-
};
|
|
25
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
-
exports.AzureBlobStorageFileLoader = void 0;
|
|
27
|
-
const fs = __importStar(require("node:fs"));
|
|
28
|
-
const path = __importStar(require("node:path"));
|
|
29
|
-
const os = __importStar(require("node:os"));
|
|
30
|
-
const storage_blob_1 = require("@azure/storage-blob");
|
|
31
|
-
const base_js_1 = require("../base.cjs");
|
|
32
|
-
const unstructured_js_1 = require("../fs/unstructured.cjs");
|
|
33
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
34
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
35
|
-
oldEntrypointName: "document_loaders/web/azure_blog_storage_file",
|
|
36
|
-
newPackageName: "@langchain/community",
|
|
37
|
-
});
|
|
38
|
-
/**
|
|
39
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/azure_blog_storage_file" instead. This entrypoint will be removed in 0.3.0.
|
|
40
|
-
* Class representing a document loader that loads a specific file from
|
|
41
|
-
* Azure Blob Storage. It extends the BaseDocumentLoader class and
|
|
42
|
-
* implements the DocumentLoader interface.
|
|
43
|
-
* @example
|
|
44
|
-
* ```typescript
|
|
45
|
-
* const loader = new AzureBlobStorageFileLoader({
|
|
46
|
-
* azureConfig: {
|
|
47
|
-
* connectionString: "{connectionString}",
|
|
48
|
-
* container: "{containerName}",
|
|
49
|
-
* blobName: "{blobName}",
|
|
50
|
-
* },
|
|
51
|
-
* });
|
|
52
|
-
* const docs = await loader.load();
|
|
53
|
-
* ```
|
|
54
|
-
*/
|
|
55
|
-
class AzureBlobStorageFileLoader extends base_js_1.BaseDocumentLoader {
|
|
56
|
-
constructor({ azureConfig, unstructuredConfig, }) {
|
|
57
|
-
super();
|
|
58
|
-
Object.defineProperty(this, "connectionString", {
|
|
59
|
-
enumerable: true,
|
|
60
|
-
configurable: true,
|
|
61
|
-
writable: true,
|
|
62
|
-
value: void 0
|
|
63
|
-
});
|
|
64
|
-
Object.defineProperty(this, "container", {
|
|
65
|
-
enumerable: true,
|
|
66
|
-
configurable: true,
|
|
67
|
-
writable: true,
|
|
68
|
-
value: void 0
|
|
69
|
-
});
|
|
70
|
-
Object.defineProperty(this, "blobName", {
|
|
71
|
-
enumerable: true,
|
|
72
|
-
configurable: true,
|
|
73
|
-
writable: true,
|
|
74
|
-
value: void 0
|
|
75
|
-
});
|
|
76
|
-
Object.defineProperty(this, "unstructuredConfig", {
|
|
77
|
-
enumerable: true,
|
|
78
|
-
configurable: true,
|
|
79
|
-
writable: true,
|
|
80
|
-
value: void 0
|
|
81
|
-
});
|
|
82
|
-
this.connectionString = azureConfig.connectionString;
|
|
83
|
-
this.container = azureConfig.container;
|
|
84
|
-
this.blobName = azureConfig.blobName;
|
|
85
|
-
this.unstructuredConfig = unstructuredConfig;
|
|
86
|
-
}
|
|
87
|
-
/**
|
|
88
|
-
* Method to load a specific file from Azure Blob Storage. It creates a
|
|
89
|
-
* temporary directory, constructs the file path, downloads the file, and
|
|
90
|
-
* loads the documents using the UnstructuredLoader. The loaded documents
|
|
91
|
-
* are returned, and the temporary directory is deleted.
|
|
92
|
-
* @returns An array of documents loaded from the file in Azure Blob Storage.
|
|
93
|
-
*/
|
|
94
|
-
async load() {
|
|
95
|
-
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "azureblobfileloader-"));
|
|
96
|
-
const filePath = path.join(tempDir, this.blobName);
|
|
97
|
-
try {
|
|
98
|
-
const blobServiceClient = storage_blob_1.BlobServiceClient.fromConnectionString(this.connectionString, {
|
|
99
|
-
userAgentOptions: {
|
|
100
|
-
userAgentPrefix: "langchainjs-blob-storage-file",
|
|
101
|
-
},
|
|
102
|
-
});
|
|
103
|
-
const containerClient = blobServiceClient.getContainerClient(this.container);
|
|
104
|
-
const blobClient = containerClient.getBlobClient(this.blobName);
|
|
105
|
-
fs.mkdirSync(path.dirname(filePath), { recursive: true });
|
|
106
|
-
await blobClient.downloadToFile(filePath);
|
|
107
|
-
}
|
|
108
|
-
catch (e) {
|
|
109
|
-
throw new Error(`Failed to download file ${this.blobName} from Azure Blob Storage container ${this.container}: ${e.message}`);
|
|
110
|
-
}
|
|
111
|
-
try {
|
|
112
|
-
const unstructuredLoader = new unstructured_js_1.UnstructuredLoader(filePath, this.unstructuredConfig);
|
|
113
|
-
const docs = await unstructuredLoader.load();
|
|
114
|
-
return docs;
|
|
115
|
-
}
|
|
116
|
-
catch {
|
|
117
|
-
throw new Error(`Failed to load file ${filePath} using unstructured loader.`);
|
|
118
|
-
}
|
|
119
|
-
finally {
|
|
120
|
-
fs.rmSync(path.dirname(filePath), { recursive: true, force: true });
|
|
121
|
-
}
|
|
122
|
-
}
|
|
123
|
-
}
|
|
124
|
-
exports.AzureBlobStorageFileLoader = AzureBlobStorageFileLoader;
|
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
import { BaseDocumentLoader } from "../base.js";
|
|
2
|
-
import { UnstructuredLoaderOptions } from "../fs/unstructured.js";
|
|
3
|
-
/**
|
|
4
|
-
* Interface representing the configuration for accessing a specific file
|
|
5
|
-
* in Azure Blob Storage.
|
|
6
|
-
*/
|
|
7
|
-
interface AzureBlobStorageFileConfig {
|
|
8
|
-
connectionString: string;
|
|
9
|
-
container: string;
|
|
10
|
-
blobName: string;
|
|
11
|
-
}
|
|
12
|
-
/**
|
|
13
|
-
* Interface representing the configuration for the
|
|
14
|
-
* AzureBlobStorageFileLoader. It contains the Azure Blob Storage file
|
|
15
|
-
* configuration and the options for the UnstructuredLoader.
|
|
16
|
-
*/
|
|
17
|
-
interface AzureBlobStorageFileLoaderConfig {
|
|
18
|
-
azureConfig: AzureBlobStorageFileConfig;
|
|
19
|
-
unstructuredConfig?: UnstructuredLoaderOptions;
|
|
20
|
-
}
|
|
21
|
-
/**
|
|
22
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/azure_blog_storage_file" instead. This entrypoint will be removed in 0.3.0.
|
|
23
|
-
* Class representing a document loader that loads a specific file from
|
|
24
|
-
* Azure Blob Storage. It extends the BaseDocumentLoader class and
|
|
25
|
-
* implements the DocumentLoader interface.
|
|
26
|
-
* @example
|
|
27
|
-
* ```typescript
|
|
28
|
-
* const loader = new AzureBlobStorageFileLoader({
|
|
29
|
-
* azureConfig: {
|
|
30
|
-
* connectionString: "{connectionString}",
|
|
31
|
-
* container: "{containerName}",
|
|
32
|
-
* blobName: "{blobName}",
|
|
33
|
-
* },
|
|
34
|
-
* });
|
|
35
|
-
* const docs = await loader.load();
|
|
36
|
-
* ```
|
|
37
|
-
*/
|
|
38
|
-
export declare class AzureBlobStorageFileLoader extends BaseDocumentLoader {
|
|
39
|
-
private readonly connectionString;
|
|
40
|
-
private readonly container;
|
|
41
|
-
private readonly blobName;
|
|
42
|
-
private readonly unstructuredConfig?;
|
|
43
|
-
constructor({ azureConfig, unstructuredConfig, }: AzureBlobStorageFileLoaderConfig);
|
|
44
|
-
/**
|
|
45
|
-
* Method to load a specific file from Azure Blob Storage. It creates a
|
|
46
|
-
* temporary directory, constructs the file path, downloads the file, and
|
|
47
|
-
* loads the documents using the UnstructuredLoader. The loaded documents
|
|
48
|
-
* are returned, and the temporary directory is deleted.
|
|
49
|
-
* @returns An array of documents loaded from the file in Azure Blob Storage.
|
|
50
|
-
*/
|
|
51
|
-
load(): Promise<import("@langchain/core/documents").Document<Record<string, any>>[]>;
|
|
52
|
-
}
|
|
53
|
-
export {};
|
|
@@ -1,97 +0,0 @@
|
|
|
1
|
-
import * as fs from "node:fs";
|
|
2
|
-
import * as path from "node:path";
|
|
3
|
-
import * as os from "node:os";
|
|
4
|
-
import { BlobServiceClient } from "@azure/storage-blob";
|
|
5
|
-
import { BaseDocumentLoader } from "../base.js";
|
|
6
|
-
import { UnstructuredLoader, } from "../fs/unstructured.js";
|
|
7
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
8
|
-
/* #__PURE__ */ logVersion020MigrationWarning({
|
|
9
|
-
oldEntrypointName: "document_loaders/web/azure_blog_storage_file",
|
|
10
|
-
newPackageName: "@langchain/community",
|
|
11
|
-
});
|
|
12
|
-
/**
|
|
13
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/azure_blog_storage_file" instead. This entrypoint will be removed in 0.3.0.
|
|
14
|
-
* Class representing a document loader that loads a specific file from
|
|
15
|
-
* Azure Blob Storage. It extends the BaseDocumentLoader class and
|
|
16
|
-
* implements the DocumentLoader interface.
|
|
17
|
-
* @example
|
|
18
|
-
* ```typescript
|
|
19
|
-
* const loader = new AzureBlobStorageFileLoader({
|
|
20
|
-
* azureConfig: {
|
|
21
|
-
* connectionString: "{connectionString}",
|
|
22
|
-
* container: "{containerName}",
|
|
23
|
-
* blobName: "{blobName}",
|
|
24
|
-
* },
|
|
25
|
-
* });
|
|
26
|
-
* const docs = await loader.load();
|
|
27
|
-
* ```
|
|
28
|
-
*/
|
|
29
|
-
export class AzureBlobStorageFileLoader extends BaseDocumentLoader {
|
|
30
|
-
constructor({ azureConfig, unstructuredConfig, }) {
|
|
31
|
-
super();
|
|
32
|
-
Object.defineProperty(this, "connectionString", {
|
|
33
|
-
enumerable: true,
|
|
34
|
-
configurable: true,
|
|
35
|
-
writable: true,
|
|
36
|
-
value: void 0
|
|
37
|
-
});
|
|
38
|
-
Object.defineProperty(this, "container", {
|
|
39
|
-
enumerable: true,
|
|
40
|
-
configurable: true,
|
|
41
|
-
writable: true,
|
|
42
|
-
value: void 0
|
|
43
|
-
});
|
|
44
|
-
Object.defineProperty(this, "blobName", {
|
|
45
|
-
enumerable: true,
|
|
46
|
-
configurable: true,
|
|
47
|
-
writable: true,
|
|
48
|
-
value: void 0
|
|
49
|
-
});
|
|
50
|
-
Object.defineProperty(this, "unstructuredConfig", {
|
|
51
|
-
enumerable: true,
|
|
52
|
-
configurable: true,
|
|
53
|
-
writable: true,
|
|
54
|
-
value: void 0
|
|
55
|
-
});
|
|
56
|
-
this.connectionString = azureConfig.connectionString;
|
|
57
|
-
this.container = azureConfig.container;
|
|
58
|
-
this.blobName = azureConfig.blobName;
|
|
59
|
-
this.unstructuredConfig = unstructuredConfig;
|
|
60
|
-
}
|
|
61
|
-
/**
|
|
62
|
-
* Method to load a specific file from Azure Blob Storage. It creates a
|
|
63
|
-
* temporary directory, constructs the file path, downloads the file, and
|
|
64
|
-
* loads the documents using the UnstructuredLoader. The loaded documents
|
|
65
|
-
* are returned, and the temporary directory is deleted.
|
|
66
|
-
* @returns An array of documents loaded from the file in Azure Blob Storage.
|
|
67
|
-
*/
|
|
68
|
-
async load() {
|
|
69
|
-
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "azureblobfileloader-"));
|
|
70
|
-
const filePath = path.join(tempDir, this.blobName);
|
|
71
|
-
try {
|
|
72
|
-
const blobServiceClient = BlobServiceClient.fromConnectionString(this.connectionString, {
|
|
73
|
-
userAgentOptions: {
|
|
74
|
-
userAgentPrefix: "langchainjs-blob-storage-file",
|
|
75
|
-
},
|
|
76
|
-
});
|
|
77
|
-
const containerClient = blobServiceClient.getContainerClient(this.container);
|
|
78
|
-
const blobClient = containerClient.getBlobClient(this.blobName);
|
|
79
|
-
fs.mkdirSync(path.dirname(filePath), { recursive: true });
|
|
80
|
-
await blobClient.downloadToFile(filePath);
|
|
81
|
-
}
|
|
82
|
-
catch (e) {
|
|
83
|
-
throw new Error(`Failed to download file ${this.blobName} from Azure Blob Storage container ${this.container}: ${e.message}`);
|
|
84
|
-
}
|
|
85
|
-
try {
|
|
86
|
-
const unstructuredLoader = new UnstructuredLoader(filePath, this.unstructuredConfig);
|
|
87
|
-
const docs = await unstructuredLoader.load();
|
|
88
|
-
return docs;
|
|
89
|
-
}
|
|
90
|
-
catch {
|
|
91
|
-
throw new Error(`Failed to load file ${filePath} using unstructured loader.`);
|
|
92
|
-
}
|
|
93
|
-
finally {
|
|
94
|
-
fs.rmSync(path.dirname(filePath), { recursive: true, force: true });
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
}
|
|
@@ -1,93 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// Interop helper: returns ES-module namespaces (flagged with `__esModule`)
// unchanged and wraps any other value as `{ default: value }`. An existing
// `this.__importDefault` is reused when one is present.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.BrowserbaseLoader = void 0;
|
|
7
|
-
const documents_1 = require("@langchain/core/documents");
|
|
8
|
-
const sdk_1 = __importDefault(require("@browserbasehq/sdk"));
|
|
9
|
-
const base_js_1 = require("../base.cjs");
|
|
10
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
11
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
12
|
-
oldEntrypointName: "document_loaders/web/browserbase",
|
|
13
|
-
newPackageName: "@langchain/community",
|
|
14
|
-
});
|
|
15
|
-
/**
 * Load pre-rendered web pages using a headless browser hosted on Browserbase.
 *
 * Depends on `@browserbasehq/sdk` package.
 * Get your API key from https://browserbase.com
 *
 * @deprecated Import from "@langchain/community/document_loaders/web/browserbase" instead. This entrypoint will be removed in 0.3.0.
 *
 * @example
 * ```typescript
 * import { BrowserbaseLoader } from "langchain/document_loaders/web/browserbase";
 *
 * const loader = new BrowserbaseLoader(["https://example.com"], {
 *   apiKey: process.env.BROWSERBASE_API_KEY,
 *   textContent: true,
 * });
 *
 * const docs = await loader.load();
 * ```
 *
 * @param {string[]} urls - The URLs of the web pages to load.
 * @param {BrowserbaseLoaderOptions} [options] - Browserbase client options.
 */
class BrowserbaseLoader extends base_js_1.BaseDocumentLoader {
    constructor(urls, options = {}) {
        super();
        Object.defineProperty(this, "urls", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "options", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "browserbase", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.urls = urls;
        this.options = options;
        // The same options object configures the SDK client here and is
        // passed again to `loadURLs` in `lazyLoad`.
        this.browserbase = new sdk_1.default(options);
    }
    /**
     * Load pages from URLs by draining `lazyLoad()` into an array.
     *
     * @returns {Promise<DocumentInterface[]>} - A promise which resolves to a list of documents.
     */
    async load() {
        const documents = [];
        for await (const doc of this.lazyLoad()) {
            documents.push(doc);
        }
        return documents;
    }
    /**
     * Load pages from URLs, yielding one document per loaded page. Each
     * document carries the page content and, as `metadata.url`, the entry of
     * `this.urls` at the same position.
     *
     * @returns {AsyncGenerator<DocumentInterface>} - An async generator that yields documents.
     */
    async *lazyLoad() {
        const pages = await this.browserbase.loadURLs(this.urls, this.options);
        // NOTE(review): assumes pages come back in the same order as
        // `this.urls` - confirm against the Browserbase SDK.
        let index = 0;
        for await (const page of pages) {
            yield new documents_1.Document({
                pageContent: page,
                metadata: {
                    url: this.urls[index],
                },
            });
            // Advance by exactly one per page. The previous
            // `index += index + 1` produced 0, 1, 3, 7, ... and mislabelled
            // every page from the third one on.
            index += 1;
        }
    }
}
|
|
93
|
-
exports.BrowserbaseLoader = BrowserbaseLoader;
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
import { Document, type DocumentInterface } from "@langchain/core/documents";
|
|
2
|
-
import Browserbase, { LoadOptions, ClientOptions } from "@browserbasehq/sdk";
|
|
3
|
-
import { BaseDocumentLoader } from "../base.js";
|
|
4
|
-
import type { DocumentLoader } from "../base.js";
|
|
5
|
-
type BrowserbaseLoaderOptions = ClientOptions & LoadOptions;
|
|
6
|
-
/**
 * Load pre-rendered web pages using a headless browser hosted on Browserbase.
 *
 * Depends on `@browserbasehq/sdk` package.
 * Get your API key from https://browserbase.com
 *
 * @deprecated Import from "@langchain/community/document_loaders/web/browserbase" instead. This entrypoint will be removed in 0.3.0.
 *
 * @example
 * ```typescript
 * import { BrowserbaseLoader } from "langchain/document_loaders/web/browserbase";
 *
 * const loader = new BrowserbaseLoader(["https://example.com"], {
 *   apiKey: process.env.BROWSERBASE_API_KEY,
 *   textContent: true,
 * });
 *
 * const docs = await loader.load();
 * ```
 *
 * @param {string[]} urls - The URLs of the web pages to load.
 * @param {BrowserbaseLoaderOptions} [options] - Browserbase client options.
 */
export declare class BrowserbaseLoader extends BaseDocumentLoader implements DocumentLoader {
    /** The URLs of the web pages to load. */
    urls: string[];
    /** Combined Browserbase client and load options (`ClientOptions & LoadOptions`). */
    options: BrowserbaseLoaderOptions;
    /** The Browserbase SDK client instance. */
    browserbase: Browserbase;
    constructor(urls: string[], options?: BrowserbaseLoaderOptions);
    /**
     * Load pages from URLs.
     *
     * @returns {Promise<DocumentInterface[]>} - A promise which resolves to a list of documents.
     */
    load(): Promise<DocumentInterface[]>;
    /**
     * Load pages from URLs.
     *
     * @returns {AsyncGenerator<DocumentInterface>} - An async generator that yields documents whose metadata contains a `url` string.
     */
    lazyLoad(): AsyncGenerator<Document<{
        url: string;
    }>, void, unknown>;
}
|
|
48
|
-
export {};
|
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
|
-
import Browserbase from "@browserbasehq/sdk";
|
|
3
|
-
import { BaseDocumentLoader } from "../base.js";
|
|
4
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
5
|
-
/* #__PURE__ */ logVersion020MigrationWarning({
|
|
6
|
-
oldEntrypointName: "document_loaders/web/browserbase",
|
|
7
|
-
newPackageName: "@langchain/community",
|
|
8
|
-
});
|
|
9
|
-
/**
 * Load pre-rendered web pages using a headless browser hosted on Browserbase.
 *
 * Depends on `@browserbasehq/sdk` package.
 * Get your API key from https://browserbase.com
 *
 * @deprecated Import from "@langchain/community/document_loaders/web/browserbase" instead. This entrypoint will be removed in 0.3.0.
 *
 * @example
 * ```typescript
 * import { BrowserbaseLoader } from "langchain/document_loaders/web/browserbase";
 *
 * const loader = new BrowserbaseLoader(["https://example.com"], {
 *   apiKey: process.env.BROWSERBASE_API_KEY,
 *   textContent: true,
 * });
 *
 * const docs = await loader.load();
 * ```
 *
 * @param {string[]} urls - The URLs of the web pages to load.
 * @param {BrowserbaseLoaderOptions} [options] - Browserbase client options.
 */
export class BrowserbaseLoader extends BaseDocumentLoader {
    constructor(urls, options = {}) {
        super();
        Object.defineProperty(this, "urls", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "options", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "browserbase", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.urls = urls;
        this.options = options;
        // The same options object configures the SDK client here and is
        // passed again to `loadURLs` in `lazyLoad`.
        this.browserbase = new Browserbase(options);
    }
    /**
     * Load pages from URLs by draining `lazyLoad()` into an array.
     *
     * @returns {Promise<DocumentInterface[]>} - A promise which resolves to a list of documents.
     */
    async load() {
        const documents = [];
        for await (const doc of this.lazyLoad()) {
            documents.push(doc);
        }
        return documents;
    }
    /**
     * Load pages from URLs, yielding one document per loaded page. Each
     * document carries the page content and, as `metadata.url`, the entry of
     * `this.urls` at the same position.
     *
     * @returns {AsyncGenerator<DocumentInterface>} - An async generator that yields documents.
     */
    async *lazyLoad() {
        const pages = await this.browserbase.loadURLs(this.urls, this.options);
        // NOTE(review): assumes pages come back in the same order as
        // `this.urls` - confirm against the Browserbase SDK.
        let index = 0;
        for await (const page of pages) {
            yield new Document({
                pageContent: page,
                metadata: {
                    url: this.urls[index],
                },
            });
            // Advance by exactly one per page. The previous
            // `index += index + 1` produced 0, 1, 3, 7, ... and mislabelled
            // every page from the third one on.
            index += 1;
        }
    }
}
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.CheerioWebBaseLoader = void 0;
|
|
4
|
-
const documents_1 = require("@langchain/core/documents");
|
|
5
|
-
const async_caller_1 = require("@langchain/core/utils/async_caller");
|
|
6
|
-
const base_js_1 = require("../base.cjs");
|
|
7
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
8
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
9
|
-
oldEntrypointName: "document_loaders/web/cheerio",
|
|
10
|
-
newPackageName: "@langchain/community",
|
|
11
|
-
});
|
|
12
|
-
/**
 * Document loader that fetches a web page and extracts its text with
 * Cheerio. Extends BaseDocumentLoader.
 *
 * @deprecated - Import from "@langchain/community/document_loaders/web/cheerio" instead. This entrypoint will be removed in 0.3.0.
 *
 * @example
 * ```typescript
 * const loader = new CheerioWebBaseLoader("https://exampleurl.com");
 * const docs = await loader.load();
 * console.log({ docs });
 * ```
 */
class CheerioWebBaseLoader extends base_js_1.BaseDocumentLoader {
    /**
     * @param webPath URL of the page to load.
     * @param fields Optional settings: `timeout` (defaults to 10000),
     * `selector` (defaults to "body") and `textDecoder`; every remaining
     * key is forwarded to the AsyncCaller constructor.
     */
    constructor(webPath, fields) {
        super();
        // Builds the descriptor shared by every instance property.
        const describe = (value) => ({
            enumerable: true,
            configurable: true,
            writable: true,
            value,
        });
        Object.defineProperty(this, "webPath", describe(webPath));
        for (const key of ["timeout", "caller", "selector", "textDecoder"]) {
            Object.defineProperty(this, key, describe(undefined));
        }
        const { timeout, selector, textDecoder, ...callerParams } = fields ?? {};
        this.timeout = timeout ?? 10000;
        this.caller = new async_caller_1.AsyncCaller(callerParams);
        this.selector = selector ?? "body";
        this.textDecoder = textDecoder;
    }
    /**
     * Fetches every URL in `urls` concurrently and parses each response
     * with Cheerio.
     * @param urls An array of URLs to fetch and load.
     * @param caller Object whose `call` method is used to invoke `fetch`.
     * @param timeout Abort timeout passed to `AbortSignal.timeout`; a falsy value disables it.
     * @param textDecoder Optional decoder applied to the raw response bytes.
     * @param options Options forwarded to Cheerio's `load`.
     * @returns A Promise that resolves to an array of CheerioAPI instances.
     */
    static async scrapeAll(urls, caller, timeout, textDecoder, options) {
        const pending = urls.map((url) => CheerioWebBaseLoader._scrape(url, caller, timeout, textDecoder, options));
        return Promise.all(pending);
    }
    /**
     * Fetches a single URL through `caller` and parses the body with
     * Cheerio. Parameters are the same as for `scrapeAll`, for one `url`.
     * @returns A Promise that resolves to a CheerioAPI instance.
     */
    static async _scrape(url, caller, timeout, textDecoder, options) {
        const { load } = await CheerioWebBaseLoader.imports();
        const response = await caller.call(fetch, url, {
            signal: timeout ? AbortSignal.timeout(timeout) : undefined,
        });
        let html;
        if (textDecoder == null) {
            html = await response.text();
        }
        else {
            // Decode the raw bytes with the caller-supplied decoder, falling
            // back to the response text only if it yields null/undefined.
            const decoded = textDecoder.decode(await response.arrayBuffer());
            html = decoded ?? (await response.text());
        }
        return load(html, options);
    }
    /**
     * Fetches the document at `webPath` and parses it with Cheerio, using
     * this instance's caller, timeout and text decoder.
     * @returns A Promise that resolves to a CheerioAPI instance.
     */
    async scrape() {
        const { webPath, caller, timeout, textDecoder } = this;
        return CheerioWebBaseLoader._scrape(webPath, caller, timeout, textDecoder);
    }
    /**
     * Scrapes the page, takes the text of the elements matching `selector`,
     * and wraps it in a single Document whose metadata records the source URL.
     * @returns A Promise that resolves to an array of Document instances.
     */
    async load() {
        const $ = await this.scrape();
        const pageContent = $(this.selector).text();
        return [
            new documents_1.Document({
                pageContent,
                metadata: { source: this.webPath },
            }),
        ];
    }
    /**
     * Dynamically imports the Cheerio library. If the import fails, the
     * original error is logged and an installation hint is thrown instead.
     * @returns A Promise that resolves to an object containing the load function from the Cheerio library.
     */
    static async imports() {
        try {
            const cheerio = await import("cheerio");
            return { load: cheerio.load };
        }
        catch (e) {
            console.error(e);
            throw new Error("Please install cheerio as a dependency with, e.g. `yarn add cheerio`");
        }
    }
}
|
|
118
|
-
exports.CheerioWebBaseLoader = CheerioWebBaseLoader;
|