langchain 0.2.17 → 0.3.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/executor.cjs +11 -0
- package/dist/agents/executor.d.ts +2 -0
- package/dist/agents/executor.js +11 -0
- package/dist/agents/openai_functions/index.cjs +2 -2
- package/dist/agents/openai_functions/index.js +2 -2
- package/dist/agents/react/output_parser.cjs +3 -2
- package/dist/agents/react/output_parser.js +3 -2
- package/dist/chains/combine_documents/stuff.cjs +2 -2
- package/dist/chains/combine_documents/stuff.js +2 -2
- package/dist/chains/conversational_retrieval_chain.cjs +1 -1
- package/dist/chains/conversational_retrieval_chain.d.ts +1 -1
- package/dist/chains/conversational_retrieval_chain.js +1 -1
- package/dist/chains/llm_chain.cjs +1 -1
- package/dist/chains/llm_chain.d.ts +1 -1
- package/dist/chains/llm_chain.js +1 -1
- package/dist/chains/openai_functions/openapi.cjs +3 -1
- package/dist/chains/openai_functions/openapi.js +3 -1
- package/dist/chains/retrieval_qa.cjs +1 -1
- package/dist/chains/retrieval_qa.d.ts +1 -1
- package/dist/chains/retrieval_qa.js +1 -1
- package/dist/load/import_constants.cjs +2 -39
- package/dist/load/import_constants.js +2 -39
- package/dist/load/import_map.cjs +2 -3
- package/dist/load/import_map.d.ts +0 -1
- package/dist/load/import_map.js +0 -1
- package/dist/smith/config.d.ts +1 -5
- package/package.json +35 -858
- package/dist/document_loaders/fs/chatgpt.cjs +0 -90
- package/dist/document_loaders/fs/chatgpt.d.ts +0 -8
- package/dist/document_loaders/fs/chatgpt.js +0 -86
- package/dist/document_loaders/fs/csv.cjs +0 -73
- package/dist/document_loaders/fs/csv.d.ts +0 -65
- package/dist/document_loaders/fs/csv.js +0 -69
- package/dist/document_loaders/fs/docx.cjs +0 -58
- package/dist/document_loaders/fs/docx.d.ts +0 -25
- package/dist/document_loaders/fs/docx.js +0 -54
- package/dist/document_loaders/fs/epub.cjs +0 -103
- package/dist/document_loaders/fs/epub.d.ts +0 -33
- package/dist/document_loaders/fs/epub.js +0 -99
- package/dist/document_loaders/fs/notion.cjs +0 -26
- package/dist/document_loaders/fs/notion.d.ts +0 -12
- package/dist/document_loaders/fs/notion.js +0 -22
- package/dist/document_loaders/fs/obsidian.cjs +0 -247
- package/dist/document_loaders/fs/obsidian.d.ts +0 -28
- package/dist/document_loaders/fs/obsidian.js +0 -240
- package/dist/document_loaders/fs/openai_whisper_audio.cjs +0 -49
- package/dist/document_loaders/fs/openai_whisper_audio.d.ts +0 -23
- package/dist/document_loaders/fs/openai_whisper_audio.js +0 -45
- package/dist/document_loaders/fs/pdf.cjs +0 -148
- package/dist/document_loaders/fs/pdf.d.ts +0 -49
- package/dist/document_loaders/fs/pdf.js +0 -144
- package/dist/document_loaders/fs/pptx.cjs +0 -46
- package/dist/document_loaders/fs/pptx.d.ts +0 -25
- package/dist/document_loaders/fs/pptx.js +0 -42
- package/dist/document_loaders/fs/srt.cjs +0 -57
- package/dist/document_loaders/fs/srt.d.ts +0 -32
- package/dist/document_loaders/fs/srt.js +0 -50
- package/dist/document_loaders/fs/unstructured.cjs +0 -338
- package/dist/document_loaders/fs/unstructured.d.ts +0 -125
- package/dist/document_loaders/fs/unstructured.js +0 -333
- package/dist/document_loaders/web/apify_dataset.cjs +0 -130
- package/dist/document_loaders/web/apify_dataset.d.ts +0 -85
- package/dist/document_loaders/web/apify_dataset.js +0 -126
- package/dist/document_loaders/web/assemblyai.cjs +0 -200
- package/dist/document_loaders/web/assemblyai.d.ts +0 -95
- package/dist/document_loaders/web/assemblyai.js +0 -193
- package/dist/document_loaders/web/azure_blob_storage_container.cjs +0 -73
- package/dist/document_loaders/web/azure_blob_storage_container.d.ts +0 -46
- package/dist/document_loaders/web/azure_blob_storage_container.js +0 -69
- package/dist/document_loaders/web/azure_blob_storage_file.cjs +0 -124
- package/dist/document_loaders/web/azure_blob_storage_file.d.ts +0 -53
- package/dist/document_loaders/web/azure_blob_storage_file.js +0 -97
- package/dist/document_loaders/web/browserbase.cjs +0 -93
- package/dist/document_loaders/web/browserbase.d.ts +0 -48
- package/dist/document_loaders/web/browserbase.js +0 -86
- package/dist/document_loaders/web/cheerio.cjs +0 -118
- package/dist/document_loaders/web/cheerio.d.ts +0 -77
- package/dist/document_loaders/web/cheerio.js +0 -114
- package/dist/document_loaders/web/college_confidential.cjs +0 -41
- package/dist/document_loaders/web/college_confidential.d.ts +0 -25
- package/dist/document_loaders/web/college_confidential.js +0 -37
- package/dist/document_loaders/web/confluence.cjs +0 -190
- package/dist/document_loaders/web/confluence.d.ts +0 -114
- package/dist/document_loaders/web/confluence.js +0 -186
- package/dist/document_loaders/web/couchbase.cjs +0 -95
- package/dist/document_loaders/web/couchbase.d.ts +0 -32
- package/dist/document_loaders/web/couchbase.js +0 -91
- package/dist/document_loaders/web/figma.cjs +0 -102
- package/dist/document_loaders/web/figma.d.ts +0 -82
- package/dist/document_loaders/web/figma.js +0 -98
- package/dist/document_loaders/web/firecrawl.cjs +0 -95
- package/dist/document_loaders/web/firecrawl.d.ts +0 -50
- package/dist/document_loaders/web/firecrawl.js +0 -88
- package/dist/document_loaders/web/gitbook.cjs +0 -110
- package/dist/document_loaders/web/gitbook.d.ts +0 -55
- package/dist/document_loaders/web/gitbook.js +0 -106
- package/dist/document_loaders/web/github.cjs +0 -615
- package/dist/document_loaders/web/github.d.ts +0 -203
- package/dist/document_loaders/web/github.js +0 -608
- package/dist/document_loaders/web/hn.cjs +0 -90
- package/dist/document_loaders/web/hn.d.ts +0 -42
- package/dist/document_loaders/web/hn.js +0 -86
- package/dist/document_loaders/web/imsdb.cjs +0 -44
- package/dist/document_loaders/web/imsdb.d.ts +0 -23
- package/dist/document_loaders/web/imsdb.js +0 -40
- package/dist/document_loaders/web/notionapi.cjs +0 -404
- package/dist/document_loaders/web/notionapi.d.ts +0 -133
- package/dist/document_loaders/web/notionapi.js +0 -392
- package/dist/document_loaders/web/notiondb.cjs +0 -199
- package/dist/document_loaders/web/notiondb.d.ts +0 -56
- package/dist/document_loaders/web/notiondb.js +0 -195
- package/dist/document_loaders/web/pdf.cjs +0 -140
- package/dist/document_loaders/web/pdf.d.ts +0 -35
- package/dist/document_loaders/web/pdf.js +0 -136
- package/dist/document_loaders/web/playwright.cjs +0 -89
- package/dist/document_loaders/web/playwright.d.ts +0 -58
- package/dist/document_loaders/web/playwright.js +0 -85
- package/dist/document_loaders/web/puppeteer.cjs +0 -139
- package/dist/document_loaders/web/puppeteer.d.ts +0 -82
- package/dist/document_loaders/web/puppeteer.js +0 -135
- package/dist/document_loaders/web/recursive_url.cjs +0 -198
- package/dist/document_loaders/web/recursive_url.d.ts +0 -33
- package/dist/document_loaders/web/recursive_url.js +0 -194
- package/dist/document_loaders/web/s3.cjs +0 -164
- package/dist/document_loaders/web/s3.d.ts +0 -78
- package/dist/document_loaders/web/s3.js +0 -137
- package/dist/document_loaders/web/searchapi.cjs +0 -150
- package/dist/document_loaders/web/searchapi.d.ts +0 -76
- package/dist/document_loaders/web/searchapi.js +0 -146
- package/dist/document_loaders/web/serpapi.cjs +0 -127
- package/dist/document_loaders/web/serpapi.d.ts +0 -62
- package/dist/document_loaders/web/serpapi.js +0 -123
- package/dist/document_loaders/web/sitemap.cjs +0 -118
- package/dist/document_loaders/web/sitemap.d.ts +0 -41
- package/dist/document_loaders/web/sitemap.js +0 -114
- package/dist/document_loaders/web/sonix_audio.cjs +0 -68
- package/dist/document_loaders/web/sonix_audio.d.ts +0 -36
- package/dist/document_loaders/web/sonix_audio.js +0 -64
- package/dist/document_loaders/web/sort_xyz_blockchain.cjs +0 -157
- package/dist/document_loaders/web/sort_xyz_blockchain.d.ts +0 -78
- package/dist/document_loaders/web/sort_xyz_blockchain.js +0 -153
- package/dist/document_loaders/web/youtube.cjs +0 -116
- package/dist/document_loaders/web/youtube.d.ts +0 -55
- package/dist/document_loaders/web/youtube.js +0 -112
- package/dist/experimental/chrome_ai/app/dist/bundle.cjs +0 -1250
- package/dist/experimental/chrome_ai/app/dist/bundle.d.ts +0 -1
- package/dist/experimental/chrome_ai/app/dist/bundle.js +0 -1249
- package/dist/experimental/tools/pyinterpreter.cjs +0 -248
- package/dist/experimental/tools/pyinterpreter.d.ts +0 -18
- package/dist/experimental/tools/pyinterpreter.js +0 -244
- package/dist/retrievers/self_query/chroma.cjs +0 -48
- package/dist/retrievers/self_query/chroma.d.ts +0 -26
- package/dist/retrievers/self_query/chroma.js +0 -44
- package/dist/retrievers/self_query/pinecone.cjs +0 -47
- package/dist/retrievers/self_query/pinecone.d.ts +0 -26
- package/dist/retrievers/self_query/pinecone.js +0 -43
- package/dist/retrievers/self_query/supabase.cjs +0 -278
- package/dist/retrievers/self_query/supabase.d.ts +0 -109
- package/dist/retrievers/self_query/supabase.js +0 -274
- package/dist/retrievers/self_query/supabase_utils.cjs +0 -264
- package/dist/retrievers/self_query/supabase_utils.d.ts +0 -101
- package/dist/retrievers/self_query/supabase_utils.js +0 -259
- package/dist/retrievers/self_query/vectara.cjs +0 -143
- package/dist/retrievers/self_query/vectara.d.ts +0 -42
- package/dist/retrievers/self_query/vectara.js +0 -139
- package/dist/retrievers/self_query/weaviate.cjs +0 -201
- package/dist/retrievers/self_query/weaviate.d.ts +0 -99
- package/dist/retrievers/self_query/weaviate.js +0 -197
- package/dist/types/assemblyai-types.cjs +0 -2
- package/dist/types/assemblyai-types.d.ts +0 -4
- package/dist/types/assemblyai-types.js +0 -1
- package/document_loaders/fs/chatgpt.cjs +0 -1
- package/document_loaders/fs/chatgpt.d.cts +0 -1
- package/document_loaders/fs/chatgpt.d.ts +0 -1
- package/document_loaders/fs/chatgpt.js +0 -1
- package/document_loaders/fs/csv.cjs +0 -1
- package/document_loaders/fs/csv.d.cts +0 -1
- package/document_loaders/fs/csv.d.ts +0 -1
- package/document_loaders/fs/csv.js +0 -1
- package/document_loaders/fs/docx.cjs +0 -1
- package/document_loaders/fs/docx.d.cts +0 -1
- package/document_loaders/fs/docx.d.ts +0 -1
- package/document_loaders/fs/docx.js +0 -1
- package/document_loaders/fs/epub.cjs +0 -1
- package/document_loaders/fs/epub.d.cts +0 -1
- package/document_loaders/fs/epub.d.ts +0 -1
- package/document_loaders/fs/epub.js +0 -1
- package/document_loaders/fs/notion.cjs +0 -1
- package/document_loaders/fs/notion.d.cts +0 -1
- package/document_loaders/fs/notion.d.ts +0 -1
- package/document_loaders/fs/notion.js +0 -1
- package/document_loaders/fs/obsidian.cjs +0 -1
- package/document_loaders/fs/obsidian.d.cts +0 -1
- package/document_loaders/fs/obsidian.d.ts +0 -1
- package/document_loaders/fs/obsidian.js +0 -1
- package/document_loaders/fs/openai_whisper_audio.cjs +0 -1
- package/document_loaders/fs/openai_whisper_audio.d.cts +0 -1
- package/document_loaders/fs/openai_whisper_audio.d.ts +0 -1
- package/document_loaders/fs/openai_whisper_audio.js +0 -1
- package/document_loaders/fs/pdf.cjs +0 -1
- package/document_loaders/fs/pdf.d.cts +0 -1
- package/document_loaders/fs/pdf.d.ts +0 -1
- package/document_loaders/fs/pdf.js +0 -1
- package/document_loaders/fs/pptx.cjs +0 -1
- package/document_loaders/fs/pptx.d.cts +0 -1
- package/document_loaders/fs/pptx.d.ts +0 -1
- package/document_loaders/fs/pptx.js +0 -1
- package/document_loaders/fs/srt.cjs +0 -1
- package/document_loaders/fs/srt.d.cts +0 -1
- package/document_loaders/fs/srt.d.ts +0 -1
- package/document_loaders/fs/srt.js +0 -1
- package/document_loaders/fs/unstructured.cjs +0 -1
- package/document_loaders/fs/unstructured.d.cts +0 -1
- package/document_loaders/fs/unstructured.d.ts +0 -1
- package/document_loaders/fs/unstructured.js +0 -1
- package/document_loaders/web/apify_dataset.cjs +0 -1
- package/document_loaders/web/apify_dataset.d.cts +0 -1
- package/document_loaders/web/apify_dataset.d.ts +0 -1
- package/document_loaders/web/apify_dataset.js +0 -1
- package/document_loaders/web/assemblyai.cjs +0 -1
- package/document_loaders/web/assemblyai.d.cts +0 -1
- package/document_loaders/web/assemblyai.d.ts +0 -1
- package/document_loaders/web/assemblyai.js +0 -1
- package/document_loaders/web/azure_blob_storage_container.cjs +0 -1
- package/document_loaders/web/azure_blob_storage_container.d.cts +0 -1
- package/document_loaders/web/azure_blob_storage_container.d.ts +0 -1
- package/document_loaders/web/azure_blob_storage_container.js +0 -1
- package/document_loaders/web/azure_blob_storage_file.cjs +0 -1
- package/document_loaders/web/azure_blob_storage_file.d.cts +0 -1
- package/document_loaders/web/azure_blob_storage_file.d.ts +0 -1
- package/document_loaders/web/azure_blob_storage_file.js +0 -1
- package/document_loaders/web/browserbase.cjs +0 -1
- package/document_loaders/web/browserbase.d.cts +0 -1
- package/document_loaders/web/browserbase.d.ts +0 -1
- package/document_loaders/web/browserbase.js +0 -1
- package/document_loaders/web/cheerio.cjs +0 -1
- package/document_loaders/web/cheerio.d.cts +0 -1
- package/document_loaders/web/cheerio.d.ts +0 -1
- package/document_loaders/web/cheerio.js +0 -1
- package/document_loaders/web/college_confidential.cjs +0 -1
- package/document_loaders/web/college_confidential.d.cts +0 -1
- package/document_loaders/web/college_confidential.d.ts +0 -1
- package/document_loaders/web/college_confidential.js +0 -1
- package/document_loaders/web/confluence.cjs +0 -1
- package/document_loaders/web/confluence.d.cts +0 -1
- package/document_loaders/web/confluence.d.ts +0 -1
- package/document_loaders/web/confluence.js +0 -1
- package/document_loaders/web/couchbase.cjs +0 -1
- package/document_loaders/web/couchbase.d.cts +0 -1
- package/document_loaders/web/couchbase.d.ts +0 -1
- package/document_loaders/web/couchbase.js +0 -1
- package/document_loaders/web/figma.cjs +0 -1
- package/document_loaders/web/figma.d.cts +0 -1
- package/document_loaders/web/figma.d.ts +0 -1
- package/document_loaders/web/figma.js +0 -1
- package/document_loaders/web/firecrawl.cjs +0 -1
- package/document_loaders/web/firecrawl.d.cts +0 -1
- package/document_loaders/web/firecrawl.d.ts +0 -1
- package/document_loaders/web/firecrawl.js +0 -1
- package/document_loaders/web/gitbook.cjs +0 -1
- package/document_loaders/web/gitbook.d.cts +0 -1
- package/document_loaders/web/gitbook.d.ts +0 -1
- package/document_loaders/web/gitbook.js +0 -1
- package/document_loaders/web/github.cjs +0 -1
- package/document_loaders/web/github.d.cts +0 -1
- package/document_loaders/web/github.d.ts +0 -1
- package/document_loaders/web/github.js +0 -1
- package/document_loaders/web/hn.cjs +0 -1
- package/document_loaders/web/hn.d.cts +0 -1
- package/document_loaders/web/hn.d.ts +0 -1
- package/document_loaders/web/hn.js +0 -1
- package/document_loaders/web/imsdb.cjs +0 -1
- package/document_loaders/web/imsdb.d.cts +0 -1
- package/document_loaders/web/imsdb.d.ts +0 -1
- package/document_loaders/web/imsdb.js +0 -1
- package/document_loaders/web/notionapi.cjs +0 -1
- package/document_loaders/web/notionapi.d.cts +0 -1
- package/document_loaders/web/notionapi.d.ts +0 -1
- package/document_loaders/web/notionapi.js +0 -1
- package/document_loaders/web/notiondb.cjs +0 -1
- package/document_loaders/web/notiondb.d.cts +0 -1
- package/document_loaders/web/notiondb.d.ts +0 -1
- package/document_loaders/web/notiondb.js +0 -1
- package/document_loaders/web/pdf.cjs +0 -1
- package/document_loaders/web/pdf.d.cts +0 -1
- package/document_loaders/web/pdf.d.ts +0 -1
- package/document_loaders/web/pdf.js +0 -1
- package/document_loaders/web/playwright.cjs +0 -1
- package/document_loaders/web/playwright.d.cts +0 -1
- package/document_loaders/web/playwright.d.ts +0 -1
- package/document_loaders/web/playwright.js +0 -1
- package/document_loaders/web/puppeteer.cjs +0 -1
- package/document_loaders/web/puppeteer.d.cts +0 -1
- package/document_loaders/web/puppeteer.d.ts +0 -1
- package/document_loaders/web/puppeteer.js +0 -1
- package/document_loaders/web/recursive_url.cjs +0 -1
- package/document_loaders/web/recursive_url.d.cts +0 -1
- package/document_loaders/web/recursive_url.d.ts +0 -1
- package/document_loaders/web/recursive_url.js +0 -1
- package/document_loaders/web/s3.cjs +0 -1
- package/document_loaders/web/s3.d.cts +0 -1
- package/document_loaders/web/s3.d.ts +0 -1
- package/document_loaders/web/s3.js +0 -1
- package/document_loaders/web/searchapi.cjs +0 -1
- package/document_loaders/web/searchapi.d.cts +0 -1
- package/document_loaders/web/searchapi.d.ts +0 -1
- package/document_loaders/web/searchapi.js +0 -1
- package/document_loaders/web/serpapi.cjs +0 -1
- package/document_loaders/web/serpapi.d.cts +0 -1
- package/document_loaders/web/serpapi.d.ts +0 -1
- package/document_loaders/web/serpapi.js +0 -1
- package/document_loaders/web/sitemap.cjs +0 -1
- package/document_loaders/web/sitemap.d.cts +0 -1
- package/document_loaders/web/sitemap.d.ts +0 -1
- package/document_loaders/web/sitemap.js +0 -1
- package/document_loaders/web/sonix_audio.cjs +0 -1
- package/document_loaders/web/sonix_audio.d.cts +0 -1
- package/document_loaders/web/sonix_audio.d.ts +0 -1
- package/document_loaders/web/sonix_audio.js +0 -1
- package/document_loaders/web/sort_xyz_blockchain.cjs +0 -1
- package/document_loaders/web/sort_xyz_blockchain.d.cts +0 -1
- package/document_loaders/web/sort_xyz_blockchain.d.ts +0 -1
- package/document_loaders/web/sort_xyz_blockchain.js +0 -1
- package/document_loaders/web/youtube.cjs +0 -1
- package/document_loaders/web/youtube.d.cts +0 -1
- package/document_loaders/web/youtube.d.ts +0 -1
- package/document_loaders/web/youtube.js +0 -1
- package/experimental/tools/pyinterpreter.cjs +0 -1
- package/experimental/tools/pyinterpreter.d.cts +0 -1
- package/experimental/tools/pyinterpreter.d.ts +0 -1
- package/experimental/tools/pyinterpreter.js +0 -1
- package/memory/index.cjs +0 -1
- package/memory/index.d.cts +0 -1
- package/memory/index.d.ts +0 -1
- package/memory/index.js +0 -1
- package/retrievers/self_query/chroma.cjs +0 -1
- package/retrievers/self_query/chroma.d.cts +0 -1
- package/retrievers/self_query/chroma.d.ts +0 -1
- package/retrievers/self_query/chroma.js +0 -1
- package/retrievers/self_query/pinecone.cjs +0 -1
- package/retrievers/self_query/pinecone.d.cts +0 -1
- package/retrievers/self_query/pinecone.d.ts +0 -1
- package/retrievers/self_query/pinecone.js +0 -1
- package/retrievers/self_query/supabase.cjs +0 -1
- package/retrievers/self_query/supabase.d.cts +0 -1
- package/retrievers/self_query/supabase.d.ts +0 -1
- package/retrievers/self_query/supabase.js +0 -1
- package/retrievers/self_query/vectara.cjs +0 -1
- package/retrievers/self_query/vectara.d.cts +0 -1
- package/retrievers/self_query/vectara.d.ts +0 -1
- package/retrievers/self_query/vectara.js +0 -1
- package/retrievers/self_query/weaviate.cjs +0 -1
- package/retrievers/self_query/weaviate.d.cts +0 -1
- package/retrievers/self_query/weaviate.d.ts +0 -1
- package/retrievers/self_query/weaviate.js +0 -1
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.OpenAIWhisperAudio = void 0;
|
|
4
|
-
const openai_1 = require("@langchain/openai");
|
|
5
|
-
const documents_1 = require("@langchain/core/documents");
|
|
6
|
-
const buffer_js_1 = require("./buffer.cjs");
|
|
7
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
8
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
9
|
-
oldEntrypointName: "document_loaders/fs/openai_whisper_audio",
|
|
10
|
-
newPackageName: "@langchain/community",
|
|
11
|
-
});
|
|
12
|
-
const MODEL_NAME = "whisper-1";
|
|
13
|
-
/**
|
|
14
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/openai_whisper_audio" instead. This entrypoint will be removed in 0.3.0.
|
|
15
|
-
*
|
|
16
|
-
* @example
|
|
17
|
-
* ```typescript
|
|
18
|
-
* const loader = new OpenAIWhisperAudio(
|
|
19
|
-
* "./src/document_loaders/example_data/test.mp3",
|
|
20
|
-
* );
|
|
21
|
-
* const docs = await loader.load();
|
|
22
|
-
* console.log(docs);
|
|
23
|
-
* ```
|
|
24
|
-
*/
|
|
25
|
-
class OpenAIWhisperAudio extends buffer_js_1.BufferLoader {
|
|
26
|
-
constructor(filePathOrBlob, fields) {
|
|
27
|
-
super(filePathOrBlob);
|
|
28
|
-
Object.defineProperty(this, "openAIClient", {
|
|
29
|
-
enumerable: true,
|
|
30
|
-
configurable: true,
|
|
31
|
-
writable: true,
|
|
32
|
-
value: void 0
|
|
33
|
-
});
|
|
34
|
-
this.openAIClient = new openai_1.OpenAIClient(fields?.clientOptions);
|
|
35
|
-
}
|
|
36
|
-
async parse(raw, metadata) {
|
|
37
|
-
const fileName = metadata.source === "blob" ? metadata.blobType : metadata.source;
|
|
38
|
-
const transcriptionResponse = await this.openAIClient.audio.transcriptions.create({
|
|
39
|
-
file: await (0, openai_1.toFile)(raw, fileName),
|
|
40
|
-
model: MODEL_NAME,
|
|
41
|
-
});
|
|
42
|
-
const document = new documents_1.Document({
|
|
43
|
-
pageContent: transcriptionResponse.text,
|
|
44
|
-
metadata,
|
|
45
|
-
});
|
|
46
|
-
return [document];
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
exports.OpenAIWhisperAudio = OpenAIWhisperAudio;
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
/// <reference types="node" resolution-mode="require"/>
|
|
2
|
-
import { type ClientOptions } from "@langchain/openai";
|
|
3
|
-
import { Document } from "@langchain/core/documents";
|
|
4
|
-
import { BufferLoader } from "./buffer.js";
|
|
5
|
-
/**
|
|
6
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/openai_whisper_audio" instead. This entrypoint will be removed in 0.3.0.
|
|
7
|
-
*
|
|
8
|
-
* @example
|
|
9
|
-
* ```typescript
|
|
10
|
-
* const loader = new OpenAIWhisperAudio(
|
|
11
|
-
* "./src/document_loaders/example_data/test.mp3",
|
|
12
|
-
* );
|
|
13
|
-
* const docs = await loader.load();
|
|
14
|
-
* console.log(docs);
|
|
15
|
-
* ```
|
|
16
|
-
*/
|
|
17
|
-
export declare class OpenAIWhisperAudio extends BufferLoader {
|
|
18
|
-
private readonly openAIClient;
|
|
19
|
-
constructor(filePathOrBlob: string | Blob, fields?: {
|
|
20
|
-
clientOptions?: ClientOptions;
|
|
21
|
-
});
|
|
22
|
-
protected parse(raw: Buffer, metadata: Record<string, string>): Promise<Document[]>;
|
|
23
|
-
}
|
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
import { OpenAIClient, toFile } from "@langchain/openai";
|
|
2
|
-
import { Document } from "@langchain/core/documents";
|
|
3
|
-
import { BufferLoader } from "./buffer.js";
|
|
4
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
5
|
-
/* #__PURE__ */ logVersion020MigrationWarning({
|
|
6
|
-
oldEntrypointName: "document_loaders/fs/openai_whisper_audio",
|
|
7
|
-
newPackageName: "@langchain/community",
|
|
8
|
-
});
|
|
9
|
-
const MODEL_NAME = "whisper-1";
|
|
10
|
-
/**
|
|
11
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/openai_whisper_audio" instead. This entrypoint will be removed in 0.3.0.
|
|
12
|
-
*
|
|
13
|
-
* @example
|
|
14
|
-
* ```typescript
|
|
15
|
-
* const loader = new OpenAIWhisperAudio(
|
|
16
|
-
* "./src/document_loaders/example_data/test.mp3",
|
|
17
|
-
* );
|
|
18
|
-
* const docs = await loader.load();
|
|
19
|
-
* console.log(docs);
|
|
20
|
-
* ```
|
|
21
|
-
*/
|
|
22
|
-
export class OpenAIWhisperAudio extends BufferLoader {
|
|
23
|
-
constructor(filePathOrBlob, fields) {
|
|
24
|
-
super(filePathOrBlob);
|
|
25
|
-
Object.defineProperty(this, "openAIClient", {
|
|
26
|
-
enumerable: true,
|
|
27
|
-
configurable: true,
|
|
28
|
-
writable: true,
|
|
29
|
-
value: void 0
|
|
30
|
-
});
|
|
31
|
-
this.openAIClient = new OpenAIClient(fields?.clientOptions);
|
|
32
|
-
}
|
|
33
|
-
async parse(raw, metadata) {
|
|
34
|
-
const fileName = metadata.source === "blob" ? metadata.blobType : metadata.source;
|
|
35
|
-
const transcriptionResponse = await this.openAIClient.audio.transcriptions.create({
|
|
36
|
-
file: await toFile(raw, fileName),
|
|
37
|
-
model: MODEL_NAME,
|
|
38
|
-
});
|
|
39
|
-
const document = new Document({
|
|
40
|
-
pageContent: transcriptionResponse.text,
|
|
41
|
-
metadata,
|
|
42
|
-
});
|
|
43
|
-
return [document];
|
|
44
|
-
}
|
|
45
|
-
}
|
|
@@ -1,148 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.PDFLoader = void 0;
|
|
4
|
-
const documents_1 = require("@langchain/core/documents");
|
|
5
|
-
const buffer_js_1 = require("./buffer.cjs");
|
|
6
|
-
const document_js_1 = require("../../util/document.cjs");
|
|
7
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
8
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
9
|
-
oldEntrypointName: "document_loaders/fs/pdf",
|
|
10
|
-
newPackageName: "@langchain/community",
|
|
11
|
-
});
|
|
12
|
-
/**
|
|
13
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/pdf" instead. This entrypoint will be removed in 0.3.0.
|
|
14
|
-
*
|
|
15
|
-
* A class that extends the `BufferLoader` class. It represents a document
|
|
16
|
-
* loader that loads documents from PDF files.
|
|
17
|
-
* @example
|
|
18
|
-
* ```typescript
|
|
19
|
-
* const loader = new PDFLoader("path/to/bitcoin.pdf");
|
|
20
|
-
* const docs = await loader.load();
|
|
21
|
-
* console.log({ docs });
|
|
22
|
-
* ```
|
|
23
|
-
*/
|
|
24
|
-
class PDFLoader extends buffer_js_1.BufferLoader {
|
|
25
|
-
constructor(filePathOrBlob, { splitPages = true, pdfjs = PDFLoaderImports, parsedItemSeparator = "", } = {}) {
|
|
26
|
-
super(filePathOrBlob);
|
|
27
|
-
Object.defineProperty(this, "splitPages", {
|
|
28
|
-
enumerable: true,
|
|
29
|
-
configurable: true,
|
|
30
|
-
writable: true,
|
|
31
|
-
value: void 0
|
|
32
|
-
});
|
|
33
|
-
Object.defineProperty(this, "pdfjs", {
|
|
34
|
-
enumerable: true,
|
|
35
|
-
configurable: true,
|
|
36
|
-
writable: true,
|
|
37
|
-
value: void 0
|
|
38
|
-
});
|
|
39
|
-
Object.defineProperty(this, "parsedItemSeparator", {
|
|
40
|
-
enumerable: true,
|
|
41
|
-
configurable: true,
|
|
42
|
-
writable: true,
|
|
43
|
-
value: void 0
|
|
44
|
-
});
|
|
45
|
-
this.splitPages = splitPages;
|
|
46
|
-
this.pdfjs = pdfjs;
|
|
47
|
-
this.parsedItemSeparator = parsedItemSeparator;
|
|
48
|
-
}
|
|
49
|
-
/**
|
|
50
|
-
* A method that takes a `raw` buffer and `metadata` as parameters and
|
|
51
|
-
* returns a promise that resolves to an array of `Document` instances. It
|
|
52
|
-
* uses the `getDocument` function from the PDF.js library to load the PDF
|
|
53
|
-
* from the buffer. It then iterates over each page of the PDF, retrieves
|
|
54
|
-
* the text content using the `getTextContent` method, and joins the text
|
|
55
|
-
* items to form the page content. It creates a new `Document` instance
|
|
56
|
-
* for each page with the extracted text content and metadata, and adds it
|
|
57
|
-
* to the `documents` array. If `splitPages` is `true`, it returns the
|
|
58
|
-
* array of `Document` instances. Otherwise, if there are no documents, it
|
|
59
|
-
* returns an empty array. Otherwise, it concatenates the page content of
|
|
60
|
-
* all documents and creates a single `Document` instance with the
|
|
61
|
-
* concatenated content.
|
|
62
|
-
* @param raw The buffer to be parsed.
|
|
63
|
-
* @param metadata The metadata of the document.
|
|
64
|
-
* @returns A promise that resolves to an array of `Document` instances.
|
|
65
|
-
*/
|
|
66
|
-
async parse(raw, metadata) {
|
|
67
|
-
const { getDocument, version } = await this.pdfjs();
|
|
68
|
-
const pdf = await getDocument({
|
|
69
|
-
data: new Uint8Array(raw.buffer),
|
|
70
|
-
useWorkerFetch: false,
|
|
71
|
-
isEvalSupported: false,
|
|
72
|
-
useSystemFonts: true,
|
|
73
|
-
}).promise;
|
|
74
|
-
const meta = await pdf.getMetadata().catch(() => null);
|
|
75
|
-
const documents = [];
|
|
76
|
-
for (let i = 1; i <= pdf.numPages; i += 1) {
|
|
77
|
-
const page = await pdf.getPage(i);
|
|
78
|
-
const content = await page.getTextContent();
|
|
79
|
-
if (content.items.length === 0) {
|
|
80
|
-
continue;
|
|
81
|
-
}
|
|
82
|
-
// Eliminate excessive newlines
|
|
83
|
-
// Source: https://github.com/albertcui/pdf-parse/blob/7086fc1cc9058545cdf41dd0646d6ae5832c7107/lib/pdf-parse.js#L16
|
|
84
|
-
let lastY;
|
|
85
|
-
const textItems = [];
|
|
86
|
-
for (const item of content.items) {
|
|
87
|
-
if ("str" in item) {
|
|
88
|
-
if (lastY === item.transform[5] || !lastY) {
|
|
89
|
-
textItems.push(item.str);
|
|
90
|
-
}
|
|
91
|
-
else {
|
|
92
|
-
textItems.push(`\n${item.str}`);
|
|
93
|
-
}
|
|
94
|
-
// eslint-disable-next-line prefer-destructuring
|
|
95
|
-
lastY = item.transform[5];
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
const text = textItems.join(this.parsedItemSeparator);
|
|
99
|
-
documents.push(new documents_1.Document({
|
|
100
|
-
pageContent: text,
|
|
101
|
-
metadata: {
|
|
102
|
-
...metadata,
|
|
103
|
-
pdf: {
|
|
104
|
-
version,
|
|
105
|
-
info: meta?.info,
|
|
106
|
-
metadata: meta?.metadata,
|
|
107
|
-
totalPages: pdf.numPages,
|
|
108
|
-
},
|
|
109
|
-
loc: {
|
|
110
|
-
pageNumber: i,
|
|
111
|
-
},
|
|
112
|
-
},
|
|
113
|
-
}));
|
|
114
|
-
}
|
|
115
|
-
if (this.splitPages) {
|
|
116
|
-
return documents;
|
|
117
|
-
}
|
|
118
|
-
if (documents.length === 0) {
|
|
119
|
-
return [];
|
|
120
|
-
}
|
|
121
|
-
return [
|
|
122
|
-
new documents_1.Document({
|
|
123
|
-
pageContent: (0, document_js_1.formatDocumentsAsString)(documents),
|
|
124
|
-
metadata: {
|
|
125
|
-
...metadata,
|
|
126
|
-
pdf: {
|
|
127
|
-
version,
|
|
128
|
-
info: meta?.info,
|
|
129
|
-
metadata: meta?.metadata,
|
|
130
|
-
totalPages: pdf.numPages,
|
|
131
|
-
},
|
|
132
|
-
},
|
|
133
|
-
}),
|
|
134
|
-
];
|
|
135
|
-
}
|
|
136
|
-
}
|
|
137
|
-
exports.PDFLoader = PDFLoader;
|
|
138
|
-
async function PDFLoaderImports() {
|
|
139
|
-
try {
|
|
140
|
-
const { default: mod } = await import("pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js");
|
|
141
|
-
const { getDocument, version } = mod;
|
|
142
|
-
return { getDocument, version };
|
|
143
|
-
}
|
|
144
|
-
catch (e) {
|
|
145
|
-
console.error(e);
|
|
146
|
-
throw new Error("Failed to load pdf-parse. Please install it with eg. `npm install pdf-parse`.");
|
|
147
|
-
}
|
|
148
|
-
}
|
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
/// <reference path="../../../src/types/pdf-parse.d.ts" />
|
|
2
|
-
/// <reference types="node" resolution-mode="require"/>
|
|
3
|
-
import { Document } from "@langchain/core/documents";
|
|
4
|
-
import { BufferLoader } from "./buffer.js";
|
|
5
|
-
/**
|
|
6
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/pdf" instead. This entrypoint will be removed in 0.3.0.
|
|
7
|
-
*
|
|
8
|
-
* A class that extends the `BufferLoader` class. It represents a document
|
|
9
|
-
* loader that loads documents from PDF files.
|
|
10
|
-
* @example
|
|
11
|
-
* ```typescript
|
|
12
|
-
* const loader = new PDFLoader("path/to/bitcoin.pdf");
|
|
13
|
-
* const docs = await loader.load();
|
|
14
|
-
* console.log({ docs });
|
|
15
|
-
* ```
|
|
16
|
-
*/
|
|
17
|
-
export declare class PDFLoader extends BufferLoader {
|
|
18
|
-
private splitPages;
|
|
19
|
-
private pdfjs;
|
|
20
|
-
protected parsedItemSeparator: string;
|
|
21
|
-
constructor(filePathOrBlob: string | Blob, { splitPages, pdfjs, parsedItemSeparator, }?: {
|
|
22
|
-
splitPages?: boolean | undefined;
|
|
23
|
-
pdfjs?: typeof PDFLoaderImports | undefined;
|
|
24
|
-
parsedItemSeparator?: string | undefined;
|
|
25
|
-
});
|
|
26
|
-
/**
|
|
27
|
-
* A method that takes a `raw` buffer and `metadata` as parameters and
|
|
28
|
-
* returns a promise that resolves to an array of `Document` instances. It
|
|
29
|
-
* uses the `getDocument` function from the PDF.js library to load the PDF
|
|
30
|
-
* from the buffer. It then iterates over each page of the PDF, retrieves
|
|
31
|
-
* the text content using the `getTextContent` method, and joins the text
|
|
32
|
-
* items to form the page content. It creates a new `Document` instance
|
|
33
|
-
* for each page with the extracted text content and metadata, and adds it
|
|
34
|
-
* to the `documents` array. If `splitPages` is `true`, it returns the
|
|
35
|
-
* array of `Document` instances. Otherwise, if there are no documents, it
|
|
36
|
-
* returns an empty array. Otherwise, it concatenates the page content of
|
|
37
|
-
* all documents and creates a single `Document` instance with the
|
|
38
|
-
* concatenated content.
|
|
39
|
-
* @param raw The buffer to be parsed.
|
|
40
|
-
* @param metadata The metadata of the document.
|
|
41
|
-
* @returns A promise that resolves to an array of `Document` instances.
|
|
42
|
-
*/
|
|
43
|
-
parse(raw: Buffer, metadata: Document["metadata"]): Promise<Document[]>;
|
|
44
|
-
}
|
|
45
|
-
declare function PDFLoaderImports(): Promise<{
|
|
46
|
-
getDocument: typeof import("pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js").getDocument;
|
|
47
|
-
version: string;
|
|
48
|
-
}>;
|
|
49
|
-
export {};
|
|
@@ -1,144 +0,0 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
|
-
import { BufferLoader } from "./buffer.js";
|
|
3
|
-
import { formatDocumentsAsString } from "../../util/document.js";
|
|
4
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
5
|
-
/* #__PURE__ */ logVersion020MigrationWarning({
|
|
6
|
-
oldEntrypointName: "document_loaders/fs/pdf",
|
|
7
|
-
newPackageName: "@langchain/community",
|
|
8
|
-
});
|
|
9
|
-
/**
|
|
10
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/pdf" instead. This entrypoint will be removed in 0.3.0.
|
|
11
|
-
*
|
|
12
|
-
* A class that extends the `BufferLoader` class. It represents a document
|
|
13
|
-
* loader that loads documents from PDF files.
|
|
14
|
-
* @example
|
|
15
|
-
* ```typescript
|
|
16
|
-
* const loader = new PDFLoader("path/to/bitcoin.pdf");
|
|
17
|
-
* const docs = await loader.load();
|
|
18
|
-
* console.log({ docs });
|
|
19
|
-
* ```
|
|
20
|
-
*/
|
|
21
|
-
export class PDFLoader extends BufferLoader {
|
|
22
|
-
constructor(filePathOrBlob, { splitPages = true, pdfjs = PDFLoaderImports, parsedItemSeparator = "", } = {}) {
|
|
23
|
-
super(filePathOrBlob);
|
|
24
|
-
Object.defineProperty(this, "splitPages", {
|
|
25
|
-
enumerable: true,
|
|
26
|
-
configurable: true,
|
|
27
|
-
writable: true,
|
|
28
|
-
value: void 0
|
|
29
|
-
});
|
|
30
|
-
Object.defineProperty(this, "pdfjs", {
|
|
31
|
-
enumerable: true,
|
|
32
|
-
configurable: true,
|
|
33
|
-
writable: true,
|
|
34
|
-
value: void 0
|
|
35
|
-
});
|
|
36
|
-
Object.defineProperty(this, "parsedItemSeparator", {
|
|
37
|
-
enumerable: true,
|
|
38
|
-
configurable: true,
|
|
39
|
-
writable: true,
|
|
40
|
-
value: void 0
|
|
41
|
-
});
|
|
42
|
-
this.splitPages = splitPages;
|
|
43
|
-
this.pdfjs = pdfjs;
|
|
44
|
-
this.parsedItemSeparator = parsedItemSeparator;
|
|
45
|
-
}
|
|
46
|
-
/**
|
|
47
|
-
* A method that takes a `raw` buffer and `metadata` as parameters and
|
|
48
|
-
* returns a promise that resolves to an array of `Document` instances. It
|
|
49
|
-
* uses the `getDocument` function from the PDF.js library to load the PDF
|
|
50
|
-
* from the buffer. It then iterates over each page of the PDF, retrieves
|
|
51
|
-
* the text content using the `getTextContent` method, and joins the text
|
|
52
|
-
* items to form the page content. It creates a new `Document` instance
|
|
53
|
-
* for each page with the extracted text content and metadata, and adds it
|
|
54
|
-
* to the `documents` array. If `splitPages` is `true`, it returns the
|
|
55
|
-
* array of `Document` instances. Otherwise, if there are no documents, it
|
|
56
|
-
* returns an empty array. Otherwise, it concatenates the page content of
|
|
57
|
-
* all documents and creates a single `Document` instance with the
|
|
58
|
-
* concatenated content.
|
|
59
|
-
* @param raw The buffer to be parsed.
|
|
60
|
-
* @param metadata The metadata of the document.
|
|
61
|
-
* @returns A promise that resolves to an array of `Document` instances.
|
|
62
|
-
*/
|
|
63
|
-
async parse(raw, metadata) {
|
|
64
|
-
const { getDocument, version } = await this.pdfjs();
|
|
65
|
-
const pdf = await getDocument({
|
|
66
|
-
data: new Uint8Array(raw.buffer),
|
|
67
|
-
useWorkerFetch: false,
|
|
68
|
-
isEvalSupported: false,
|
|
69
|
-
useSystemFonts: true,
|
|
70
|
-
}).promise;
|
|
71
|
-
const meta = await pdf.getMetadata().catch(() => null);
|
|
72
|
-
const documents = [];
|
|
73
|
-
for (let i = 1; i <= pdf.numPages; i += 1) {
|
|
74
|
-
const page = await pdf.getPage(i);
|
|
75
|
-
const content = await page.getTextContent();
|
|
76
|
-
if (content.items.length === 0) {
|
|
77
|
-
continue;
|
|
78
|
-
}
|
|
79
|
-
// Eliminate excessive newlines
|
|
80
|
-
// Source: https://github.com/albertcui/pdf-parse/blob/7086fc1cc9058545cdf41dd0646d6ae5832c7107/lib/pdf-parse.js#L16
|
|
81
|
-
let lastY;
|
|
82
|
-
const textItems = [];
|
|
83
|
-
for (const item of content.items) {
|
|
84
|
-
if ("str" in item) {
|
|
85
|
-
if (lastY === item.transform[5] || !lastY) {
|
|
86
|
-
textItems.push(item.str);
|
|
87
|
-
}
|
|
88
|
-
else {
|
|
89
|
-
textItems.push(`\n${item.str}`);
|
|
90
|
-
}
|
|
91
|
-
// eslint-disable-next-line prefer-destructuring
|
|
92
|
-
lastY = item.transform[5];
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
const text = textItems.join(this.parsedItemSeparator);
|
|
96
|
-
documents.push(new Document({
|
|
97
|
-
pageContent: text,
|
|
98
|
-
metadata: {
|
|
99
|
-
...metadata,
|
|
100
|
-
pdf: {
|
|
101
|
-
version,
|
|
102
|
-
info: meta?.info,
|
|
103
|
-
metadata: meta?.metadata,
|
|
104
|
-
totalPages: pdf.numPages,
|
|
105
|
-
},
|
|
106
|
-
loc: {
|
|
107
|
-
pageNumber: i,
|
|
108
|
-
},
|
|
109
|
-
},
|
|
110
|
-
}));
|
|
111
|
-
}
|
|
112
|
-
if (this.splitPages) {
|
|
113
|
-
return documents;
|
|
114
|
-
}
|
|
115
|
-
if (documents.length === 0) {
|
|
116
|
-
return [];
|
|
117
|
-
}
|
|
118
|
-
return [
|
|
119
|
-
new Document({
|
|
120
|
-
pageContent: formatDocumentsAsString(documents),
|
|
121
|
-
metadata: {
|
|
122
|
-
...metadata,
|
|
123
|
-
pdf: {
|
|
124
|
-
version,
|
|
125
|
-
info: meta?.info,
|
|
126
|
-
metadata: meta?.metadata,
|
|
127
|
-
totalPages: pdf.numPages,
|
|
128
|
-
},
|
|
129
|
-
},
|
|
130
|
-
}),
|
|
131
|
-
];
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
async function PDFLoaderImports() {
|
|
135
|
-
try {
|
|
136
|
-
const { default: mod } = await import("pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js");
|
|
137
|
-
const { getDocument, version } = mod;
|
|
138
|
-
return { getDocument, version };
|
|
139
|
-
}
|
|
140
|
-
catch (e) {
|
|
141
|
-
console.error(e);
|
|
142
|
-
throw new Error("Failed to load pdf-parse. Please install it with eg. `npm install pdf-parse`.");
|
|
143
|
-
}
|
|
144
|
-
}
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.PPTXLoader = void 0;
|
|
4
|
-
const officeparser_1 = require("officeparser");
|
|
5
|
-
const documents_1 = require("@langchain/core/documents");
|
|
6
|
-
const buffer_js_1 = require("./buffer.cjs");
|
|
7
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
8
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
9
|
-
oldEntrypointName: "document_loaders/fs/pptx",
|
|
10
|
-
newPackageName: "@langchain/community",
|
|
11
|
-
});
|
|
12
|
-
/**
|
|
13
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/pptx" instead. This entrypoint will be removed in 0.3.0.
|
|
14
|
-
*
|
|
15
|
-
* A class that extends the `BufferLoader` class. It represents a document
|
|
16
|
-
* loader that loads documents from PDF files.
|
|
17
|
-
*/
|
|
18
|
-
class PPTXLoader extends buffer_js_1.BufferLoader {
|
|
19
|
-
constructor(filePathOrBlob) {
|
|
20
|
-
super(filePathOrBlob);
|
|
21
|
-
}
|
|
22
|
-
/**
|
|
23
|
-
* A method that takes a `raw` buffer and `metadata` as parameters and
|
|
24
|
-
* returns a promise that resolves to an array of `Document` instances. It
|
|
25
|
-
* uses the `parseOfficeAsync` function from the `officeparser` module to extract
|
|
26
|
-
* the raw text content from the buffer. If the extracted powerpoint content is
|
|
27
|
-
* empty, it returns an empty array. Otherwise, it creates a new
|
|
28
|
-
* `Document` instance with the extracted powerpoint content and the provided
|
|
29
|
-
* metadata, and returns it as an array.
|
|
30
|
-
* @param raw The buffer to be parsed.
|
|
31
|
-
* @param metadata The metadata of the document.
|
|
32
|
-
* @returns A promise that resolves to an array of `Document` instances.
|
|
33
|
-
*/
|
|
34
|
-
async parse(raw, metadata) {
|
|
35
|
-
const pptx = await (0, officeparser_1.parseOfficeAsync)(raw, { outputErrorToConsole: true });
|
|
36
|
-
if (!pptx)
|
|
37
|
-
return [];
|
|
38
|
-
return [
|
|
39
|
-
new documents_1.Document({
|
|
40
|
-
pageContent: pptx,
|
|
41
|
-
metadata,
|
|
42
|
-
}),
|
|
43
|
-
];
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
exports.PPTXLoader = PPTXLoader;
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
/// <reference types="node" resolution-mode="require"/>
|
|
2
|
-
import { Document } from "@langchain/core/documents";
|
|
3
|
-
import { BufferLoader } from "./buffer.js";
|
|
4
|
-
/**
|
|
5
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/pptx" instead. This entrypoint will be removed in 0.3.0.
|
|
6
|
-
*
|
|
7
|
-
* A class that extends the `BufferLoader` class. It represents a document
|
|
8
|
-
* loader that loads documents from PDF files.
|
|
9
|
-
*/
|
|
10
|
-
export declare class PPTXLoader extends BufferLoader {
|
|
11
|
-
constructor(filePathOrBlob: string | Blob);
|
|
12
|
-
/**
|
|
13
|
-
* A method that takes a `raw` buffer and `metadata` as parameters and
|
|
14
|
-
* returns a promise that resolves to an array of `Document` instances. It
|
|
15
|
-
* uses the `parseOfficeAsync` function from the `officeparser` module to extract
|
|
16
|
-
* the raw text content from the buffer. If the extracted powerpoint content is
|
|
17
|
-
* empty, it returns an empty array. Otherwise, it creates a new
|
|
18
|
-
* `Document` instance with the extracted powerpoint content and the provided
|
|
19
|
-
* metadata, and returns it as an array.
|
|
20
|
-
* @param raw The buffer to be parsed.
|
|
21
|
-
* @param metadata The metadata of the document.
|
|
22
|
-
* @returns A promise that resolves to an array of `Document` instances.
|
|
23
|
-
*/
|
|
24
|
-
parse(raw: Buffer, metadata: Document["metadata"]): Promise<Document[]>;
|
|
25
|
-
}
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
import { parseOfficeAsync } from "officeparser";
|
|
2
|
-
import { Document } from "@langchain/core/documents";
|
|
3
|
-
import { BufferLoader } from "./buffer.js";
|
|
4
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
5
|
-
/* #__PURE__ */ logVersion020MigrationWarning({
|
|
6
|
-
oldEntrypointName: "document_loaders/fs/pptx",
|
|
7
|
-
newPackageName: "@langchain/community",
|
|
8
|
-
});
|
|
9
|
-
/**
|
|
10
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/pptx" instead. This entrypoint will be removed in 0.3.0.
|
|
11
|
-
*
|
|
12
|
-
* A class that extends the `BufferLoader` class. It represents a document
|
|
13
|
-
* loader that loads documents from PDF files.
|
|
14
|
-
*/
|
|
15
|
-
export class PPTXLoader extends BufferLoader {
|
|
16
|
-
constructor(filePathOrBlob) {
|
|
17
|
-
super(filePathOrBlob);
|
|
18
|
-
}
|
|
19
|
-
/**
|
|
20
|
-
* A method that takes a `raw` buffer and `metadata` as parameters and
|
|
21
|
-
* returns a promise that resolves to an array of `Document` instances. It
|
|
22
|
-
* uses the `parseOfficeAsync` function from the `officeparser` module to extract
|
|
23
|
-
* the raw text content from the buffer. If the extracted powerpoint content is
|
|
24
|
-
* empty, it returns an empty array. Otherwise, it creates a new
|
|
25
|
-
* `Document` instance with the extracted powerpoint content and the provided
|
|
26
|
-
* metadata, and returns it as an array.
|
|
27
|
-
* @param raw The buffer to be parsed.
|
|
28
|
-
* @param metadata The metadata of the document.
|
|
29
|
-
* @returns A promise that resolves to an array of `Document` instances.
|
|
30
|
-
*/
|
|
31
|
-
async parse(raw, metadata) {
|
|
32
|
-
const pptx = await parseOfficeAsync(raw, { outputErrorToConsole: true });
|
|
33
|
-
if (!pptx)
|
|
34
|
-
return [];
|
|
35
|
-
return [
|
|
36
|
-
new Document({
|
|
37
|
-
pageContent: pptx,
|
|
38
|
-
metadata,
|
|
39
|
-
}),
|
|
40
|
-
];
|
|
41
|
-
}
|
|
42
|
-
}
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.SRTLoader = void 0;
|
|
7
|
-
const srt_parser_2_1 = __importDefault(require("srt-parser-2"));
|
|
8
|
-
const text_js_1 = require("./text.cjs");
|
|
9
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
10
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
11
|
-
oldEntrypointName: "document_loaders/fs/srt",
|
|
12
|
-
newPackageName: "@langchain/community",
|
|
13
|
-
});
|
|
14
|
-
/**
|
|
15
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/srt" instead. This entrypoint will be removed in 0.3.0.
|
|
16
|
-
*
|
|
17
|
-
* A class that extends the `TextLoader` class. It represents a document
|
|
18
|
-
* loader that loads documents from SRT (SubRip) subtitle files. It has a
|
|
19
|
-
* constructor that takes a `filePathOrBlob` parameter representing the
|
|
20
|
-
* path to the SRT file or a `Blob` object. The `parse()` method is
|
|
21
|
-
* implemented to parse the SRT file and extract the text content of each
|
|
22
|
-
* subtitle.
|
|
23
|
-
* @example
|
|
24
|
-
* ```typescript
|
|
25
|
-
* const loader = new SRTLoader("path/to/file.srt");
|
|
26
|
-
* const docs = await loader.load();
|
|
27
|
-
* console.log({ docs });
|
|
28
|
-
* ```
|
|
29
|
-
*/
|
|
30
|
-
class SRTLoader extends text_js_1.TextLoader {
|
|
31
|
-
constructor(filePathOrBlob) {
|
|
32
|
-
super(filePathOrBlob);
|
|
33
|
-
}
|
|
34
|
-
/**
|
|
35
|
-
* A protected method that takes a `raw` string as a parameter and returns
|
|
36
|
-
* a promise that resolves to an array of strings. It parses the raw SRT
|
|
37
|
-
* string using the `SRTParser2` class from the `srt-parser-2` module. It
|
|
38
|
-
* retrieves the subtitle objects from the parsed SRT data and extracts
|
|
39
|
-
* the text content from each subtitle object. It filters out any empty
|
|
40
|
-
* text content and joins the non-empty text content with a space
|
|
41
|
-
* separator.
|
|
42
|
-
* @param raw The raw SRT string to be parsed.
|
|
43
|
-
* @returns A promise that resolves to an array of strings representing the text content of each subtitle.
|
|
44
|
-
*/
|
|
45
|
-
async parse(raw) {
|
|
46
|
-
// eslint-disable-next-line new-cap
|
|
47
|
-
const parser = new srt_parser_2_1.default();
|
|
48
|
-
const srts = parser.fromSrt(raw);
|
|
49
|
-
return [
|
|
50
|
-
srts
|
|
51
|
-
.map((srt) => srt.text)
|
|
52
|
-
.filter(Boolean)
|
|
53
|
-
.join(" "),
|
|
54
|
-
];
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
exports.SRTLoader = SRTLoader;
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
import { TextLoader } from "./text.js";
|
|
2
|
-
/**
|
|
3
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/srt" instead. This entrypoint will be removed in 0.3.0.
|
|
4
|
-
*
|
|
5
|
-
* A class that extends the `TextLoader` class. It represents a document
|
|
6
|
-
* loader that loads documents from SRT (SubRip) subtitle files. It has a
|
|
7
|
-
* constructor that takes a `filePathOrBlob` parameter representing the
|
|
8
|
-
* path to the SRT file or a `Blob` object. The `parse()` method is
|
|
9
|
-
* implemented to parse the SRT file and extract the text content of each
|
|
10
|
-
* subtitle.
|
|
11
|
-
* @example
|
|
12
|
-
* ```typescript
|
|
13
|
-
* const loader = new SRTLoader("path/to/file.srt");
|
|
14
|
-
* const docs = await loader.load();
|
|
15
|
-
* console.log({ docs });
|
|
16
|
-
* ```
|
|
17
|
-
*/
|
|
18
|
-
export declare class SRTLoader extends TextLoader {
|
|
19
|
-
constructor(filePathOrBlob: string | Blob);
|
|
20
|
-
/**
|
|
21
|
-
* A protected method that takes a `raw` string as a parameter and returns
|
|
22
|
-
* a promise that resolves to an array of strings. It parses the raw SRT
|
|
23
|
-
* string using the `SRTParser2` class from the `srt-parser-2` module. It
|
|
24
|
-
* retrieves the subtitle objects from the parsed SRT data and extracts
|
|
25
|
-
* the text content from each subtitle object. It filters out any empty
|
|
26
|
-
* text content and joins the non-empty text content with a space
|
|
27
|
-
* separator.
|
|
28
|
-
* @param raw The raw SRT string to be parsed.
|
|
29
|
-
* @returns A promise that resolves to an array of strings representing the text content of each subtitle.
|
|
30
|
-
*/
|
|
31
|
-
protected parse(raw: string): Promise<string[]>;
|
|
32
|
-
}
|