langchain 0.2.17 → 0.3.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/executor.cjs +11 -0
- package/dist/agents/executor.d.ts +2 -0
- package/dist/agents/executor.js +11 -0
- package/dist/agents/openai_functions/index.cjs +2 -2
- package/dist/agents/openai_functions/index.js +2 -2
- package/dist/agents/react/output_parser.cjs +3 -2
- package/dist/agents/react/output_parser.js +3 -2
- package/dist/chains/combine_documents/stuff.cjs +2 -2
- package/dist/chains/combine_documents/stuff.js +2 -2
- package/dist/chains/conversational_retrieval_chain.cjs +1 -1
- package/dist/chains/conversational_retrieval_chain.d.ts +1 -1
- package/dist/chains/conversational_retrieval_chain.js +1 -1
- package/dist/chains/llm_chain.cjs +1 -1
- package/dist/chains/llm_chain.d.ts +1 -1
- package/dist/chains/llm_chain.js +1 -1
- package/dist/chains/openai_functions/openapi.cjs +3 -1
- package/dist/chains/openai_functions/openapi.js +3 -1
- package/dist/chains/retrieval_qa.cjs +1 -1
- package/dist/chains/retrieval_qa.d.ts +1 -1
- package/dist/chains/retrieval_qa.js +1 -1
- package/dist/load/import_constants.cjs +2 -39
- package/dist/load/import_constants.js +2 -39
- package/dist/load/import_map.cjs +2 -3
- package/dist/load/import_map.d.ts +0 -1
- package/dist/load/import_map.js +0 -1
- package/dist/smith/config.d.ts +1 -5
- package/package.json +35 -858
- package/dist/document_loaders/fs/chatgpt.cjs +0 -90
- package/dist/document_loaders/fs/chatgpt.d.ts +0 -8
- package/dist/document_loaders/fs/chatgpt.js +0 -86
- package/dist/document_loaders/fs/csv.cjs +0 -73
- package/dist/document_loaders/fs/csv.d.ts +0 -65
- package/dist/document_loaders/fs/csv.js +0 -69
- package/dist/document_loaders/fs/docx.cjs +0 -58
- package/dist/document_loaders/fs/docx.d.ts +0 -25
- package/dist/document_loaders/fs/docx.js +0 -54
- package/dist/document_loaders/fs/epub.cjs +0 -103
- package/dist/document_loaders/fs/epub.d.ts +0 -33
- package/dist/document_loaders/fs/epub.js +0 -99
- package/dist/document_loaders/fs/notion.cjs +0 -26
- package/dist/document_loaders/fs/notion.d.ts +0 -12
- package/dist/document_loaders/fs/notion.js +0 -22
- package/dist/document_loaders/fs/obsidian.cjs +0 -247
- package/dist/document_loaders/fs/obsidian.d.ts +0 -28
- package/dist/document_loaders/fs/obsidian.js +0 -240
- package/dist/document_loaders/fs/openai_whisper_audio.cjs +0 -49
- package/dist/document_loaders/fs/openai_whisper_audio.d.ts +0 -23
- package/dist/document_loaders/fs/openai_whisper_audio.js +0 -45
- package/dist/document_loaders/fs/pdf.cjs +0 -148
- package/dist/document_loaders/fs/pdf.d.ts +0 -49
- package/dist/document_loaders/fs/pdf.js +0 -144
- package/dist/document_loaders/fs/pptx.cjs +0 -46
- package/dist/document_loaders/fs/pptx.d.ts +0 -25
- package/dist/document_loaders/fs/pptx.js +0 -42
- package/dist/document_loaders/fs/srt.cjs +0 -57
- package/dist/document_loaders/fs/srt.d.ts +0 -32
- package/dist/document_loaders/fs/srt.js +0 -50
- package/dist/document_loaders/fs/unstructured.cjs +0 -338
- package/dist/document_loaders/fs/unstructured.d.ts +0 -125
- package/dist/document_loaders/fs/unstructured.js +0 -333
- package/dist/document_loaders/web/apify_dataset.cjs +0 -130
- package/dist/document_loaders/web/apify_dataset.d.ts +0 -85
- package/dist/document_loaders/web/apify_dataset.js +0 -126
- package/dist/document_loaders/web/assemblyai.cjs +0 -200
- package/dist/document_loaders/web/assemblyai.d.ts +0 -95
- package/dist/document_loaders/web/assemblyai.js +0 -193
- package/dist/document_loaders/web/azure_blob_storage_container.cjs +0 -73
- package/dist/document_loaders/web/azure_blob_storage_container.d.ts +0 -46
- package/dist/document_loaders/web/azure_blob_storage_container.js +0 -69
- package/dist/document_loaders/web/azure_blob_storage_file.cjs +0 -124
- package/dist/document_loaders/web/azure_blob_storage_file.d.ts +0 -53
- package/dist/document_loaders/web/azure_blob_storage_file.js +0 -97
- package/dist/document_loaders/web/browserbase.cjs +0 -93
- package/dist/document_loaders/web/browserbase.d.ts +0 -48
- package/dist/document_loaders/web/browserbase.js +0 -86
- package/dist/document_loaders/web/cheerio.cjs +0 -118
- package/dist/document_loaders/web/cheerio.d.ts +0 -77
- package/dist/document_loaders/web/cheerio.js +0 -114
- package/dist/document_loaders/web/college_confidential.cjs +0 -41
- package/dist/document_loaders/web/college_confidential.d.ts +0 -25
- package/dist/document_loaders/web/college_confidential.js +0 -37
- package/dist/document_loaders/web/confluence.cjs +0 -190
- package/dist/document_loaders/web/confluence.d.ts +0 -114
- package/dist/document_loaders/web/confluence.js +0 -186
- package/dist/document_loaders/web/couchbase.cjs +0 -95
- package/dist/document_loaders/web/couchbase.d.ts +0 -32
- package/dist/document_loaders/web/couchbase.js +0 -91
- package/dist/document_loaders/web/figma.cjs +0 -102
- package/dist/document_loaders/web/figma.d.ts +0 -82
- package/dist/document_loaders/web/figma.js +0 -98
- package/dist/document_loaders/web/firecrawl.cjs +0 -95
- package/dist/document_loaders/web/firecrawl.d.ts +0 -50
- package/dist/document_loaders/web/firecrawl.js +0 -88
- package/dist/document_loaders/web/gitbook.cjs +0 -110
- package/dist/document_loaders/web/gitbook.d.ts +0 -55
- package/dist/document_loaders/web/gitbook.js +0 -106
- package/dist/document_loaders/web/github.cjs +0 -615
- package/dist/document_loaders/web/github.d.ts +0 -203
- package/dist/document_loaders/web/github.js +0 -608
- package/dist/document_loaders/web/hn.cjs +0 -90
- package/dist/document_loaders/web/hn.d.ts +0 -42
- package/dist/document_loaders/web/hn.js +0 -86
- package/dist/document_loaders/web/imsdb.cjs +0 -44
- package/dist/document_loaders/web/imsdb.d.ts +0 -23
- package/dist/document_loaders/web/imsdb.js +0 -40
- package/dist/document_loaders/web/notionapi.cjs +0 -404
- package/dist/document_loaders/web/notionapi.d.ts +0 -133
- package/dist/document_loaders/web/notionapi.js +0 -392
- package/dist/document_loaders/web/notiondb.cjs +0 -199
- package/dist/document_loaders/web/notiondb.d.ts +0 -56
- package/dist/document_loaders/web/notiondb.js +0 -195
- package/dist/document_loaders/web/pdf.cjs +0 -140
- package/dist/document_loaders/web/pdf.d.ts +0 -35
- package/dist/document_loaders/web/pdf.js +0 -136
- package/dist/document_loaders/web/playwright.cjs +0 -89
- package/dist/document_loaders/web/playwright.d.ts +0 -58
- package/dist/document_loaders/web/playwright.js +0 -85
- package/dist/document_loaders/web/puppeteer.cjs +0 -139
- package/dist/document_loaders/web/puppeteer.d.ts +0 -82
- package/dist/document_loaders/web/puppeteer.js +0 -135
- package/dist/document_loaders/web/recursive_url.cjs +0 -198
- package/dist/document_loaders/web/recursive_url.d.ts +0 -33
- package/dist/document_loaders/web/recursive_url.js +0 -194
- package/dist/document_loaders/web/s3.cjs +0 -164
- package/dist/document_loaders/web/s3.d.ts +0 -78
- package/dist/document_loaders/web/s3.js +0 -137
- package/dist/document_loaders/web/searchapi.cjs +0 -150
- package/dist/document_loaders/web/searchapi.d.ts +0 -76
- package/dist/document_loaders/web/searchapi.js +0 -146
- package/dist/document_loaders/web/serpapi.cjs +0 -127
- package/dist/document_loaders/web/serpapi.d.ts +0 -62
- package/dist/document_loaders/web/serpapi.js +0 -123
- package/dist/document_loaders/web/sitemap.cjs +0 -118
- package/dist/document_loaders/web/sitemap.d.ts +0 -41
- package/dist/document_loaders/web/sitemap.js +0 -114
- package/dist/document_loaders/web/sonix_audio.cjs +0 -68
- package/dist/document_loaders/web/sonix_audio.d.ts +0 -36
- package/dist/document_loaders/web/sonix_audio.js +0 -64
- package/dist/document_loaders/web/sort_xyz_blockchain.cjs +0 -157
- package/dist/document_loaders/web/sort_xyz_blockchain.d.ts +0 -78
- package/dist/document_loaders/web/sort_xyz_blockchain.js +0 -153
- package/dist/document_loaders/web/youtube.cjs +0 -116
- package/dist/document_loaders/web/youtube.d.ts +0 -55
- package/dist/document_loaders/web/youtube.js +0 -112
- package/dist/experimental/chrome_ai/app/dist/bundle.cjs +0 -1250
- package/dist/experimental/chrome_ai/app/dist/bundle.d.ts +0 -1
- package/dist/experimental/chrome_ai/app/dist/bundle.js +0 -1249
- package/dist/experimental/tools/pyinterpreter.cjs +0 -248
- package/dist/experimental/tools/pyinterpreter.d.ts +0 -18
- package/dist/experimental/tools/pyinterpreter.js +0 -244
- package/dist/retrievers/self_query/chroma.cjs +0 -48
- package/dist/retrievers/self_query/chroma.d.ts +0 -26
- package/dist/retrievers/self_query/chroma.js +0 -44
- package/dist/retrievers/self_query/pinecone.cjs +0 -47
- package/dist/retrievers/self_query/pinecone.d.ts +0 -26
- package/dist/retrievers/self_query/pinecone.js +0 -43
- package/dist/retrievers/self_query/supabase.cjs +0 -278
- package/dist/retrievers/self_query/supabase.d.ts +0 -109
- package/dist/retrievers/self_query/supabase.js +0 -274
- package/dist/retrievers/self_query/supabase_utils.cjs +0 -264
- package/dist/retrievers/self_query/supabase_utils.d.ts +0 -101
- package/dist/retrievers/self_query/supabase_utils.js +0 -259
- package/dist/retrievers/self_query/vectara.cjs +0 -143
- package/dist/retrievers/self_query/vectara.d.ts +0 -42
- package/dist/retrievers/self_query/vectara.js +0 -139
- package/dist/retrievers/self_query/weaviate.cjs +0 -201
- package/dist/retrievers/self_query/weaviate.d.ts +0 -99
- package/dist/retrievers/self_query/weaviate.js +0 -197
- package/dist/types/assemblyai-types.cjs +0 -2
- package/dist/types/assemblyai-types.d.ts +0 -4
- package/dist/types/assemblyai-types.js +0 -1
- package/document_loaders/fs/chatgpt.cjs +0 -1
- package/document_loaders/fs/chatgpt.d.cts +0 -1
- package/document_loaders/fs/chatgpt.d.ts +0 -1
- package/document_loaders/fs/chatgpt.js +0 -1
- package/document_loaders/fs/csv.cjs +0 -1
- package/document_loaders/fs/csv.d.cts +0 -1
- package/document_loaders/fs/csv.d.ts +0 -1
- package/document_loaders/fs/csv.js +0 -1
- package/document_loaders/fs/docx.cjs +0 -1
- package/document_loaders/fs/docx.d.cts +0 -1
- package/document_loaders/fs/docx.d.ts +0 -1
- package/document_loaders/fs/docx.js +0 -1
- package/document_loaders/fs/epub.cjs +0 -1
- package/document_loaders/fs/epub.d.cts +0 -1
- package/document_loaders/fs/epub.d.ts +0 -1
- package/document_loaders/fs/epub.js +0 -1
- package/document_loaders/fs/notion.cjs +0 -1
- package/document_loaders/fs/notion.d.cts +0 -1
- package/document_loaders/fs/notion.d.ts +0 -1
- package/document_loaders/fs/notion.js +0 -1
- package/document_loaders/fs/obsidian.cjs +0 -1
- package/document_loaders/fs/obsidian.d.cts +0 -1
- package/document_loaders/fs/obsidian.d.ts +0 -1
- package/document_loaders/fs/obsidian.js +0 -1
- package/document_loaders/fs/openai_whisper_audio.cjs +0 -1
- package/document_loaders/fs/openai_whisper_audio.d.cts +0 -1
- package/document_loaders/fs/openai_whisper_audio.d.ts +0 -1
- package/document_loaders/fs/openai_whisper_audio.js +0 -1
- package/document_loaders/fs/pdf.cjs +0 -1
- package/document_loaders/fs/pdf.d.cts +0 -1
- package/document_loaders/fs/pdf.d.ts +0 -1
- package/document_loaders/fs/pdf.js +0 -1
- package/document_loaders/fs/pptx.cjs +0 -1
- package/document_loaders/fs/pptx.d.cts +0 -1
- package/document_loaders/fs/pptx.d.ts +0 -1
- package/document_loaders/fs/pptx.js +0 -1
- package/document_loaders/fs/srt.cjs +0 -1
- package/document_loaders/fs/srt.d.cts +0 -1
- package/document_loaders/fs/srt.d.ts +0 -1
- package/document_loaders/fs/srt.js +0 -1
- package/document_loaders/fs/unstructured.cjs +0 -1
- package/document_loaders/fs/unstructured.d.cts +0 -1
- package/document_loaders/fs/unstructured.d.ts +0 -1
- package/document_loaders/fs/unstructured.js +0 -1
- package/document_loaders/web/apify_dataset.cjs +0 -1
- package/document_loaders/web/apify_dataset.d.cts +0 -1
- package/document_loaders/web/apify_dataset.d.ts +0 -1
- package/document_loaders/web/apify_dataset.js +0 -1
- package/document_loaders/web/assemblyai.cjs +0 -1
- package/document_loaders/web/assemblyai.d.cts +0 -1
- package/document_loaders/web/assemblyai.d.ts +0 -1
- package/document_loaders/web/assemblyai.js +0 -1
- package/document_loaders/web/azure_blob_storage_container.cjs +0 -1
- package/document_loaders/web/azure_blob_storage_container.d.cts +0 -1
- package/document_loaders/web/azure_blob_storage_container.d.ts +0 -1
- package/document_loaders/web/azure_blob_storage_container.js +0 -1
- package/document_loaders/web/azure_blob_storage_file.cjs +0 -1
- package/document_loaders/web/azure_blob_storage_file.d.cts +0 -1
- package/document_loaders/web/azure_blob_storage_file.d.ts +0 -1
- package/document_loaders/web/azure_blob_storage_file.js +0 -1
- package/document_loaders/web/browserbase.cjs +0 -1
- package/document_loaders/web/browserbase.d.cts +0 -1
- package/document_loaders/web/browserbase.d.ts +0 -1
- package/document_loaders/web/browserbase.js +0 -1
- package/document_loaders/web/cheerio.cjs +0 -1
- package/document_loaders/web/cheerio.d.cts +0 -1
- package/document_loaders/web/cheerio.d.ts +0 -1
- package/document_loaders/web/cheerio.js +0 -1
- package/document_loaders/web/college_confidential.cjs +0 -1
- package/document_loaders/web/college_confidential.d.cts +0 -1
- package/document_loaders/web/college_confidential.d.ts +0 -1
- package/document_loaders/web/college_confidential.js +0 -1
- package/document_loaders/web/confluence.cjs +0 -1
- package/document_loaders/web/confluence.d.cts +0 -1
- package/document_loaders/web/confluence.d.ts +0 -1
- package/document_loaders/web/confluence.js +0 -1
- package/document_loaders/web/couchbase.cjs +0 -1
- package/document_loaders/web/couchbase.d.cts +0 -1
- package/document_loaders/web/couchbase.d.ts +0 -1
- package/document_loaders/web/couchbase.js +0 -1
- package/document_loaders/web/figma.cjs +0 -1
- package/document_loaders/web/figma.d.cts +0 -1
- package/document_loaders/web/figma.d.ts +0 -1
- package/document_loaders/web/figma.js +0 -1
- package/document_loaders/web/firecrawl.cjs +0 -1
- package/document_loaders/web/firecrawl.d.cts +0 -1
- package/document_loaders/web/firecrawl.d.ts +0 -1
- package/document_loaders/web/firecrawl.js +0 -1
- package/document_loaders/web/gitbook.cjs +0 -1
- package/document_loaders/web/gitbook.d.cts +0 -1
- package/document_loaders/web/gitbook.d.ts +0 -1
- package/document_loaders/web/gitbook.js +0 -1
- package/document_loaders/web/github.cjs +0 -1
- package/document_loaders/web/github.d.cts +0 -1
- package/document_loaders/web/github.d.ts +0 -1
- package/document_loaders/web/github.js +0 -1
- package/document_loaders/web/hn.cjs +0 -1
- package/document_loaders/web/hn.d.cts +0 -1
- package/document_loaders/web/hn.d.ts +0 -1
- package/document_loaders/web/hn.js +0 -1
- package/document_loaders/web/imsdb.cjs +0 -1
- package/document_loaders/web/imsdb.d.cts +0 -1
- package/document_loaders/web/imsdb.d.ts +0 -1
- package/document_loaders/web/imsdb.js +0 -1
- package/document_loaders/web/notionapi.cjs +0 -1
- package/document_loaders/web/notionapi.d.cts +0 -1
- package/document_loaders/web/notionapi.d.ts +0 -1
- package/document_loaders/web/notionapi.js +0 -1
- package/document_loaders/web/notiondb.cjs +0 -1
- package/document_loaders/web/notiondb.d.cts +0 -1
- package/document_loaders/web/notiondb.d.ts +0 -1
- package/document_loaders/web/notiondb.js +0 -1
- package/document_loaders/web/pdf.cjs +0 -1
- package/document_loaders/web/pdf.d.cts +0 -1
- package/document_loaders/web/pdf.d.ts +0 -1
- package/document_loaders/web/pdf.js +0 -1
- package/document_loaders/web/playwright.cjs +0 -1
- package/document_loaders/web/playwright.d.cts +0 -1
- package/document_loaders/web/playwright.d.ts +0 -1
- package/document_loaders/web/playwright.js +0 -1
- package/document_loaders/web/puppeteer.cjs +0 -1
- package/document_loaders/web/puppeteer.d.cts +0 -1
- package/document_loaders/web/puppeteer.d.ts +0 -1
- package/document_loaders/web/puppeteer.js +0 -1
- package/document_loaders/web/recursive_url.cjs +0 -1
- package/document_loaders/web/recursive_url.d.cts +0 -1
- package/document_loaders/web/recursive_url.d.ts +0 -1
- package/document_loaders/web/recursive_url.js +0 -1
- package/document_loaders/web/s3.cjs +0 -1
- package/document_loaders/web/s3.d.cts +0 -1
- package/document_loaders/web/s3.d.ts +0 -1
- package/document_loaders/web/s3.js +0 -1
- package/document_loaders/web/searchapi.cjs +0 -1
- package/document_loaders/web/searchapi.d.cts +0 -1
- package/document_loaders/web/searchapi.d.ts +0 -1
- package/document_loaders/web/searchapi.js +0 -1
- package/document_loaders/web/serpapi.cjs +0 -1
- package/document_loaders/web/serpapi.d.cts +0 -1
- package/document_loaders/web/serpapi.d.ts +0 -1
- package/document_loaders/web/serpapi.js +0 -1
- package/document_loaders/web/sitemap.cjs +0 -1
- package/document_loaders/web/sitemap.d.cts +0 -1
- package/document_loaders/web/sitemap.d.ts +0 -1
- package/document_loaders/web/sitemap.js +0 -1
- package/document_loaders/web/sonix_audio.cjs +0 -1
- package/document_loaders/web/sonix_audio.d.cts +0 -1
- package/document_loaders/web/sonix_audio.d.ts +0 -1
- package/document_loaders/web/sonix_audio.js +0 -1
- package/document_loaders/web/sort_xyz_blockchain.cjs +0 -1
- package/document_loaders/web/sort_xyz_blockchain.d.cts +0 -1
- package/document_loaders/web/sort_xyz_blockchain.d.ts +0 -1
- package/document_loaders/web/sort_xyz_blockchain.js +0 -1
- package/document_loaders/web/youtube.cjs +0 -1
- package/document_loaders/web/youtube.d.cts +0 -1
- package/document_loaders/web/youtube.d.ts +0 -1
- package/document_loaders/web/youtube.js +0 -1
- package/experimental/tools/pyinterpreter.cjs +0 -1
- package/experimental/tools/pyinterpreter.d.cts +0 -1
- package/experimental/tools/pyinterpreter.d.ts +0 -1
- package/experimental/tools/pyinterpreter.js +0 -1
- package/memory/index.cjs +0 -1
- package/memory/index.d.cts +0 -1
- package/memory/index.d.ts +0 -1
- package/memory/index.js +0 -1
- package/retrievers/self_query/chroma.cjs +0 -1
- package/retrievers/self_query/chroma.d.cts +0 -1
- package/retrievers/self_query/chroma.d.ts +0 -1
- package/retrievers/self_query/chroma.js +0 -1
- package/retrievers/self_query/pinecone.cjs +0 -1
- package/retrievers/self_query/pinecone.d.cts +0 -1
- package/retrievers/self_query/pinecone.d.ts +0 -1
- package/retrievers/self_query/pinecone.js +0 -1
- package/retrievers/self_query/supabase.cjs +0 -1
- package/retrievers/self_query/supabase.d.cts +0 -1
- package/retrievers/self_query/supabase.d.ts +0 -1
- package/retrievers/self_query/supabase.js +0 -1
- package/retrievers/self_query/vectara.cjs +0 -1
- package/retrievers/self_query/vectara.d.cts +0 -1
- package/retrievers/self_query/vectara.d.ts +0 -1
- package/retrievers/self_query/vectara.js +0 -1
- package/retrievers/self_query/weaviate.cjs +0 -1
- package/retrievers/self_query/weaviate.d.cts +0 -1
- package/retrievers/self_query/weaviate.d.ts +0 -1
- package/retrievers/self_query/weaviate.js +0 -1
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.ChatGPTLoader = void 0;
|
|
4
|
-
const documents_1 = require("@langchain/core/documents");
|
|
5
|
-
const text_js_1 = require("./text.cjs");
|
|
6
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
7
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
8
|
-
oldEntrypointName: "document_loaders/fs/chatgpt",
|
|
9
|
-
newPackageName: "@langchain/community",
|
|
10
|
-
});
|
|
11
|
-
function concatenateRows(message, title) {
|
|
12
|
-
/**
|
|
13
|
-
* Combine message information in a readable format ready to be used.
|
|
14
|
-
* @param {ChatGPTMessage} message - Message to be concatenated
|
|
15
|
-
* @param {string} title - Title of the conversation
|
|
16
|
-
*
|
|
17
|
-
* @returns {string} Concatenated message
|
|
18
|
-
*/
|
|
19
|
-
if (!message) {
|
|
20
|
-
return "";
|
|
21
|
-
}
|
|
22
|
-
const sender = message.author ? message.author.role : "unknown";
|
|
23
|
-
const text = message.content.parts[0];
|
|
24
|
-
const date = new Date(message.create_time * 1000)
|
|
25
|
-
.toISOString()
|
|
26
|
-
.slice(0, 19)
|
|
27
|
-
.replace("T", " ");
|
|
28
|
-
return `${title} - ${sender} on ${date}: ${text}\n\n`;
|
|
29
|
-
}
|
|
30
|
-
class ChatGPTLoader extends text_js_1.TextLoader {
|
|
31
|
-
constructor(filePathOrBlob, numLogs = 0) {
|
|
32
|
-
super(filePathOrBlob);
|
|
33
|
-
Object.defineProperty(this, "numLogs", {
|
|
34
|
-
enumerable: true,
|
|
35
|
-
configurable: true,
|
|
36
|
-
writable: true,
|
|
37
|
-
value: void 0
|
|
38
|
-
});
|
|
39
|
-
this.numLogs = numLogs;
|
|
40
|
-
}
|
|
41
|
-
async parse(raw) {
|
|
42
|
-
let data;
|
|
43
|
-
try {
|
|
44
|
-
data = JSON.parse(raw);
|
|
45
|
-
}
|
|
46
|
-
catch (e) {
|
|
47
|
-
console.error(e);
|
|
48
|
-
throw new Error("Failed to parse JSON");
|
|
49
|
-
}
|
|
50
|
-
const truncatedData = this.numLogs > 0 ? data.slice(0, this.numLogs) : data;
|
|
51
|
-
return truncatedData.map((d) => Object.values(d.mapping)
|
|
52
|
-
.filter((msg, idx) => !(idx === 0 && msg.message.author.role === "system"))
|
|
53
|
-
.map((msg) => concatenateRows(msg.message, d.title))
|
|
54
|
-
.join(""));
|
|
55
|
-
}
|
|
56
|
-
async load() {
|
|
57
|
-
let text;
|
|
58
|
-
let metadata;
|
|
59
|
-
if (typeof this.filePathOrBlob === "string") {
|
|
60
|
-
const { readFile } = await text_js_1.TextLoader.imports();
|
|
61
|
-
try {
|
|
62
|
-
text = await readFile(this.filePathOrBlob, "utf8");
|
|
63
|
-
}
|
|
64
|
-
catch (e) {
|
|
65
|
-
console.error(e);
|
|
66
|
-
throw new Error("Failed to read file");
|
|
67
|
-
}
|
|
68
|
-
metadata = { source: this.filePathOrBlob };
|
|
69
|
-
}
|
|
70
|
-
else {
|
|
71
|
-
try {
|
|
72
|
-
text = await this.filePathOrBlob.text();
|
|
73
|
-
}
|
|
74
|
-
catch (e) {
|
|
75
|
-
console.error(e);
|
|
76
|
-
throw new Error("Failed to read blob");
|
|
77
|
-
}
|
|
78
|
-
metadata = { source: "blob", blobType: this.filePathOrBlob.type };
|
|
79
|
-
}
|
|
80
|
-
const parsed = await this.parse(text);
|
|
81
|
-
return parsed.map((pageContent, i) => new documents_1.Document({
|
|
82
|
-
pageContent,
|
|
83
|
-
metadata: {
|
|
84
|
-
...metadata,
|
|
85
|
-
logIndex: i + 1,
|
|
86
|
-
},
|
|
87
|
-
}));
|
|
88
|
-
}
|
|
89
|
-
}
|
|
90
|
-
exports.ChatGPTLoader = ChatGPTLoader;
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
|
-
import { TextLoader } from "./text.js";
|
|
3
|
-
export declare class ChatGPTLoader extends TextLoader {
|
|
4
|
-
numLogs: number;
|
|
5
|
-
constructor(filePathOrBlob: string | Blob, numLogs?: number);
|
|
6
|
-
protected parse(raw: string): Promise<string[]>;
|
|
7
|
-
load(): Promise<Document[]>;
|
|
8
|
-
}
|
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
|
-
import { TextLoader } from "./text.js";
|
|
3
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
4
|
-
/* #__PURE__ */ logVersion020MigrationWarning({
|
|
5
|
-
oldEntrypointName: "document_loaders/fs/chatgpt",
|
|
6
|
-
newPackageName: "@langchain/community",
|
|
7
|
-
});
|
|
8
|
-
function concatenateRows(message, title) {
|
|
9
|
-
/**
|
|
10
|
-
* Combine message information in a readable format ready to be used.
|
|
11
|
-
* @param {ChatGPTMessage} message - Message to be concatenated
|
|
12
|
-
* @param {string} title - Title of the conversation
|
|
13
|
-
*
|
|
14
|
-
* @returns {string} Concatenated message
|
|
15
|
-
*/
|
|
16
|
-
if (!message) {
|
|
17
|
-
return "";
|
|
18
|
-
}
|
|
19
|
-
const sender = message.author ? message.author.role : "unknown";
|
|
20
|
-
const text = message.content.parts[0];
|
|
21
|
-
const date = new Date(message.create_time * 1000)
|
|
22
|
-
.toISOString()
|
|
23
|
-
.slice(0, 19)
|
|
24
|
-
.replace("T", " ");
|
|
25
|
-
return `${title} - ${sender} on ${date}: ${text}\n\n`;
|
|
26
|
-
}
|
|
27
|
-
export class ChatGPTLoader extends TextLoader {
|
|
28
|
-
constructor(filePathOrBlob, numLogs = 0) {
|
|
29
|
-
super(filePathOrBlob);
|
|
30
|
-
Object.defineProperty(this, "numLogs", {
|
|
31
|
-
enumerable: true,
|
|
32
|
-
configurable: true,
|
|
33
|
-
writable: true,
|
|
34
|
-
value: void 0
|
|
35
|
-
});
|
|
36
|
-
this.numLogs = numLogs;
|
|
37
|
-
}
|
|
38
|
-
async parse(raw) {
|
|
39
|
-
let data;
|
|
40
|
-
try {
|
|
41
|
-
data = JSON.parse(raw);
|
|
42
|
-
}
|
|
43
|
-
catch (e) {
|
|
44
|
-
console.error(e);
|
|
45
|
-
throw new Error("Failed to parse JSON");
|
|
46
|
-
}
|
|
47
|
-
const truncatedData = this.numLogs > 0 ? data.slice(0, this.numLogs) : data;
|
|
48
|
-
return truncatedData.map((d) => Object.values(d.mapping)
|
|
49
|
-
.filter((msg, idx) => !(idx === 0 && msg.message.author.role === "system"))
|
|
50
|
-
.map((msg) => concatenateRows(msg.message, d.title))
|
|
51
|
-
.join(""));
|
|
52
|
-
}
|
|
53
|
-
async load() {
|
|
54
|
-
let text;
|
|
55
|
-
let metadata;
|
|
56
|
-
if (typeof this.filePathOrBlob === "string") {
|
|
57
|
-
const { readFile } = await TextLoader.imports();
|
|
58
|
-
try {
|
|
59
|
-
text = await readFile(this.filePathOrBlob, "utf8");
|
|
60
|
-
}
|
|
61
|
-
catch (e) {
|
|
62
|
-
console.error(e);
|
|
63
|
-
throw new Error("Failed to read file");
|
|
64
|
-
}
|
|
65
|
-
metadata = { source: this.filePathOrBlob };
|
|
66
|
-
}
|
|
67
|
-
else {
|
|
68
|
-
try {
|
|
69
|
-
text = await this.filePathOrBlob.text();
|
|
70
|
-
}
|
|
71
|
-
catch (e) {
|
|
72
|
-
console.error(e);
|
|
73
|
-
throw new Error("Failed to read blob");
|
|
74
|
-
}
|
|
75
|
-
metadata = { source: "blob", blobType: this.filePathOrBlob.type };
|
|
76
|
-
}
|
|
77
|
-
const parsed = await this.parse(text);
|
|
78
|
-
return parsed.map((pageContent, i) => new Document({
|
|
79
|
-
pageContent,
|
|
80
|
-
metadata: {
|
|
81
|
-
...metadata,
|
|
82
|
-
logIndex: i + 1,
|
|
83
|
-
},
|
|
84
|
-
}));
|
|
85
|
-
}
|
|
86
|
-
}
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.CSVLoader = void 0;
|
|
4
|
-
const text_js_1 = require("./text.cjs");
|
|
5
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
6
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
7
|
-
oldEntrypointName: "document_loaders/fs/csv",
|
|
8
|
-
newPackageName: "@langchain/community",
|
|
9
|
-
});
|
|
10
|
-
/**
|
|
11
|
-
* A class that extends the TextLoader class. It represents a document
|
|
12
|
-
* loader that loads documents from a CSV file. It has a constructor that
|
|
13
|
-
* takes a `filePathOrBlob` parameter representing the path to the CSV
|
|
14
|
-
* file or a Blob object, and an optional `options` parameter of type
|
|
15
|
-
* `CSVLoaderOptions` or a string representing the column to use as the
|
|
16
|
-
* document's pageContent.
|
|
17
|
-
*/
|
|
18
|
-
class CSVLoader extends text_js_1.TextLoader {
|
|
19
|
-
constructor(filePathOrBlob, options) {
|
|
20
|
-
super(filePathOrBlob);
|
|
21
|
-
Object.defineProperty(this, "options", {
|
|
22
|
-
enumerable: true,
|
|
23
|
-
configurable: true,
|
|
24
|
-
writable: true,
|
|
25
|
-
value: {}
|
|
26
|
-
});
|
|
27
|
-
if (typeof options === "string") {
|
|
28
|
-
this.options = { column: options };
|
|
29
|
-
}
|
|
30
|
-
else {
|
|
31
|
-
this.options = options ?? this.options;
|
|
32
|
-
}
|
|
33
|
-
}
|
|
34
|
-
/**
|
|
35
|
-
* A protected method that parses the raw CSV data and returns an array of
|
|
36
|
-
* strings representing the pageContent of each document. It uses the
|
|
37
|
-
* `dsvFormat` function from the `d3-dsv` module to parse the CSV data. If
|
|
38
|
-
* the `column` option is specified, it checks if the column exists in the
|
|
39
|
-
* CSV file and returns the values of that column as the pageContent. If
|
|
40
|
-
* the `column` option is not specified, it converts each row of the CSV
|
|
41
|
-
* data into key/value pairs and joins them with newline characters.
|
|
42
|
-
* @param raw The raw CSV data to be parsed.
|
|
43
|
-
* @returns An array of strings representing the pageContent of each document.
|
|
44
|
-
*/
|
|
45
|
-
async parse(raw) {
|
|
46
|
-
const { column, separator = "," } = this.options;
|
|
47
|
-
const { dsvFormat } = await CSVLoaderImports();
|
|
48
|
-
const psv = dsvFormat(separator);
|
|
49
|
-
const parsed = psv.parse(raw.trim());
|
|
50
|
-
if (column !== undefined) {
|
|
51
|
-
if (!parsed.columns.includes(column)) {
|
|
52
|
-
throw new Error(`Column ${column} not found in CSV file.`);
|
|
53
|
-
}
|
|
54
|
-
// Note TextLoader will raise an exception if the value is null.
|
|
55
|
-
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
|
|
56
|
-
return parsed.map((row) => row[column]);
|
|
57
|
-
}
|
|
58
|
-
return parsed.map((row) => Object.keys(row)
|
|
59
|
-
.map((key) => `${key.trim()}: ${row[key]?.trim()}`)
|
|
60
|
-
.join("\n"));
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
exports.CSVLoader = CSVLoader;
|
|
64
|
-
async function CSVLoaderImports() {
|
|
65
|
-
try {
|
|
66
|
-
const { dsvFormat } = await import("d3-dsv");
|
|
67
|
-
return { dsvFormat };
|
|
68
|
-
}
|
|
69
|
-
catch (e) {
|
|
70
|
-
console.error(e);
|
|
71
|
-
throw new Error("Please install d3-dsv as a dependency with, e.g. `yarn add d3-dsv@2`");
|
|
72
|
-
}
|
|
73
|
-
}
|
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
import { TextLoader } from "./text.js";
|
|
2
|
-
/**
|
|
3
|
-
* Loads a CSV file into a list of documents.
|
|
4
|
-
* Each document represents one row of the CSV file.
|
|
5
|
-
*
|
|
6
|
-
* When `column` is not specified, each row is converted into a key/value pair
|
|
7
|
-
* with each key/value pair outputted to a new line in the document's pageContent.
|
|
8
|
-
*
|
|
9
|
-
* @example
|
|
10
|
-
* // CSV file:
|
|
11
|
-
* // id,html
|
|
12
|
-
* // 1,<i>Corruption discovered at the core of the Banking Clan!</i>
|
|
13
|
-
* // 2,<i>Corruption discovered at the core of the Banking Clan!</i>
|
|
14
|
-
*
|
|
15
|
-
* const loader = new CSVLoader("path/to/file.csv");
|
|
16
|
-
* const docs = await loader.load();
|
|
17
|
-
*
|
|
18
|
-
* // docs[0].pageContent:
|
|
19
|
-
* // id: 1
|
|
20
|
-
* // html: <i>Corruption discovered at the core of the Banking Clan!</i>
|
|
21
|
-
*
|
|
22
|
-
* When `column` is specified, one document is created for each row, and the
|
|
23
|
-
* value of the specified column is used as the document's pageContent.
|
|
24
|
-
*
|
|
25
|
-
* @example
|
|
26
|
-
* // CSV file:
|
|
27
|
-
* // id,html
|
|
28
|
-
* // 1,<i>Corruption discovered at the core of the Banking Clan!</i>
|
|
29
|
-
* // 2,<i>Corruption discovered at the core of the Banking Clan!</i>
|
|
30
|
-
*
|
|
31
|
-
* const loader = new CSVLoader("path/to/file.csv", "html");
|
|
32
|
-
* const docs = await loader.load();
|
|
33
|
-
*
|
|
34
|
-
* // docs[0].pageContent:
|
|
35
|
-
* // <i>Corruption discovered at the core of the Banking Clan!</i>
|
|
36
|
-
*/
|
|
37
|
-
type CSVLoaderOptions = {
|
|
38
|
-
column?: string;
|
|
39
|
-
separator?: string;
|
|
40
|
-
};
|
|
41
|
-
/**
 * A document loader that reads rows out of a CSV file. Extends
 * `TextLoader`; the constructor accepts the path to the CSV file (or a
 * Blob) and either a `CSVLoaderOptions` object or a plain string, which is
 * shorthand for `{ column: <string> }`.
 */
export declare class CSVLoader extends TextLoader {
    protected options: CSVLoaderOptions;
    constructor(filePathOrBlob: string | Blob, options?: CSVLoaderOptions | string);
    /**
     * Parses the raw CSV data into one pageContent string per row using the
     * `dsvFormat` parser from the `d3-dsv` module. When `options.column` is
     * set, the value of that column is returned for each row (an error is
     * thrown if the column does not exist in the file); otherwise every row
     * is rendered as newline-joined `key: value` pairs.
     * @param raw The raw CSV data to be parsed.
     * @returns An array of strings representing the pageContent of each document.
     */
    protected parse(raw: string): Promise<string[]>;
}
export {};
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
import { TextLoader } from "./text.js";
|
|
2
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
3
|
-
// One-time warning at module load: this entrypoint moved to
// @langchain/community. The #__PURE__ annotation lets bundlers drop the
// call when tree-shaking.
/* #__PURE__ */ logVersion020MigrationWarning({
    oldEntrypointName: "document_loaders/fs/csv",
    newPackageName: "@langchain/community",
});
|
|
7
|
-
/**
 * Document loader for CSV files, built on top of `TextLoader`.
 *
 * Accepts a file path or Blob plus either a `CSVLoaderOptions` object or a
 * bare string, which is treated as shorthand for `{ column: <string> }`.
 * When a column is configured, each row's value in that column becomes a
 * document's pageContent; otherwise each row is rendered as newline-joined
 * `key: value` pairs.
 */
export class CSVLoader extends TextLoader {
    constructor(filePathOrBlob, options) {
        super(filePathOrBlob);
        Object.defineProperty(this, "options", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: {}
        });
        if (typeof options === "string") {
            // String shorthand: the argument is the column name.
            this.options = { column: options };
        }
        else if (options) {
            this.options = options;
        }
    }
    /**
     * Parse the raw CSV text into one pageContent string per row, using
     * `dsvFormat` from the dynamically imported `d3-dsv` package.
     * @param raw The raw CSV data to be parsed.
     * @returns One string per CSV row.
     */
    async parse(raw) {
        const { column, separator = "," } = this.options;
        const { dsvFormat } = await CSVLoaderImports();
        const rows = dsvFormat(separator).parse(raw.trim());
        if (column === undefined) {
            // No column selected: render every cell as "key: value" lines.
            return rows.map((row) => Object.keys(row)
                .map((key) => `${key.trim()}: ${row[key]?.trim()}`)
                .join("\n"));
        }
        if (!rows.columns.includes(column)) {
            throw new Error(`Column ${column} not found in CSV file.`);
        }
        // Note TextLoader will raise an exception if the value is null.
        return rows.map((row) => row[column]);
    }
}
|
|
60
|
-
/**
 * Lazily import the optional `d3-dsv` peer dependency, logging the
 * underlying failure and rethrowing with an install hint if it is missing.
 */
async function CSVLoaderImports() {
    let mod;
    try {
        mod = await import("d3-dsv");
    }
    catch (e) {
        console.error(e);
        throw new Error("Please install d3-dsv as a dependency with, e.g. `yarn add d3-dsv@2`");
    }
    return { dsvFormat: mod.dsvFormat };
}
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.DocxLoader = void 0;
|
|
4
|
-
const documents_1 = require("@langchain/core/documents");
|
|
5
|
-
const buffer_js_1 = require("./buffer.cjs");
|
|
6
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
7
|
-
// One-time warning at module load: this entrypoint moved to
// @langchain/community. The #__PURE__ annotation lets bundlers drop the
// call when tree-shaking.
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
    oldEntrypointName: "document_loaders/fs/docx",
    newPackageName: "@langchain/community",
});
|
|
11
|
-
/**
 * @deprecated - Import from "@langchain/community/document_loaders/fs/docx" instead. This entrypoint will be removed in 0.3.0.
 *
 * Document loader for DOCX files, built on `BufferLoader`. Text is
 * extracted with the dynamically imported `mammoth` package.
 */
class DocxLoader extends buffer_js_1.BufferLoader {
    constructor(filePathOrBlob) {
        super(filePathOrBlob);
    }
    /**
     * Extract the raw text from a DOCX buffer and wrap it in a single
     * `Document` carrying the supplied metadata. Resolves to an empty
     * array when the file yields no text.
     * @param raw The raw buffer from which to extract text content.
     * @param metadata Metadata attached to the resulting `Document`.
     * @returns A promise resolving to zero or one `Document` instances.
     */
    async parse(raw, metadata) {
        const { extractRawText } = await DocxLoaderImports();
        const { value } = await extractRawText({ buffer: raw });
        if (!value) {
            return [];
        }
        return [new documents_1.Document({ pageContent: value, metadata })];
    }
}
exports.DocxLoader = DocxLoader;
|
|
49
|
-
/**
 * Lazily import the optional `mammoth` dependency, logging the underlying
 * failure and rethrowing with an actionable install hint if it is missing.
 */
async function DocxLoaderImports() {
    let mammoth;
    try {
        mammoth = await import("mammoth");
    }
    catch (e) {
        console.error(e);
        throw new Error("Failed to load mammoth. Please install it with eg. `npm install mammoth`.");
    }
    return { extractRawText: mammoth.extractRawText };
}
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
/// <reference types="node" resolution-mode="require"/>
|
|
2
|
-
import { Document } from "@langchain/core/documents";
|
|
3
|
-
import { BufferLoader } from "./buffer.js";
|
|
4
|
-
/**
 * @deprecated - Import from "@langchain/community/document_loaders/fs/docx" instead. This entrypoint will be removed in 0.3.0.
 *
 * Document loader for DOCX files, built on `BufferLoader`. Raw text is
 * extracted with the `mammoth` package.
 */
export declare class DocxLoader extends BufferLoader {
    constructor(filePathOrBlob: string | Blob);
    /**
     * Extracts the raw text content from a DOCX buffer via `mammoth`'s
     * `extractRawText` and wraps it in a single `Document` carrying the
     * supplied metadata. Resolves to an empty array when the file has no
     * extractable text.
     * @param raw The raw buffer from which to extract text content.
     * @param metadata The metadata to be associated with the created `Document` instance.
     * @returns A promise that resolves to an array of `Document` instances.
     */
    parse(raw: Buffer, metadata: Document["metadata"]): Promise<Document[]>;
}
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
|
-
import { BufferLoader } from "./buffer.js";
|
|
3
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
4
|
-
// One-time warning at module load: this entrypoint moved to
// @langchain/community. The #__PURE__ annotation lets bundlers drop the
// call when tree-shaking.
/* #__PURE__ */ logVersion020MigrationWarning({
    oldEntrypointName: "document_loaders/fs/docx",
    newPackageName: "@langchain/community",
});
|
|
8
|
-
/**
 * @deprecated - Import from "@langchain/community/document_loaders/fs/docx" instead. This entrypoint will be removed in 0.3.0.
 *
 * Document loader for DOCX files, built on `BufferLoader`. Text is
 * extracted with the dynamically imported `mammoth` package.
 */
export class DocxLoader extends BufferLoader {
    constructor(filePathOrBlob) {
        super(filePathOrBlob);
    }
    /**
     * Extract the raw text from a DOCX buffer and wrap it in a single
     * `Document` carrying the supplied metadata. Resolves to an empty
     * array when the file yields no text.
     * @param raw The raw buffer from which to extract text content.
     * @param metadata Metadata attached to the resulting `Document`.
     * @returns A promise resolving to zero or one `Document` instances.
     */
    async parse(raw, metadata) {
        const { extractRawText } = await DocxLoaderImports();
        const { value } = await extractRawText({ buffer: raw });
        if (!value) {
            return [];
        }
        return [new Document({ pageContent: value, metadata })];
    }
}
|
|
45
|
-
/**
 * Lazily import the optional `mammoth` dependency, logging the underlying
 * failure and rethrowing with an actionable install hint if it is missing.
 */
async function DocxLoaderImports() {
    let mammoth;
    try {
        mammoth = await import("mammoth");
    }
    catch (e) {
        console.error(e);
        throw new Error("Failed to load mammoth. Please install it with eg. `npm install mammoth`.");
    }
    return { extractRawText: mammoth.extractRawText };
}
|
|
@@ -1,103 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.EPubLoader = void 0;
|
|
4
|
-
const documents_1 = require("@langchain/core/documents");
|
|
5
|
-
const base_js_1 = require("../base.cjs");
|
|
6
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
7
|
-
// One-time warning at module load: this entrypoint moved to
// @langchain/community. The #__PURE__ annotation lets bundlers drop the
// call when tree-shaking.
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
    oldEntrypointName: "document_loaders/fs/epub",
    newPackageName: "@langchain/community",
});
|
|
11
|
-
/**
 * @deprecated - Import from "@langchain/community/document_loaders/fs/epub" instead. This entrypoint will be removed in 0.3.0.
 *
 * Document loader for EPUB files. Chapters are read with the dynamically
 * imported `epub2` package and converted to plain text with `html-to-text`.
 */
class EPubLoader extends base_js_1.BaseDocumentLoader {
    constructor(filePath, { splitChapters = true } = {}) {
        super();
        Object.defineProperty(this, "filePath", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: filePath
        });
        Object.defineProperty(this, "splitChapters", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.splitChapters = splitChapters;
    }
    /**
     * Convert each chapter of the given EPUB into an object holding its
     * plain-text content and (when present) a chapter-title metadata entry.
     * Chapters without an id or without HTML content are skipped.
     * @param epub The EPUB object to parse.
     * @returns A promise resolving to one entry per usable chapter.
     */
    async parse(epub) {
        const { htmlToText } = await HtmlToTextImport();
        const rawChapters = await Promise.all(epub.flow.map(async (chapter) => {
            if (!chapter.id)
                return null;
            const html = await epub.getChapterRawAsync(chapter.id);
            return html ? { html, title: chapter.title } : null;
        }));
        const parsed = [];
        for (const chapter of rawChapters) {
            if (!chapter)
                continue;
            const metadata = {};
            if (chapter.title) {
                metadata.chapter = chapter.title;
            }
            parsed.push({ pageContent: htmlToText(chapter.html), metadata });
        }
        return parsed;
    }
    /**
     * Load the EPUB at `filePath` into `Document`s: one per chapter when
     * `splitChapters` is true (the default), otherwise a single document
     * with all chapters joined by blank lines.
     * @returns A promise that resolves to an array of `Document` instances.
     */
    async load() {
        const { EPub } = await EpubImport();
        const epub = await EPub.createAsync(this.filePath);
        const chapters = await this.parse(epub);
        if (chapters.length === 0)
            return [];
        const baseMetadata = { source: this.filePath };
        if (!this.splitChapters) {
            const pageContent = chapters
                .map((chapter) => chapter.pageContent)
                .join("\n\n");
            return [new documents_1.Document({ pageContent, metadata: baseMetadata })];
        }
        return chapters.map((chapter) => new documents_1.Document({
            pageContent: chapter.pageContent,
            metadata: { ...baseMetadata, ...chapter.metadata },
        }));
    }
}
exports.EPubLoader = EPubLoader;
|
|
92
|
-
/**
 * Lazily import the optional `epub2` dependency, rethrowing with an
 * actionable install hint if it is missing.
 */
async function EpubImport() {
    let mod;
    try {
        mod = await import("epub2");
    }
    catch {
        throw new Error("Failed to load epub2. Please install it with eg. `npm install epub2`.");
    }
    return { EPub: mod.EPub };
}
|
|
98
|
-
/**
 * Lazily import the optional `html-to-text` dependency, rethrowing with an
 * actionable install hint if it is missing.
 */
async function HtmlToTextImport() {
    let mod;
    try {
        mod = await import("html-to-text");
    }
    catch {
        throw new Error("Failed to load html-to-text. Please install it with eg. `npm install html-to-text`.");
    }
    return { htmlToText: mod.htmlToText };
}
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import type { EPub } from "epub2";
|
|
2
|
-
import { Document } from "@langchain/core/documents";
|
|
3
|
-
import { BaseDocumentLoader } from "../base.js";
|
|
4
|
-
/**
 * @deprecated - Import from "@langchain/community/document_loaders/fs/epub" instead. This entrypoint will be removed in 0.3.0.
 *
 * Document loader for EPUB files, built on `BaseDocumentLoader`.
 */
export declare class EPubLoader extends BaseDocumentLoader {
    filePath: string;
    private splitChapters;
    constructor(filePath: string, { splitChapters }?: {
        splitChapters?: boolean | undefined;
    });
    /**
     * Converts each chapter of the given EPUB into an object holding its
     * plain-text content and optional chapter-title metadata.
     * @param epub The EPUB object to parse.
     * @returns A promise that resolves to an array of objects representing the content and metadata of each chapter.
     */
    protected parse(epub: EPub): Promise<{
        pageContent: string;
        metadata?: object;
    }[]>;
    /**
     * Loads the EPUB file at `filePath` into `Document` instances — one per
     * chapter when `splitChapters` is true, otherwise a single combined
     * document.
     * @returns A promise that resolves to an array of `Document` instances.
     */
    load(): Promise<Document[]>;
}
|