langchain 0.2.18 → 0.3.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/openai_functions/index.cjs +2 -2
- package/dist/agents/openai_functions/index.js +2 -2
- package/dist/chains/combine_documents/stuff.cjs +2 -2
- package/dist/chains/combine_documents/stuff.js +2 -2
- package/dist/chains/openai_functions/openapi.cjs +3 -1
- package/dist/chains/openai_functions/openapi.js +3 -1
- package/dist/load/import_constants.cjs +2 -39
- package/dist/load/import_constants.js +2 -39
- package/dist/load/import_map.cjs +2 -3
- package/dist/load/import_map.d.ts +0 -1
- package/dist/load/import_map.js +0 -1
- package/dist/smith/config.d.ts +1 -5
- package/package.json +31 -854
- package/dist/document_loaders/fs/chatgpt.cjs +0 -90
- package/dist/document_loaders/fs/chatgpt.d.ts +0 -8
- package/dist/document_loaders/fs/chatgpt.js +0 -86
- package/dist/document_loaders/fs/csv.cjs +0 -73
- package/dist/document_loaders/fs/csv.d.ts +0 -65
- package/dist/document_loaders/fs/csv.js +0 -69
- package/dist/document_loaders/fs/docx.cjs +0 -58
- package/dist/document_loaders/fs/docx.d.ts +0 -25
- package/dist/document_loaders/fs/docx.js +0 -54
- package/dist/document_loaders/fs/epub.cjs +0 -103
- package/dist/document_loaders/fs/epub.d.ts +0 -33
- package/dist/document_loaders/fs/epub.js +0 -99
- package/dist/document_loaders/fs/notion.cjs +0 -26
- package/dist/document_loaders/fs/notion.d.ts +0 -12
- package/dist/document_loaders/fs/notion.js +0 -22
- package/dist/document_loaders/fs/obsidian.cjs +0 -247
- package/dist/document_loaders/fs/obsidian.d.ts +0 -28
- package/dist/document_loaders/fs/obsidian.js +0 -240
- package/dist/document_loaders/fs/openai_whisper_audio.cjs +0 -49
- package/dist/document_loaders/fs/openai_whisper_audio.d.ts +0 -23
- package/dist/document_loaders/fs/openai_whisper_audio.js +0 -45
- package/dist/document_loaders/fs/pdf.cjs +0 -148
- package/dist/document_loaders/fs/pdf.d.ts +0 -49
- package/dist/document_loaders/fs/pdf.js +0 -144
- package/dist/document_loaders/fs/pptx.cjs +0 -46
- package/dist/document_loaders/fs/pptx.d.ts +0 -25
- package/dist/document_loaders/fs/pptx.js +0 -42
- package/dist/document_loaders/fs/srt.cjs +0 -57
- package/dist/document_loaders/fs/srt.d.ts +0 -32
- package/dist/document_loaders/fs/srt.js +0 -50
- package/dist/document_loaders/fs/unstructured.cjs +0 -338
- package/dist/document_loaders/fs/unstructured.d.ts +0 -125
- package/dist/document_loaders/fs/unstructured.js +0 -333
- package/dist/document_loaders/web/apify_dataset.cjs +0 -130
- package/dist/document_loaders/web/apify_dataset.d.ts +0 -85
- package/dist/document_loaders/web/apify_dataset.js +0 -126
- package/dist/document_loaders/web/assemblyai.cjs +0 -200
- package/dist/document_loaders/web/assemblyai.d.ts +0 -95
- package/dist/document_loaders/web/assemblyai.js +0 -193
- package/dist/document_loaders/web/azure_blob_storage_container.cjs +0 -73
- package/dist/document_loaders/web/azure_blob_storage_container.d.ts +0 -46
- package/dist/document_loaders/web/azure_blob_storage_container.js +0 -69
- package/dist/document_loaders/web/azure_blob_storage_file.cjs +0 -124
- package/dist/document_loaders/web/azure_blob_storage_file.d.ts +0 -53
- package/dist/document_loaders/web/azure_blob_storage_file.js +0 -97
- package/dist/document_loaders/web/browserbase.cjs +0 -93
- package/dist/document_loaders/web/browserbase.d.ts +0 -48
- package/dist/document_loaders/web/browserbase.js +0 -86
- package/dist/document_loaders/web/cheerio.cjs +0 -118
- package/dist/document_loaders/web/cheerio.d.ts +0 -77
- package/dist/document_loaders/web/cheerio.js +0 -114
- package/dist/document_loaders/web/college_confidential.cjs +0 -41
- package/dist/document_loaders/web/college_confidential.d.ts +0 -25
- package/dist/document_loaders/web/college_confidential.js +0 -37
- package/dist/document_loaders/web/confluence.cjs +0 -190
- package/dist/document_loaders/web/confluence.d.ts +0 -114
- package/dist/document_loaders/web/confluence.js +0 -186
- package/dist/document_loaders/web/couchbase.cjs +0 -95
- package/dist/document_loaders/web/couchbase.d.ts +0 -32
- package/dist/document_loaders/web/couchbase.js +0 -91
- package/dist/document_loaders/web/figma.cjs +0 -102
- package/dist/document_loaders/web/figma.d.ts +0 -82
- package/dist/document_loaders/web/figma.js +0 -98
- package/dist/document_loaders/web/firecrawl.cjs +0 -95
- package/dist/document_loaders/web/firecrawl.d.ts +0 -50
- package/dist/document_loaders/web/firecrawl.js +0 -88
- package/dist/document_loaders/web/gitbook.cjs +0 -110
- package/dist/document_loaders/web/gitbook.d.ts +0 -55
- package/dist/document_loaders/web/gitbook.js +0 -106
- package/dist/document_loaders/web/github.cjs +0 -615
- package/dist/document_loaders/web/github.d.ts +0 -203
- package/dist/document_loaders/web/github.js +0 -608
- package/dist/document_loaders/web/hn.cjs +0 -90
- package/dist/document_loaders/web/hn.d.ts +0 -42
- package/dist/document_loaders/web/hn.js +0 -86
- package/dist/document_loaders/web/imsdb.cjs +0 -44
- package/dist/document_loaders/web/imsdb.d.ts +0 -23
- package/dist/document_loaders/web/imsdb.js +0 -40
- package/dist/document_loaders/web/notionapi.cjs +0 -404
- package/dist/document_loaders/web/notionapi.d.ts +0 -133
- package/dist/document_loaders/web/notionapi.js +0 -392
- package/dist/document_loaders/web/notiondb.cjs +0 -199
- package/dist/document_loaders/web/notiondb.d.ts +0 -56
- package/dist/document_loaders/web/notiondb.js +0 -195
- package/dist/document_loaders/web/pdf.cjs +0 -140
- package/dist/document_loaders/web/pdf.d.ts +0 -35
- package/dist/document_loaders/web/pdf.js +0 -136
- package/dist/document_loaders/web/playwright.cjs +0 -89
- package/dist/document_loaders/web/playwright.d.ts +0 -58
- package/dist/document_loaders/web/playwright.js +0 -85
- package/dist/document_loaders/web/puppeteer.cjs +0 -139
- package/dist/document_loaders/web/puppeteer.d.ts +0 -82
- package/dist/document_loaders/web/puppeteer.js +0 -135
- package/dist/document_loaders/web/recursive_url.cjs +0 -198
- package/dist/document_loaders/web/recursive_url.d.ts +0 -33
- package/dist/document_loaders/web/recursive_url.js +0 -194
- package/dist/document_loaders/web/s3.cjs +0 -164
- package/dist/document_loaders/web/s3.d.ts +0 -78
- package/dist/document_loaders/web/s3.js +0 -137
- package/dist/document_loaders/web/searchapi.cjs +0 -150
- package/dist/document_loaders/web/searchapi.d.ts +0 -76
- package/dist/document_loaders/web/searchapi.js +0 -146
- package/dist/document_loaders/web/serpapi.cjs +0 -127
- package/dist/document_loaders/web/serpapi.d.ts +0 -62
- package/dist/document_loaders/web/serpapi.js +0 -123
- package/dist/document_loaders/web/sitemap.cjs +0 -118
- package/dist/document_loaders/web/sitemap.d.ts +0 -41
- package/dist/document_loaders/web/sitemap.js +0 -114
- package/dist/document_loaders/web/sonix_audio.cjs +0 -68
- package/dist/document_loaders/web/sonix_audio.d.ts +0 -36
- package/dist/document_loaders/web/sonix_audio.js +0 -64
- package/dist/document_loaders/web/sort_xyz_blockchain.cjs +0 -157
- package/dist/document_loaders/web/sort_xyz_blockchain.d.ts +0 -78
- package/dist/document_loaders/web/sort_xyz_blockchain.js +0 -153
- package/dist/document_loaders/web/youtube.cjs +0 -116
- package/dist/document_loaders/web/youtube.d.ts +0 -55
- package/dist/document_loaders/web/youtube.js +0 -112
- package/dist/experimental/tools/pyinterpreter.cjs +0 -248
- package/dist/experimental/tools/pyinterpreter.d.ts +0 -18
- package/dist/experimental/tools/pyinterpreter.js +0 -244
- package/dist/retrievers/self_query/chroma.cjs +0 -48
- package/dist/retrievers/self_query/chroma.d.ts +0 -26
- package/dist/retrievers/self_query/chroma.js +0 -44
- package/dist/retrievers/self_query/pinecone.cjs +0 -47
- package/dist/retrievers/self_query/pinecone.d.ts +0 -26
- package/dist/retrievers/self_query/pinecone.js +0 -43
- package/dist/retrievers/self_query/supabase.cjs +0 -278
- package/dist/retrievers/self_query/supabase.d.ts +0 -109
- package/dist/retrievers/self_query/supabase.js +0 -274
- package/dist/retrievers/self_query/supabase_utils.cjs +0 -264
- package/dist/retrievers/self_query/supabase_utils.d.ts +0 -101
- package/dist/retrievers/self_query/supabase_utils.js +0 -259
- package/dist/retrievers/self_query/vectara.cjs +0 -143
- package/dist/retrievers/self_query/vectara.d.ts +0 -42
- package/dist/retrievers/self_query/vectara.js +0 -139
- package/dist/retrievers/self_query/weaviate.cjs +0 -201
- package/dist/retrievers/self_query/weaviate.d.ts +0 -99
- package/dist/retrievers/self_query/weaviate.js +0 -197
- package/dist/types/assemblyai-types.cjs +0 -2
- package/dist/types/assemblyai-types.d.ts +0 -4
- package/dist/types/assemblyai-types.js +0 -1
- package/document_loaders/fs/chatgpt.cjs +0 -1
- package/document_loaders/fs/chatgpt.d.cts +0 -1
- package/document_loaders/fs/chatgpt.d.ts +0 -1
- package/document_loaders/fs/chatgpt.js +0 -1
- package/document_loaders/fs/csv.cjs +0 -1
- package/document_loaders/fs/csv.d.cts +0 -1
- package/document_loaders/fs/csv.d.ts +0 -1
- package/document_loaders/fs/csv.js +0 -1
- package/document_loaders/fs/docx.cjs +0 -1
- package/document_loaders/fs/docx.d.cts +0 -1
- package/document_loaders/fs/docx.d.ts +0 -1
- package/document_loaders/fs/docx.js +0 -1
- package/document_loaders/fs/epub.cjs +0 -1
- package/document_loaders/fs/epub.d.cts +0 -1
- package/document_loaders/fs/epub.d.ts +0 -1
- package/document_loaders/fs/epub.js +0 -1
- package/document_loaders/fs/notion.cjs +0 -1
- package/document_loaders/fs/notion.d.cts +0 -1
- package/document_loaders/fs/notion.d.ts +0 -1
- package/document_loaders/fs/notion.js +0 -1
- package/document_loaders/fs/obsidian.cjs +0 -1
- package/document_loaders/fs/obsidian.d.cts +0 -1
- package/document_loaders/fs/obsidian.d.ts +0 -1
- package/document_loaders/fs/obsidian.js +0 -1
- package/document_loaders/fs/openai_whisper_audio.cjs +0 -1
- package/document_loaders/fs/openai_whisper_audio.d.cts +0 -1
- package/document_loaders/fs/openai_whisper_audio.d.ts +0 -1
- package/document_loaders/fs/openai_whisper_audio.js +0 -1
- package/document_loaders/fs/pdf.cjs +0 -1
- package/document_loaders/fs/pdf.d.cts +0 -1
- package/document_loaders/fs/pdf.d.ts +0 -1
- package/document_loaders/fs/pdf.js +0 -1
- package/document_loaders/fs/pptx.cjs +0 -1
- package/document_loaders/fs/pptx.d.cts +0 -1
- package/document_loaders/fs/pptx.d.ts +0 -1
- package/document_loaders/fs/pptx.js +0 -1
- package/document_loaders/fs/srt.cjs +0 -1
- package/document_loaders/fs/srt.d.cts +0 -1
- package/document_loaders/fs/srt.d.ts +0 -1
- package/document_loaders/fs/srt.js +0 -1
- package/document_loaders/fs/unstructured.cjs +0 -1
- package/document_loaders/fs/unstructured.d.cts +0 -1
- package/document_loaders/fs/unstructured.d.ts +0 -1
- package/document_loaders/fs/unstructured.js +0 -1
- package/document_loaders/web/apify_dataset.cjs +0 -1
- package/document_loaders/web/apify_dataset.d.cts +0 -1
- package/document_loaders/web/apify_dataset.d.ts +0 -1
- package/document_loaders/web/apify_dataset.js +0 -1
- package/document_loaders/web/assemblyai.cjs +0 -1
- package/document_loaders/web/assemblyai.d.cts +0 -1
- package/document_loaders/web/assemblyai.d.ts +0 -1
- package/document_loaders/web/assemblyai.js +0 -1
- package/document_loaders/web/azure_blob_storage_container.cjs +0 -1
- package/document_loaders/web/azure_blob_storage_container.d.cts +0 -1
- package/document_loaders/web/azure_blob_storage_container.d.ts +0 -1
- package/document_loaders/web/azure_blob_storage_container.js +0 -1
- package/document_loaders/web/azure_blob_storage_file.cjs +0 -1
- package/document_loaders/web/azure_blob_storage_file.d.cts +0 -1
- package/document_loaders/web/azure_blob_storage_file.d.ts +0 -1
- package/document_loaders/web/azure_blob_storage_file.js +0 -1
- package/document_loaders/web/browserbase.cjs +0 -1
- package/document_loaders/web/browserbase.d.cts +0 -1
- package/document_loaders/web/browserbase.d.ts +0 -1
- package/document_loaders/web/browserbase.js +0 -1
- package/document_loaders/web/cheerio.cjs +0 -1
- package/document_loaders/web/cheerio.d.cts +0 -1
- package/document_loaders/web/cheerio.d.ts +0 -1
- package/document_loaders/web/cheerio.js +0 -1
- package/document_loaders/web/college_confidential.cjs +0 -1
- package/document_loaders/web/college_confidential.d.cts +0 -1
- package/document_loaders/web/college_confidential.d.ts +0 -1
- package/document_loaders/web/college_confidential.js +0 -1
- package/document_loaders/web/confluence.cjs +0 -1
- package/document_loaders/web/confluence.d.cts +0 -1
- package/document_loaders/web/confluence.d.ts +0 -1
- package/document_loaders/web/confluence.js +0 -1
- package/document_loaders/web/couchbase.cjs +0 -1
- package/document_loaders/web/couchbase.d.cts +0 -1
- package/document_loaders/web/couchbase.d.ts +0 -1
- package/document_loaders/web/couchbase.js +0 -1
- package/document_loaders/web/figma.cjs +0 -1
- package/document_loaders/web/figma.d.cts +0 -1
- package/document_loaders/web/figma.d.ts +0 -1
- package/document_loaders/web/figma.js +0 -1
- package/document_loaders/web/firecrawl.cjs +0 -1
- package/document_loaders/web/firecrawl.d.cts +0 -1
- package/document_loaders/web/firecrawl.d.ts +0 -1
- package/document_loaders/web/firecrawl.js +0 -1
- package/document_loaders/web/gitbook.cjs +0 -1
- package/document_loaders/web/gitbook.d.cts +0 -1
- package/document_loaders/web/gitbook.d.ts +0 -1
- package/document_loaders/web/gitbook.js +0 -1
- package/document_loaders/web/github.cjs +0 -1
- package/document_loaders/web/github.d.cts +0 -1
- package/document_loaders/web/github.d.ts +0 -1
- package/document_loaders/web/github.js +0 -1
- package/document_loaders/web/hn.cjs +0 -1
- package/document_loaders/web/hn.d.cts +0 -1
- package/document_loaders/web/hn.d.ts +0 -1
- package/document_loaders/web/hn.js +0 -1
- package/document_loaders/web/imsdb.cjs +0 -1
- package/document_loaders/web/imsdb.d.cts +0 -1
- package/document_loaders/web/imsdb.d.ts +0 -1
- package/document_loaders/web/imsdb.js +0 -1
- package/document_loaders/web/notionapi.cjs +0 -1
- package/document_loaders/web/notionapi.d.cts +0 -1
- package/document_loaders/web/notionapi.d.ts +0 -1
- package/document_loaders/web/notionapi.js +0 -1
- package/document_loaders/web/notiondb.cjs +0 -1
- package/document_loaders/web/notiondb.d.cts +0 -1
- package/document_loaders/web/notiondb.d.ts +0 -1
- package/document_loaders/web/notiondb.js +0 -1
- package/document_loaders/web/pdf.cjs +0 -1
- package/document_loaders/web/pdf.d.cts +0 -1
- package/document_loaders/web/pdf.d.ts +0 -1
- package/document_loaders/web/pdf.js +0 -1
- package/document_loaders/web/playwright.cjs +0 -1
- package/document_loaders/web/playwright.d.cts +0 -1
- package/document_loaders/web/playwright.d.ts +0 -1
- package/document_loaders/web/playwright.js +0 -1
- package/document_loaders/web/puppeteer.cjs +0 -1
- package/document_loaders/web/puppeteer.d.cts +0 -1
- package/document_loaders/web/puppeteer.d.ts +0 -1
- package/document_loaders/web/puppeteer.js +0 -1
- package/document_loaders/web/recursive_url.cjs +0 -1
- package/document_loaders/web/recursive_url.d.cts +0 -1
- package/document_loaders/web/recursive_url.d.ts +0 -1
- package/document_loaders/web/recursive_url.js +0 -1
- package/document_loaders/web/s3.cjs +0 -1
- package/document_loaders/web/s3.d.cts +0 -1
- package/document_loaders/web/s3.d.ts +0 -1
- package/document_loaders/web/s3.js +0 -1
- package/document_loaders/web/searchapi.cjs +0 -1
- package/document_loaders/web/searchapi.d.cts +0 -1
- package/document_loaders/web/searchapi.d.ts +0 -1
- package/document_loaders/web/searchapi.js +0 -1
- package/document_loaders/web/serpapi.cjs +0 -1
- package/document_loaders/web/serpapi.d.cts +0 -1
- package/document_loaders/web/serpapi.d.ts +0 -1
- package/document_loaders/web/serpapi.js +0 -1
- package/document_loaders/web/sitemap.cjs +0 -1
- package/document_loaders/web/sitemap.d.cts +0 -1
- package/document_loaders/web/sitemap.d.ts +0 -1
- package/document_loaders/web/sitemap.js +0 -1
- package/document_loaders/web/sonix_audio.cjs +0 -1
- package/document_loaders/web/sonix_audio.d.cts +0 -1
- package/document_loaders/web/sonix_audio.d.ts +0 -1
- package/document_loaders/web/sonix_audio.js +0 -1
- package/document_loaders/web/sort_xyz_blockchain.cjs +0 -1
- package/document_loaders/web/sort_xyz_blockchain.d.cts +0 -1
- package/document_loaders/web/sort_xyz_blockchain.d.ts +0 -1
- package/document_loaders/web/sort_xyz_blockchain.js +0 -1
- package/document_loaders/web/youtube.cjs +0 -1
- package/document_loaders/web/youtube.d.cts +0 -1
- package/document_loaders/web/youtube.d.ts +0 -1
- package/document_loaders/web/youtube.js +0 -1
- package/experimental/tools/pyinterpreter.cjs +0 -1
- package/experimental/tools/pyinterpreter.d.cts +0 -1
- package/experimental/tools/pyinterpreter.d.ts +0 -1
- package/experimental/tools/pyinterpreter.js +0 -1
- package/memory/index.cjs +0 -1
- package/memory/index.d.cts +0 -1
- package/memory/index.d.ts +0 -1
- package/memory/index.js +0 -1
- package/retrievers/self_query/chroma.cjs +0 -1
- package/retrievers/self_query/chroma.d.cts +0 -1
- package/retrievers/self_query/chroma.d.ts +0 -1
- package/retrievers/self_query/chroma.js +0 -1
- package/retrievers/self_query/pinecone.cjs +0 -1
- package/retrievers/self_query/pinecone.d.cts +0 -1
- package/retrievers/self_query/pinecone.d.ts +0 -1
- package/retrievers/self_query/pinecone.js +0 -1
- package/retrievers/self_query/supabase.cjs +0 -1
- package/retrievers/self_query/supabase.d.cts +0 -1
- package/retrievers/self_query/supabase.d.ts +0 -1
- package/retrievers/self_query/supabase.js +0 -1
- package/retrievers/self_query/vectara.cjs +0 -1
- package/retrievers/self_query/vectara.d.cts +0 -1
- package/retrievers/self_query/vectara.d.ts +0 -1
- package/retrievers/self_query/vectara.js +0 -1
- package/retrievers/self_query/weaviate.cjs +0 -1
- package/retrievers/self_query/weaviate.d.cts +0 -1
- package/retrievers/self_query/weaviate.d.ts +0 -1
- package/retrievers/self_query/weaviate.js +0 -1
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
|
-
import { BaseDocumentLoader } from "../base.js";
|
|
3
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
4
|
-
/* #__PURE__ */ logVersion020MigrationWarning({
|
|
5
|
-
oldEntrypointName: "document_loaders/fs/epub",
|
|
6
|
-
newPackageName: "@langchain/community",
|
|
7
|
-
});
|
|
8
|
-
/**
|
|
9
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/epub" instead. This entrypoint will be removed in 0.3.0.
|
|
10
|
-
*
|
|
11
|
-
* A class that extends the `BaseDocumentLoader` class. It represents a
|
|
12
|
-
* document loader that loads documents from EPUB files.
|
|
13
|
-
*/
|
|
14
|
-
export class EPubLoader extends BaseDocumentLoader {
|
|
15
|
-
constructor(filePath, { splitChapters = true } = {}) {
|
|
16
|
-
super();
|
|
17
|
-
Object.defineProperty(this, "filePath", {
|
|
18
|
-
enumerable: true,
|
|
19
|
-
configurable: true,
|
|
20
|
-
writable: true,
|
|
21
|
-
value: filePath
|
|
22
|
-
});
|
|
23
|
-
Object.defineProperty(this, "splitChapters", {
|
|
24
|
-
enumerable: true,
|
|
25
|
-
configurable: true,
|
|
26
|
-
writable: true,
|
|
27
|
-
value: void 0
|
|
28
|
-
});
|
|
29
|
-
this.splitChapters = splitChapters;
|
|
30
|
-
}
|
|
31
|
-
/**
|
|
32
|
-
* A protected method that takes an EPUB object as a parameter and returns
|
|
33
|
-
* a promise that resolves to an array of objects representing the content
|
|
34
|
-
* and metadata of each chapter.
|
|
35
|
-
* @param epub The EPUB object to parse.
|
|
36
|
-
* @returns A promise that resolves to an array of objects representing the content and metadata of each chapter.
|
|
37
|
-
*/
|
|
38
|
-
async parse(epub) {
|
|
39
|
-
const { htmlToText } = await HtmlToTextImport();
|
|
40
|
-
const chapters = await Promise.all(epub.flow.map(async (chapter) => {
|
|
41
|
-
if (!chapter.id)
|
|
42
|
-
return null;
|
|
43
|
-
const html = await epub.getChapterRawAsync(chapter.id);
|
|
44
|
-
if (!html)
|
|
45
|
-
return null;
|
|
46
|
-
return {
|
|
47
|
-
html,
|
|
48
|
-
title: chapter.title,
|
|
49
|
-
};
|
|
50
|
-
}));
|
|
51
|
-
return chapters.filter(Boolean).map((chapter) => ({
|
|
52
|
-
pageContent: htmlToText(chapter.html),
|
|
53
|
-
metadata: {
|
|
54
|
-
...(chapter.title && { chapter: chapter.title }),
|
|
55
|
-
},
|
|
56
|
-
}));
|
|
57
|
-
}
|
|
58
|
-
/**
|
|
59
|
-
* A method that loads the EPUB file and returns a promise that resolves
|
|
60
|
-
* to an array of `Document` instances.
|
|
61
|
-
* @returns A promise that resolves to an array of `Document` instances.
|
|
62
|
-
*/
|
|
63
|
-
async load() {
|
|
64
|
-
const { EPub } = await EpubImport();
|
|
65
|
-
const epub = await EPub.createAsync(this.filePath);
|
|
66
|
-
const parsed = await this.parse(epub);
|
|
67
|
-
const metadata = { source: this.filePath };
|
|
68
|
-
if (parsed.length === 0)
|
|
69
|
-
return [];
|
|
70
|
-
return this.splitChapters
|
|
71
|
-
? parsed.map((chapter) => new Document({
|
|
72
|
-
pageContent: chapter.pageContent,
|
|
73
|
-
metadata: {
|
|
74
|
-
...metadata,
|
|
75
|
-
...chapter.metadata,
|
|
76
|
-
},
|
|
77
|
-
}))
|
|
78
|
-
: [
|
|
79
|
-
new Document({
|
|
80
|
-
pageContent: parsed
|
|
81
|
-
.map((chapter) => chapter.pageContent)
|
|
82
|
-
.join("\n\n"),
|
|
83
|
-
metadata,
|
|
84
|
-
}),
|
|
85
|
-
];
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
async function EpubImport() {
|
|
89
|
-
const { EPub } = await import("epub2").catch(() => {
|
|
90
|
-
throw new Error("Failed to load epub2. Please install it with eg. `npm install epub2`.");
|
|
91
|
-
});
|
|
92
|
-
return { EPub };
|
|
93
|
-
}
|
|
94
|
-
async function HtmlToTextImport() {
|
|
95
|
-
const { htmlToText } = await import("html-to-text").catch(() => {
|
|
96
|
-
throw new Error("Failed to load html-to-text. Please install it with eg. `npm install html-to-text`.");
|
|
97
|
-
});
|
|
98
|
-
return { htmlToText };
|
|
99
|
-
}
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.NotionLoader = void 0;
|
|
4
|
-
const directory_js_1 = require("./directory.cjs");
|
|
5
|
-
const text_js_1 = require("./text.cjs");
|
|
6
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
7
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
8
|
-
oldEntrypointName: "document_loaders/fs/notion",
|
|
9
|
-
newPackageName: "@langchain/community",
|
|
10
|
-
});
|
|
11
|
-
/**
|
|
12
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/notion" instead. This entrypoint will be removed in 0.3.0.
|
|
13
|
-
*
|
|
14
|
-
* A class that extends the DirectoryLoader class. It represents a
|
|
15
|
-
* document loader that loads documents from a directory in the Notion
|
|
16
|
-
* format. It uses the TextLoader for loading '.md' files and ignores
|
|
17
|
-
* unknown file types.
|
|
18
|
-
*/
|
|
19
|
-
class NotionLoader extends directory_js_1.DirectoryLoader {
|
|
20
|
-
constructor(directoryPath) {
|
|
21
|
-
super(directoryPath, {
|
|
22
|
-
".md": (filePath) => new text_js_1.TextLoader(filePath),
|
|
23
|
-
}, true, directory_js_1.UnknownHandling.Ignore);
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
exports.NotionLoader = NotionLoader;
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
import { DirectoryLoader } from "./directory.js";
|
|
2
|
-
/**
|
|
3
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/notion" instead. This entrypoint will be removed in 0.3.0.
|
|
4
|
-
*
|
|
5
|
-
* A class that extends the DirectoryLoader class. It represents a
|
|
6
|
-
* document loader that loads documents from a directory in the Notion
|
|
7
|
-
* format. It uses the TextLoader for loading '.md' files and ignores
|
|
8
|
-
* unknown file types.
|
|
9
|
-
*/
|
|
10
|
-
export declare class NotionLoader extends DirectoryLoader {
|
|
11
|
-
constructor(directoryPath: string);
|
|
12
|
-
}
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import { DirectoryLoader, UnknownHandling } from "./directory.js";
|
|
2
|
-
import { TextLoader } from "./text.js";
|
|
3
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
4
|
-
/* #__PURE__ */ logVersion020MigrationWarning({
|
|
5
|
-
oldEntrypointName: "document_loaders/fs/notion",
|
|
6
|
-
newPackageName: "@langchain/community",
|
|
7
|
-
});
|
|
8
|
-
/**
|
|
9
|
-
* @deprecated - Import from "@langchain/community/document_loaders/fs/notion" instead. This entrypoint will be removed in 0.3.0.
|
|
10
|
-
*
|
|
11
|
-
* A class that extends the DirectoryLoader class. It represents a
|
|
12
|
-
* document loader that loads documents from a directory in the Notion
|
|
13
|
-
* format. It uses the TextLoader for loading '.md' files and ignores
|
|
14
|
-
* unknown file types.
|
|
15
|
-
*/
|
|
16
|
-
export class NotionLoader extends DirectoryLoader {
|
|
17
|
-
constructor(directoryPath) {
|
|
18
|
-
super(directoryPath, {
|
|
19
|
-
".md": (filePath) => new TextLoader(filePath),
|
|
20
|
-
}, true, UnknownHandling.Ignore);
|
|
21
|
-
}
|
|
22
|
-
}
|
|
@@ -1,247 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.ObsidianLoader = void 0;
|
|
7
|
-
const js_yaml_1 = __importDefault(require("js-yaml"));
|
|
8
|
-
const documents_1 = require("@langchain/core/documents");
|
|
9
|
-
const env_1 = require("@langchain/core/utils/env");
|
|
10
|
-
const directory_js_1 = require("./directory.cjs");
|
|
11
|
-
const base_js_1 = require("../base.cjs");
|
|
12
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
13
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
14
|
-
oldEntrypointName: "document_loaders/fs/obsidian",
|
|
15
|
-
newPackageName: "@langchain/community",
|
|
16
|
-
});
|
|
17
|
-
/**
|
|
18
|
-
* Represents a loader for Obsidian markdown files. This loader extends the BaseDocumentLoader
|
|
19
|
-
* and provides functionality to parse and extract metadata, tags, and dataview fields from
|
|
20
|
-
* Obsidian markdown files.
|
|
21
|
-
*/
|
|
22
|
-
class ObsidianFileLoader extends base_js_1.BaseDocumentLoader {
|
|
23
|
-
/**
|
|
24
|
-
* Initializes a new instance of the ObsidianFileLoader class.
|
|
25
|
-
* @param filePath The path to the Obsidian markdown file.
|
|
26
|
-
* @param encoding The character encoding to use when reading the file. Defaults to 'utf-8'.
|
|
27
|
-
* @param collectMetadata Determines whether metadata should be collected from the file. Defaults to true.
|
|
28
|
-
*/
|
|
29
|
-
constructor(filePath, { encoding = "utf-8", collectMetadata = true, } = {}) {
|
|
30
|
-
super();
|
|
31
|
-
Object.defineProperty(this, "filePath", {
|
|
32
|
-
enumerable: true,
|
|
33
|
-
configurable: true,
|
|
34
|
-
writable: true,
|
|
35
|
-
value: void 0
|
|
36
|
-
});
|
|
37
|
-
Object.defineProperty(this, "encoding", {
|
|
38
|
-
enumerable: true,
|
|
39
|
-
configurable: true,
|
|
40
|
-
writable: true,
|
|
41
|
-
value: void 0
|
|
42
|
-
});
|
|
43
|
-
Object.defineProperty(this, "collectMetadata", {
|
|
44
|
-
enumerable: true,
|
|
45
|
-
configurable: true,
|
|
46
|
-
writable: true,
|
|
47
|
-
value: void 0
|
|
48
|
-
});
|
|
49
|
-
this.filePath = filePath;
|
|
50
|
-
this.encoding = encoding;
|
|
51
|
-
this.collectMetadata = collectMetadata;
|
|
52
|
-
}
|
|
53
|
-
/**
|
|
54
|
-
* Parses the YAML front matter from the given content string.
|
|
55
|
-
* @param content The string content of the markdown file.
|
|
56
|
-
* @returns An object representing the parsed front matter.
|
|
57
|
-
*/
|
|
58
|
-
parseFrontMatter(content) {
|
|
59
|
-
if (!this.collectMetadata) {
|
|
60
|
-
return {};
|
|
61
|
-
}
|
|
62
|
-
const match = content.match(ObsidianFileLoader.FRONT_MATTER_REGEX);
|
|
63
|
-
if (!match) {
|
|
64
|
-
return {};
|
|
65
|
-
}
|
|
66
|
-
try {
|
|
67
|
-
const frontMatter = js_yaml_1.default.load(match[1]);
|
|
68
|
-
if (frontMatter.tags && typeof frontMatter.tags === "string") {
|
|
69
|
-
frontMatter.tags = frontMatter.tags.split(", ");
|
|
70
|
-
}
|
|
71
|
-
return frontMatter;
|
|
72
|
-
}
|
|
73
|
-
catch (e) {
|
|
74
|
-
console.warn("Encountered non-yaml frontmatter");
|
|
75
|
-
return {};
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
/**
|
|
79
|
-
* Removes YAML front matter from the given content string.
|
|
80
|
-
* @param content The string content of the markdown file.
|
|
81
|
-
* @returns The content string with the front matter removed.
|
|
82
|
-
*/
|
|
83
|
-
removeFrontMatter(content) {
|
|
84
|
-
if (!this.collectMetadata) {
|
|
85
|
-
return content;
|
|
86
|
-
}
|
|
87
|
-
return content.replace(ObsidianFileLoader.FRONT_MATTER_REGEX, "");
|
|
88
|
-
}
|
|
89
|
-
/**
|
|
90
|
-
* Parses Obsidian-style tags from the given content string.
|
|
91
|
-
* @param content The string content of the markdown file.
|
|
92
|
-
* @returns A set of parsed tags.
|
|
93
|
-
*/
|
|
94
|
-
parseObsidianTags(content) {
|
|
95
|
-
if (!this.collectMetadata) {
|
|
96
|
-
return new Set();
|
|
97
|
-
}
|
|
98
|
-
const matches = content.matchAll(ObsidianFileLoader.TAG_REGEX);
|
|
99
|
-
const tags = new Set();
|
|
100
|
-
for (const match of matches) {
|
|
101
|
-
tags.add(match[1]);
|
|
102
|
-
}
|
|
103
|
-
return tags;
|
|
104
|
-
}
|
|
105
|
-
/**
|
|
106
|
-
* Parses dataview fields from the given content string.
|
|
107
|
-
* @param content The string content of the markdown file.
|
|
108
|
-
* @returns A record object containing key-value pairs of dataview fields.
|
|
109
|
-
*/
|
|
110
|
-
parseObsidianDataviewFields(content) {
|
|
111
|
-
if (!this.collectMetadata) {
|
|
112
|
-
return {};
|
|
113
|
-
}
|
|
114
|
-
const fields = {};
|
|
115
|
-
const lineMatches = content.matchAll(ObsidianFileLoader.DATAVIEW_LINE_REGEX);
|
|
116
|
-
for (const [, key, value] of lineMatches) {
|
|
117
|
-
fields[key] = value;
|
|
118
|
-
}
|
|
119
|
-
const bracketMatches = content.matchAll(ObsidianFileLoader.DATAVIEW_INLINE_BRACKET_REGEX);
|
|
120
|
-
for (const [, key, value] of bracketMatches) {
|
|
121
|
-
fields[key] = value;
|
|
122
|
-
}
|
|
123
|
-
const parenMatches = content.matchAll(ObsidianFileLoader.DATAVIEW_INLINE_PAREN_REGEX);
|
|
124
|
-
for (const [, key, value] of parenMatches) {
|
|
125
|
-
fields[key] = value;
|
|
126
|
-
}
|
|
127
|
-
return fields;
|
|
128
|
-
}
|
|
129
|
-
/**
|
|
130
|
-
* Converts metadata to a format compatible with Langchain.
|
|
131
|
-
* @param metadata The metadata object to convert.
|
|
132
|
-
* @returns A record object containing key-value pairs of Langchain-compatible metadata.
|
|
133
|
-
*/
|
|
134
|
-
toLangchainCompatibleMetadata(metadata) {
|
|
135
|
-
const result = {};
|
|
136
|
-
for (const [key, value] of Object.entries(metadata)) {
|
|
137
|
-
if (typeof value === "string" || typeof value === "number") {
|
|
138
|
-
result[key] = value;
|
|
139
|
-
}
|
|
140
|
-
else {
|
|
141
|
-
result[key] = JSON.stringify(value);
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
return result;
|
|
145
|
-
}
|
|
146
|
-
/**
|
|
147
|
-
* It loads the Obsidian file, parses it, and returns a `Document` instance.
|
|
148
|
-
* @returns An array of `Document` instances to comply with the BaseDocumentLoader interface.
|
|
149
|
-
*/
|
|
150
|
-
async load() {
|
|
151
|
-
const documents = [];
|
|
152
|
-
const { basename, readFile, stat } = await ObsidianFileLoader.imports();
|
|
153
|
-
const fileName = basename(this.filePath);
|
|
154
|
-
const stats = await stat(this.filePath);
|
|
155
|
-
let content = await readFile(this.filePath, this.encoding);
|
|
156
|
-
const frontMatter = this.parseFrontMatter(content);
|
|
157
|
-
const tags = this.parseObsidianTags(content);
|
|
158
|
-
const dataviewFields = this.parseObsidianDataviewFields(content);
|
|
159
|
-
content = this.removeFrontMatter(content);
|
|
160
|
-
const metadata = {
|
|
161
|
-
source: fileName,
|
|
162
|
-
path: this.filePath,
|
|
163
|
-
created: stats.birthtimeMs,
|
|
164
|
-
lastModified: stats.mtimeMs,
|
|
165
|
-
lastAccessed: stats.atimeMs,
|
|
166
|
-
...this.toLangchainCompatibleMetadata(frontMatter),
|
|
167
|
-
...dataviewFields,
|
|
168
|
-
};
|
|
169
|
-
if (tags.size || frontMatter.tags) {
|
|
170
|
-
metadata.tags = Array.from(new Set([...tags, ...(frontMatter.tags ?? [])])).join(",");
|
|
171
|
-
}
|
|
172
|
-
documents.push(new documents_1.Document({
|
|
173
|
-
pageContent: content,
|
|
174
|
-
metadata,
|
|
175
|
-
}));
|
|
176
|
-
return documents;
|
|
177
|
-
}
|
|
178
|
-
/**
|
|
179
|
-
* Imports the necessary functions from the `node:path` and
|
|
180
|
-
* `node:fs/promises` modules. It is used to dynamically import the
|
|
181
|
-
* functions when needed. If the import fails, it throws an error
|
|
182
|
-
* indicating that the modules failed to load.
|
|
183
|
-
* @returns A promise that resolves to an object containing the imported functions.
|
|
184
|
-
*/
|
|
185
|
-
static async imports() {
|
|
186
|
-
try {
|
|
187
|
-
const { basename } = await import("node:path");
|
|
188
|
-
const { readFile, stat } = await import("node:fs/promises");
|
|
189
|
-
return { basename, readFile, stat };
|
|
190
|
-
}
|
|
191
|
-
catch (e) {
|
|
192
|
-
console.error(e);
|
|
193
|
-
throw new Error(`Failed to load fs/promises. ObsidianFileLoader available only on environment 'node'. It appears you are running environment '${(0, env_1.getEnv)()}'. See https://<link to docs> for alternatives.`);
|
|
194
|
-
}
|
|
195
|
-
}
|
|
196
|
-
}
|
|
197
|
-
// Static patterns used by ObsidianFileLoader. They are attached with
// Object.defineProperty so they stay enumerable, configurable and writable.

// YAML front matter: a `---` fence on the very first line, the YAML body
// (capture group 1), and a closing `---` fence. Both LF and CRLF line
// endings are accepted, and the closing fence may be the last line of the
// file without a trailing newline.
Object.defineProperty(ObsidianFileLoader, "FRONT_MATTER_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /^---\r?\n(.*?)\r?\n---(?:\r?\n|$)/s
});
// Inline tag: `#` preceded by whitespace or the start of the content,
// followed by a letter or underscore and then word characters, `/` or `-`.
// Capture group 1 is the tag without the leading `#`.
Object.defineProperty(ObsidianFileLoader, "TAG_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /(?:\s|^)#([a-zA-Z_][\w/-]*)/g
});
// Full-line Dataview field: `key:: value` occupying a whole line.
// Group 1 is the key, group 2 the rest of the line.
Object.defineProperty(ObsidianFileLoader, "DATAVIEW_LINE_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /^\s*(\w+)::\s*(.*)$/gm
});
// Inline bracket Dataview field: `[key:: value]`. The value stops at the
// first closing bracket that is not part of a `[[wiki link]]`, so several
// fields on one line ("[rating:: 9]! It was [mood:: acceptable]") are
// matched separately instead of being merged by a greedy `.*`.
Object.defineProperty(ObsidianFileLoader, "DATAVIEW_INLINE_BRACKET_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /\[(\w+)::\s*((?:\[\[[^\]\n]*\]\]|[^\]\n])*)\]/gm
});
// Inline parenthesis Dataview field: `(key:: value)`. The value may contain
// one level of balanced parentheses, e.g. `(formula:: f(x))`, and otherwise
// stops at the first closing parenthesis so that several fields on one line
// are matched separately.
Object.defineProperty(ObsidianFileLoader, "DATAVIEW_INLINE_PAREN_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /\((\w+)::\s*((?:\([^()\n]*\)|[^()\n])*)\)/gm
});
|
|
227
|
-
/**
 * @deprecated - Import from "@langchain/community/document_loaders/fs/obsidian" instead. This entrypoint will be removed in 0.3.0.
 *
 * Directory-level loader for Obsidian vaults. It extends DirectoryLoader
 * and registers an ObsidianFileLoader for every '.md' file, which parses
 * YAML frontmatter, Obsidian tags, and Dataview fields.
 */
class ObsidianLoader extends directory_js_1.DirectoryLoader {
    /**
     * Initializes a new instance of the ObsidianLoader class.
     * @param directoryPath The path to the directory containing Obsidian markdown files.
     * @param options Optional settings forwarded unchanged to every ObsidianFileLoader that is created.
     */
    constructor(directoryPath, options) {
        const createFileLoader = (filePath) => new ObsidianFileLoader(filePath, options);
        const loadersByExtension = { ".md": createFileLoader };
        // NOTE(review): the literal `true` is presumably DirectoryLoader's
        // "recursive" flag - confirm against directory.js. Files with no
        // registered loader are handled with UnknownHandling.Ignore.
        super(directoryPath, loadersByExtension, true, directory_js_1.UnknownHandling.Ignore);
    }
}
|
|
247
|
-
exports.ObsidianLoader = ObsidianLoader;
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
/// <reference types="node" resolution-mode="require"/>
|
|
2
|
-
import { DirectoryLoader } from "./directory.js";
|
|
3
|
-
/**
 * Shape of the YAML front matter read from an Obsidian note. Besides the
 * optional keys listed here, any other key may be present with a value
 * of unknown type.
 */
export type FrontMatter = {
    title?: string;
    description?: string;
    /** Either a single string or a list of strings. */
    tags?: string | string[];
    [key: string]: unknown;
};
|
|
9
|
-
/** Optional settings accepted by the Obsidian loaders. */
export interface ObsidianFileLoaderOptions {
    /** Determines whether metadata should be collected from the files. */
    collectMetadata?: boolean;
    /** The character encoding to use when reading files. */
    encoding?: BufferEncoding;
}
|
|
13
|
-
/**
 * @deprecated - Import from "@langchain/community/document_loaders/fs/obsidian" instead. This entrypoint will be removed in 0.3.0.
 *
 * Represents a loader for directories containing Obsidian markdown files. This loader extends
 * the DirectoryLoader and provides functionality to load and parse '.md' files with YAML frontmatter,
 * Obsidian tags, and Dataview fields.
 */
export declare class ObsidianLoader extends DirectoryLoader {
    /**
     * Initializes a new instance of the ObsidianLoader class.
     * @param directoryPath The path to the directory containing Obsidian markdown files.
     * @param options Optional loader settings.
     * @param options.encoding The character encoding to use when reading files. Defaults to 'utf-8'.
     * @param options.collectMetadata Determines whether metadata should be collected from the files. Defaults to true.
     */
    constructor(directoryPath: string, options?: ObsidianFileLoaderOptions);
}
|
|
@@ -1,240 +0,0 @@
|
|
|
1
|
-
import yaml from "js-yaml";
|
|
2
|
-
import { Document } from "@langchain/core/documents";
|
|
3
|
-
import { getEnv } from "@langchain/core/utils/env";
|
|
4
|
-
import { DirectoryLoader, UnknownHandling } from "./directory.js";
|
|
5
|
-
import { BaseDocumentLoader } from "../base.js";
|
|
6
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
7
|
-
// Module-level deprecation notice: the helper is told which entrypoint is
// being imported and which package replaces it. What it does with that
// information is defined in util/entrypoint_deprecation.js.
/* #__PURE__ */ logVersion020MigrationWarning({
    oldEntrypointName: "document_loaders/fs/obsidian",
    newPackageName: "@langchain/community",
});
|
|
11
|
-
/**
 * Represents a loader for Obsidian markdown files. This loader extends the BaseDocumentLoader
 * and provides functionality to parse and extract metadata, tags, and dataview fields from
 * Obsidian markdown files.
 */
class ObsidianFileLoader extends BaseDocumentLoader {
    /**
     * Initializes a new instance of the ObsidianFileLoader class.
     * @param filePath The path to the Obsidian markdown file.
     * @param options Optional settings.
     * @param options.encoding The character encoding to use when reading the file. Defaults to 'utf-8'.
     * @param options.collectMetadata Determines whether metadata should be collected from the file. Defaults to true.
     */
    constructor(filePath, { encoding = "utf-8", collectMetadata = true, } = {}) {
        super();
        // Define each field as an own, enumerable, configurable and
        // writable property holding its constructor value.
        for (const [name, value] of Object.entries({ filePath, encoding, collectMetadata })) {
            Object.defineProperty(this, name, {
                enumerable: true,
                configurable: true,
                writable: true,
                value,
            });
        }
    }
    /**
     * Parses the YAML front matter from the given content string.
     *
     * A string-valued `tags` entry is split on commas into a list of
     * trimmed, non-empty tags.
     * @param content The string content of the markdown file.
     * @returns The parsed front matter mapping, or an empty object when
     * metadata collection is disabled, no front matter is present, the YAML
     * cannot be parsed, or the YAML document is not a mapping.
     */
    parseFrontMatter(content) {
        if (!this.collectMetadata) {
            return {};
        }
        const match = content.match(ObsidianFileLoader.FRONT_MATTER_REGEX);
        if (!match) {
            return {};
        }
        let frontMatter;
        try {
            frontMatter = yaml.load(match[1]);
        }
        catch (e) {
            console.warn("Encountered non-yaml frontmatter");
            return {};
        }
        // An empty or comment-only YAML document parses to null/undefined.
        if (frontMatter === null || frontMatter === undefined) {
            return {};
        }
        // A scalar or list is valid YAML but not usable as metadata; passing
        // it on would, for a string, produce one metadata key per character.
        if (typeof frontMatter !== "object" || Array.isArray(frontMatter)) {
            console.warn("Encountered frontmatter that is not a YAML mapping");
            return {};
        }
        if (frontMatter.tags && typeof frontMatter.tags === "string") {
            // Accept "a, b" as well as "a,b" and drop empty entries.
            frontMatter.tags = frontMatter.tags
                .split(",")
                .map((tag) => tag.trim())
                .filter((tag) => tag.length > 0);
        }
        return frontMatter;
    }
    /**
     * Removes YAML front matter from the given content string.
     * @param content The string content of the markdown file.
     * @returns The content string with the front matter removed; the content is returned unchanged when metadata collection is disabled.
     */
    removeFrontMatter(content) {
        if (!this.collectMetadata) {
            return content;
        }
        return content.replace(ObsidianFileLoader.FRONT_MATTER_REGEX, "");
    }
    /**
     * Parses Obsidian-style tags from the given content string.
     * @param content The string content of the markdown file.
     * @returns A set of parsed tags, without the leading `#`; empty when metadata collection is disabled.
     */
    parseObsidianTags(content) {
        const tags = new Set();
        if (!this.collectMetadata) {
            return tags;
        }
        for (const match of content.matchAll(ObsidianFileLoader.TAG_REGEX)) {
            tags.add(match[1]);
        }
        return tags;
    }
    /**
     * Parses dataview fields from the given content string. Full-line
     * fields are scanned first, then inline bracket fields, then inline
     * parenthesis fields; for a repeated key the match scanned last wins.
     * @param content The string content of the markdown file.
     * @returns A record object containing key-value pairs of dataview fields; empty when metadata collection is disabled.
     */
    parseObsidianDataviewFields(content) {
        if (!this.collectMetadata) {
            return {};
        }
        const fields = {};
        const patterns = [
            ObsidianFileLoader.DATAVIEW_LINE_REGEX,
            ObsidianFileLoader.DATAVIEW_INLINE_BRACKET_REGEX,
            ObsidianFileLoader.DATAVIEW_INLINE_PAREN_REGEX,
        ];
        for (const pattern of patterns) {
            for (const [, key, value] of content.matchAll(pattern)) {
                fields[key] = value;
            }
        }
        return fields;
    }
    /**
     * Converts metadata to a format compatible with Langchain: strings and
     * numbers are kept, every other value is replaced by its
     * `JSON.stringify` representation.
     * @param metadata The metadata object to convert.
     * @returns A record object containing key-value pairs of Langchain-compatible metadata.
     */
    toLangchainCompatibleMetadata(metadata) {
        const result = {};
        for (const [key, value] of Object.entries(metadata)) {
            if (typeof value === "string" || typeof value === "number") {
                result[key] = value;
            }
            else {
                result[key] = JSON.stringify(value);
            }
        }
        return result;
    }
    /**
     * It loads the Obsidian file, parses it, and returns a `Document` instance.
     *
     * Metadata combines, in increasing precedence: file information
     * (`source`, `path`, `created`, `lastModified`, `lastAccessed`), the
     * converted front matter, and the Dataview fields. When inline tags or
     * front matter tags exist, `metadata.tags` is the de-duplicated union
     * of both, joined with ",".
     * @returns An array of `Document` instances to comply with the BaseDocumentLoader interface.
     */
    async load() {
        const { basename, readFile, stat } = await ObsidianFileLoader.imports();
        // The two filesystem calls are independent, so run them concurrently.
        const [stats, rawContent] = await Promise.all([
            stat(this.filePath),
            readFile(this.filePath, this.encoding),
        ]);
        // All parsers run on the raw content, i.e. before the front matter is stripped.
        const frontMatter = this.parseFrontMatter(rawContent);
        const tags = this.parseObsidianTags(rawContent);
        const dataviewFields = this.parseObsidianDataviewFields(rawContent);
        const content = this.removeFrontMatter(rawContent);
        const metadata = {
            source: basename(this.filePath),
            path: this.filePath,
            created: stats.birthtimeMs,
            lastModified: stats.mtimeMs,
            lastAccessed: stats.atimeMs,
            ...this.toLangchainCompatibleMetadata(frontMatter),
            ...dataviewFields,
        };
        if (tags.size || frontMatter.tags) {
            const rawTags = frontMatter.tags ?? [];
            // YAML such as `tags: 2024` or `tags: {a: 1}` yields a
            // non-iterable value; spreading it directly would throw a
            // TypeError, so wrap it into a one-element list instead.
            const frontMatterTags = typeof rawTags === "string" || Array.isArray(rawTags)
                ? rawTags
                : [typeof rawTags === "object" ? JSON.stringify(rawTags) : String(rawTags)];
            metadata.tags = Array.from(new Set([...tags, ...frontMatterTags])).join(",");
        }
        return [
            new Document({
                pageContent: content,
                metadata,
            }),
        ];
    }
    /**
     * Imports the necessary functions from the `node:path` and
     * `node:fs/promises` modules. It is used to dynamically import the
     * functions when needed.
     * @returns A promise that resolves to an object containing the imported functions.
     * @throws Error when the modules fail to load; the underlying failure is logged and attached as the error's `cause`.
     */
    static async imports() {
        try {
            const { basename } = await import("node:path");
            const { readFile, stat } = await import("node:fs/promises");
            return { basename, readFile, stat };
        }
        catch (e) {
            console.error(e);
            // The former message ended with a placeholder URL
            // ("https://<link to docs>"); it is dropped rather than
            // pointing users at an address that does not exist.
            throw new Error(`Failed to load fs/promises. ObsidianFileLoader available only on environment 'node'. It appears you are running environment '${getEnv()}'.`, { cause: e });
        }
    }
}
|
|
191
|
-
// Static patterns used by ObsidianFileLoader. They are attached with
// Object.defineProperty so they stay enumerable, configurable and writable.

// YAML front matter: a `---` fence on the very first line, the YAML body
// (capture group 1), and a closing `---` fence. Both LF and CRLF line
// endings are accepted, and the closing fence may be the last line of the
// file without a trailing newline.
Object.defineProperty(ObsidianFileLoader, "FRONT_MATTER_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /^---\r?\n(.*?)\r?\n---(?:\r?\n|$)/s
});
// Inline tag: `#` preceded by whitespace or the start of the content,
// followed by a letter or underscore and then word characters, `/` or `-`.
// Capture group 1 is the tag without the leading `#`.
Object.defineProperty(ObsidianFileLoader, "TAG_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /(?:\s|^)#([a-zA-Z_][\w/-]*)/g
});
// Full-line Dataview field: `key:: value` occupying a whole line.
// Group 1 is the key, group 2 the rest of the line.
Object.defineProperty(ObsidianFileLoader, "DATAVIEW_LINE_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /^\s*(\w+)::\s*(.*)$/gm
});
// Inline bracket Dataview field: `[key:: value]`. The value stops at the
// first closing bracket that is not part of a `[[wiki link]]`, so several
// fields on one line ("[rating:: 9]! It was [mood:: acceptable]") are
// matched separately instead of being merged by a greedy `.*`.
Object.defineProperty(ObsidianFileLoader, "DATAVIEW_INLINE_BRACKET_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /\[(\w+)::\s*((?:\[\[[^\]\n]*\]\]|[^\]\n])*)\]/gm
});
// Inline parenthesis Dataview field: `(key:: value)`. The value may contain
// one level of balanced parentheses, e.g. `(formula:: f(x))`, and otherwise
// stops at the first closing parenthesis so that several fields on one line
// are matched separately.
Object.defineProperty(ObsidianFileLoader, "DATAVIEW_INLINE_PAREN_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /\((\w+)::\s*((?:\([^()\n]*\)|[^()\n])*)\)/gm
});
|
|
221
|
-
/**
 * @deprecated - Import from "@langchain/community/document_loaders/fs/obsidian" instead. This entrypoint will be removed in 0.3.0.
 *
 * Directory-level loader for Obsidian vaults. It extends DirectoryLoader
 * and registers an ObsidianFileLoader for every '.md' file, which parses
 * YAML frontmatter, Obsidian tags, and Dataview fields.
 */
export class ObsidianLoader extends DirectoryLoader {
    /**
     * Initializes a new instance of the ObsidianLoader class.
     * @param directoryPath The path to the directory containing Obsidian markdown files.
     * @param options Optional settings forwarded unchanged to every ObsidianFileLoader that is created.
     */
    constructor(directoryPath, options) {
        const createFileLoader = (filePath) => new ObsidianFileLoader(filePath, options);
        const loadersByExtension = { ".md": createFileLoader };
        // NOTE(review): the literal `true` is presumably DirectoryLoader's
        // "recursive" flag - confirm against directory.js. Files with no
        // registered loader are handled with UnknownHandling.Ignore.
        super(directoryPath, loadersByExtension, true, UnknownHandling.Ignore);
    }
}
|