langchain 0.2.18 → 0.3.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/openai_functions/index.cjs +2 -2
- package/dist/agents/openai_functions/index.js +2 -2
- package/dist/chains/combine_documents/stuff.cjs +2 -2
- package/dist/chains/combine_documents/stuff.js +2 -2
- package/dist/chains/openai_functions/openapi.cjs +3 -1
- package/dist/chains/openai_functions/openapi.js +3 -1
- package/dist/load/import_constants.cjs +2 -39
- package/dist/load/import_constants.js +2 -39
- package/dist/load/import_map.cjs +2 -3
- package/dist/load/import_map.d.ts +0 -1
- package/dist/load/import_map.js +0 -1
- package/dist/smith/config.d.ts +1 -5
- package/package.json +31 -854
- package/dist/document_loaders/fs/chatgpt.cjs +0 -90
- package/dist/document_loaders/fs/chatgpt.d.ts +0 -8
- package/dist/document_loaders/fs/chatgpt.js +0 -86
- package/dist/document_loaders/fs/csv.cjs +0 -73
- package/dist/document_loaders/fs/csv.d.ts +0 -65
- package/dist/document_loaders/fs/csv.js +0 -69
- package/dist/document_loaders/fs/docx.cjs +0 -58
- package/dist/document_loaders/fs/docx.d.ts +0 -25
- package/dist/document_loaders/fs/docx.js +0 -54
- package/dist/document_loaders/fs/epub.cjs +0 -103
- package/dist/document_loaders/fs/epub.d.ts +0 -33
- package/dist/document_loaders/fs/epub.js +0 -99
- package/dist/document_loaders/fs/notion.cjs +0 -26
- package/dist/document_loaders/fs/notion.d.ts +0 -12
- package/dist/document_loaders/fs/notion.js +0 -22
- package/dist/document_loaders/fs/obsidian.cjs +0 -247
- package/dist/document_loaders/fs/obsidian.d.ts +0 -28
- package/dist/document_loaders/fs/obsidian.js +0 -240
- package/dist/document_loaders/fs/openai_whisper_audio.cjs +0 -49
- package/dist/document_loaders/fs/openai_whisper_audio.d.ts +0 -23
- package/dist/document_loaders/fs/openai_whisper_audio.js +0 -45
- package/dist/document_loaders/fs/pdf.cjs +0 -148
- package/dist/document_loaders/fs/pdf.d.ts +0 -49
- package/dist/document_loaders/fs/pdf.js +0 -144
- package/dist/document_loaders/fs/pptx.cjs +0 -46
- package/dist/document_loaders/fs/pptx.d.ts +0 -25
- package/dist/document_loaders/fs/pptx.js +0 -42
- package/dist/document_loaders/fs/srt.cjs +0 -57
- package/dist/document_loaders/fs/srt.d.ts +0 -32
- package/dist/document_loaders/fs/srt.js +0 -50
- package/dist/document_loaders/fs/unstructured.cjs +0 -338
- package/dist/document_loaders/fs/unstructured.d.ts +0 -125
- package/dist/document_loaders/fs/unstructured.js +0 -333
- package/dist/document_loaders/web/apify_dataset.cjs +0 -130
- package/dist/document_loaders/web/apify_dataset.d.ts +0 -85
- package/dist/document_loaders/web/apify_dataset.js +0 -126
- package/dist/document_loaders/web/assemblyai.cjs +0 -200
- package/dist/document_loaders/web/assemblyai.d.ts +0 -95
- package/dist/document_loaders/web/assemblyai.js +0 -193
- package/dist/document_loaders/web/azure_blob_storage_container.cjs +0 -73
- package/dist/document_loaders/web/azure_blob_storage_container.d.ts +0 -46
- package/dist/document_loaders/web/azure_blob_storage_container.js +0 -69
- package/dist/document_loaders/web/azure_blob_storage_file.cjs +0 -124
- package/dist/document_loaders/web/azure_blob_storage_file.d.ts +0 -53
- package/dist/document_loaders/web/azure_blob_storage_file.js +0 -97
- package/dist/document_loaders/web/browserbase.cjs +0 -93
- package/dist/document_loaders/web/browserbase.d.ts +0 -48
- package/dist/document_loaders/web/browserbase.js +0 -86
- package/dist/document_loaders/web/cheerio.cjs +0 -118
- package/dist/document_loaders/web/cheerio.d.ts +0 -77
- package/dist/document_loaders/web/cheerio.js +0 -114
- package/dist/document_loaders/web/college_confidential.cjs +0 -41
- package/dist/document_loaders/web/college_confidential.d.ts +0 -25
- package/dist/document_loaders/web/college_confidential.js +0 -37
- package/dist/document_loaders/web/confluence.cjs +0 -190
- package/dist/document_loaders/web/confluence.d.ts +0 -114
- package/dist/document_loaders/web/confluence.js +0 -186
- package/dist/document_loaders/web/couchbase.cjs +0 -95
- package/dist/document_loaders/web/couchbase.d.ts +0 -32
- package/dist/document_loaders/web/couchbase.js +0 -91
- package/dist/document_loaders/web/figma.cjs +0 -102
- package/dist/document_loaders/web/figma.d.ts +0 -82
- package/dist/document_loaders/web/figma.js +0 -98
- package/dist/document_loaders/web/firecrawl.cjs +0 -95
- package/dist/document_loaders/web/firecrawl.d.ts +0 -50
- package/dist/document_loaders/web/firecrawl.js +0 -88
- package/dist/document_loaders/web/gitbook.cjs +0 -110
- package/dist/document_loaders/web/gitbook.d.ts +0 -55
- package/dist/document_loaders/web/gitbook.js +0 -106
- package/dist/document_loaders/web/github.cjs +0 -615
- package/dist/document_loaders/web/github.d.ts +0 -203
- package/dist/document_loaders/web/github.js +0 -608
- package/dist/document_loaders/web/hn.cjs +0 -90
- package/dist/document_loaders/web/hn.d.ts +0 -42
- package/dist/document_loaders/web/hn.js +0 -86
- package/dist/document_loaders/web/imsdb.cjs +0 -44
- package/dist/document_loaders/web/imsdb.d.ts +0 -23
- package/dist/document_loaders/web/imsdb.js +0 -40
- package/dist/document_loaders/web/notionapi.cjs +0 -404
- package/dist/document_loaders/web/notionapi.d.ts +0 -133
- package/dist/document_loaders/web/notionapi.js +0 -392
- package/dist/document_loaders/web/notiondb.cjs +0 -199
- package/dist/document_loaders/web/notiondb.d.ts +0 -56
- package/dist/document_loaders/web/notiondb.js +0 -195
- package/dist/document_loaders/web/pdf.cjs +0 -140
- package/dist/document_loaders/web/pdf.d.ts +0 -35
- package/dist/document_loaders/web/pdf.js +0 -136
- package/dist/document_loaders/web/playwright.cjs +0 -89
- package/dist/document_loaders/web/playwright.d.ts +0 -58
- package/dist/document_loaders/web/playwright.js +0 -85
- package/dist/document_loaders/web/puppeteer.cjs +0 -139
- package/dist/document_loaders/web/puppeteer.d.ts +0 -82
- package/dist/document_loaders/web/puppeteer.js +0 -135
- package/dist/document_loaders/web/recursive_url.cjs +0 -198
- package/dist/document_loaders/web/recursive_url.d.ts +0 -33
- package/dist/document_loaders/web/recursive_url.js +0 -194
- package/dist/document_loaders/web/s3.cjs +0 -164
- package/dist/document_loaders/web/s3.d.ts +0 -78
- package/dist/document_loaders/web/s3.js +0 -137
- package/dist/document_loaders/web/searchapi.cjs +0 -150
- package/dist/document_loaders/web/searchapi.d.ts +0 -76
- package/dist/document_loaders/web/searchapi.js +0 -146
- package/dist/document_loaders/web/serpapi.cjs +0 -127
- package/dist/document_loaders/web/serpapi.d.ts +0 -62
- package/dist/document_loaders/web/serpapi.js +0 -123
- package/dist/document_loaders/web/sitemap.cjs +0 -118
- package/dist/document_loaders/web/sitemap.d.ts +0 -41
- package/dist/document_loaders/web/sitemap.js +0 -114
- package/dist/document_loaders/web/sonix_audio.cjs +0 -68
- package/dist/document_loaders/web/sonix_audio.d.ts +0 -36
- package/dist/document_loaders/web/sonix_audio.js +0 -64
- package/dist/document_loaders/web/sort_xyz_blockchain.cjs +0 -157
- package/dist/document_loaders/web/sort_xyz_blockchain.d.ts +0 -78
- package/dist/document_loaders/web/sort_xyz_blockchain.js +0 -153
- package/dist/document_loaders/web/youtube.cjs +0 -116
- package/dist/document_loaders/web/youtube.d.ts +0 -55
- package/dist/document_loaders/web/youtube.js +0 -112
- package/dist/experimental/tools/pyinterpreter.cjs +0 -248
- package/dist/experimental/tools/pyinterpreter.d.ts +0 -18
- package/dist/experimental/tools/pyinterpreter.js +0 -244
- package/dist/retrievers/self_query/chroma.cjs +0 -48
- package/dist/retrievers/self_query/chroma.d.ts +0 -26
- package/dist/retrievers/self_query/chroma.js +0 -44
- package/dist/retrievers/self_query/pinecone.cjs +0 -47
- package/dist/retrievers/self_query/pinecone.d.ts +0 -26
- package/dist/retrievers/self_query/pinecone.js +0 -43
- package/dist/retrievers/self_query/supabase.cjs +0 -278
- package/dist/retrievers/self_query/supabase.d.ts +0 -109
- package/dist/retrievers/self_query/supabase.js +0 -274
- package/dist/retrievers/self_query/supabase_utils.cjs +0 -264
- package/dist/retrievers/self_query/supabase_utils.d.ts +0 -101
- package/dist/retrievers/self_query/supabase_utils.js +0 -259
- package/dist/retrievers/self_query/vectara.cjs +0 -143
- package/dist/retrievers/self_query/vectara.d.ts +0 -42
- package/dist/retrievers/self_query/vectara.js +0 -139
- package/dist/retrievers/self_query/weaviate.cjs +0 -201
- package/dist/retrievers/self_query/weaviate.d.ts +0 -99
- package/dist/retrievers/self_query/weaviate.js +0 -197
- package/dist/types/assemblyai-types.cjs +0 -2
- package/dist/types/assemblyai-types.d.ts +0 -4
- package/dist/types/assemblyai-types.js +0 -1
- package/document_loaders/fs/chatgpt.cjs +0 -1
- package/document_loaders/fs/chatgpt.d.cts +0 -1
- package/document_loaders/fs/chatgpt.d.ts +0 -1
- package/document_loaders/fs/chatgpt.js +0 -1
- package/document_loaders/fs/csv.cjs +0 -1
- package/document_loaders/fs/csv.d.cts +0 -1
- package/document_loaders/fs/csv.d.ts +0 -1
- package/document_loaders/fs/csv.js +0 -1
- package/document_loaders/fs/docx.cjs +0 -1
- package/document_loaders/fs/docx.d.cts +0 -1
- package/document_loaders/fs/docx.d.ts +0 -1
- package/document_loaders/fs/docx.js +0 -1
- package/document_loaders/fs/epub.cjs +0 -1
- package/document_loaders/fs/epub.d.cts +0 -1
- package/document_loaders/fs/epub.d.ts +0 -1
- package/document_loaders/fs/epub.js +0 -1
- package/document_loaders/fs/notion.cjs +0 -1
- package/document_loaders/fs/notion.d.cts +0 -1
- package/document_loaders/fs/notion.d.ts +0 -1
- package/document_loaders/fs/notion.js +0 -1
- package/document_loaders/fs/obsidian.cjs +0 -1
- package/document_loaders/fs/obsidian.d.cts +0 -1
- package/document_loaders/fs/obsidian.d.ts +0 -1
- package/document_loaders/fs/obsidian.js +0 -1
- package/document_loaders/fs/openai_whisper_audio.cjs +0 -1
- package/document_loaders/fs/openai_whisper_audio.d.cts +0 -1
- package/document_loaders/fs/openai_whisper_audio.d.ts +0 -1
- package/document_loaders/fs/openai_whisper_audio.js +0 -1
- package/document_loaders/fs/pdf.cjs +0 -1
- package/document_loaders/fs/pdf.d.cts +0 -1
- package/document_loaders/fs/pdf.d.ts +0 -1
- package/document_loaders/fs/pdf.js +0 -1
- package/document_loaders/fs/pptx.cjs +0 -1
- package/document_loaders/fs/pptx.d.cts +0 -1
- package/document_loaders/fs/pptx.d.ts +0 -1
- package/document_loaders/fs/pptx.js +0 -1
- package/document_loaders/fs/srt.cjs +0 -1
- package/document_loaders/fs/srt.d.cts +0 -1
- package/document_loaders/fs/srt.d.ts +0 -1
- package/document_loaders/fs/srt.js +0 -1
- package/document_loaders/fs/unstructured.cjs +0 -1
- package/document_loaders/fs/unstructured.d.cts +0 -1
- package/document_loaders/fs/unstructured.d.ts +0 -1
- package/document_loaders/fs/unstructured.js +0 -1
- package/document_loaders/web/apify_dataset.cjs +0 -1
- package/document_loaders/web/apify_dataset.d.cts +0 -1
- package/document_loaders/web/apify_dataset.d.ts +0 -1
- package/document_loaders/web/apify_dataset.js +0 -1
- package/document_loaders/web/assemblyai.cjs +0 -1
- package/document_loaders/web/assemblyai.d.cts +0 -1
- package/document_loaders/web/assemblyai.d.ts +0 -1
- package/document_loaders/web/assemblyai.js +0 -1
- package/document_loaders/web/azure_blob_storage_container.cjs +0 -1
- package/document_loaders/web/azure_blob_storage_container.d.cts +0 -1
- package/document_loaders/web/azure_blob_storage_container.d.ts +0 -1
- package/document_loaders/web/azure_blob_storage_container.js +0 -1
- package/document_loaders/web/azure_blob_storage_file.cjs +0 -1
- package/document_loaders/web/azure_blob_storage_file.d.cts +0 -1
- package/document_loaders/web/azure_blob_storage_file.d.ts +0 -1
- package/document_loaders/web/azure_blob_storage_file.js +0 -1
- package/document_loaders/web/browserbase.cjs +0 -1
- package/document_loaders/web/browserbase.d.cts +0 -1
- package/document_loaders/web/browserbase.d.ts +0 -1
- package/document_loaders/web/browserbase.js +0 -1
- package/document_loaders/web/cheerio.cjs +0 -1
- package/document_loaders/web/cheerio.d.cts +0 -1
- package/document_loaders/web/cheerio.d.ts +0 -1
- package/document_loaders/web/cheerio.js +0 -1
- package/document_loaders/web/college_confidential.cjs +0 -1
- package/document_loaders/web/college_confidential.d.cts +0 -1
- package/document_loaders/web/college_confidential.d.ts +0 -1
- package/document_loaders/web/college_confidential.js +0 -1
- package/document_loaders/web/confluence.cjs +0 -1
- package/document_loaders/web/confluence.d.cts +0 -1
- package/document_loaders/web/confluence.d.ts +0 -1
- package/document_loaders/web/confluence.js +0 -1
- package/document_loaders/web/couchbase.cjs +0 -1
- package/document_loaders/web/couchbase.d.cts +0 -1
- package/document_loaders/web/couchbase.d.ts +0 -1
- package/document_loaders/web/couchbase.js +0 -1
- package/document_loaders/web/figma.cjs +0 -1
- package/document_loaders/web/figma.d.cts +0 -1
- package/document_loaders/web/figma.d.ts +0 -1
- package/document_loaders/web/figma.js +0 -1
- package/document_loaders/web/firecrawl.cjs +0 -1
- package/document_loaders/web/firecrawl.d.cts +0 -1
- package/document_loaders/web/firecrawl.d.ts +0 -1
- package/document_loaders/web/firecrawl.js +0 -1
- package/document_loaders/web/gitbook.cjs +0 -1
- package/document_loaders/web/gitbook.d.cts +0 -1
- package/document_loaders/web/gitbook.d.ts +0 -1
- package/document_loaders/web/gitbook.js +0 -1
- package/document_loaders/web/github.cjs +0 -1
- package/document_loaders/web/github.d.cts +0 -1
- package/document_loaders/web/github.d.ts +0 -1
- package/document_loaders/web/github.js +0 -1
- package/document_loaders/web/hn.cjs +0 -1
- package/document_loaders/web/hn.d.cts +0 -1
- package/document_loaders/web/hn.d.ts +0 -1
- package/document_loaders/web/hn.js +0 -1
- package/document_loaders/web/imsdb.cjs +0 -1
- package/document_loaders/web/imsdb.d.cts +0 -1
- package/document_loaders/web/imsdb.d.ts +0 -1
- package/document_loaders/web/imsdb.js +0 -1
- package/document_loaders/web/notionapi.cjs +0 -1
- package/document_loaders/web/notionapi.d.cts +0 -1
- package/document_loaders/web/notionapi.d.ts +0 -1
- package/document_loaders/web/notionapi.js +0 -1
- package/document_loaders/web/notiondb.cjs +0 -1
- package/document_loaders/web/notiondb.d.cts +0 -1
- package/document_loaders/web/notiondb.d.ts +0 -1
- package/document_loaders/web/notiondb.js +0 -1
- package/document_loaders/web/pdf.cjs +0 -1
- package/document_loaders/web/pdf.d.cts +0 -1
- package/document_loaders/web/pdf.d.ts +0 -1
- package/document_loaders/web/pdf.js +0 -1
- package/document_loaders/web/playwright.cjs +0 -1
- package/document_loaders/web/playwright.d.cts +0 -1
- package/document_loaders/web/playwright.d.ts +0 -1
- package/document_loaders/web/playwright.js +0 -1
- package/document_loaders/web/puppeteer.cjs +0 -1
- package/document_loaders/web/puppeteer.d.cts +0 -1
- package/document_loaders/web/puppeteer.d.ts +0 -1
- package/document_loaders/web/puppeteer.js +0 -1
- package/document_loaders/web/recursive_url.cjs +0 -1
- package/document_loaders/web/recursive_url.d.cts +0 -1
- package/document_loaders/web/recursive_url.d.ts +0 -1
- package/document_loaders/web/recursive_url.js +0 -1
- package/document_loaders/web/s3.cjs +0 -1
- package/document_loaders/web/s3.d.cts +0 -1
- package/document_loaders/web/s3.d.ts +0 -1
- package/document_loaders/web/s3.js +0 -1
- package/document_loaders/web/searchapi.cjs +0 -1
- package/document_loaders/web/searchapi.d.cts +0 -1
- package/document_loaders/web/searchapi.d.ts +0 -1
- package/document_loaders/web/searchapi.js +0 -1
- package/document_loaders/web/serpapi.cjs +0 -1
- package/document_loaders/web/serpapi.d.cts +0 -1
- package/document_loaders/web/serpapi.d.ts +0 -1
- package/document_loaders/web/serpapi.js +0 -1
- package/document_loaders/web/sitemap.cjs +0 -1
- package/document_loaders/web/sitemap.d.cts +0 -1
- package/document_loaders/web/sitemap.d.ts +0 -1
- package/document_loaders/web/sitemap.js +0 -1
- package/document_loaders/web/sonix_audio.cjs +0 -1
- package/document_loaders/web/sonix_audio.d.cts +0 -1
- package/document_loaders/web/sonix_audio.d.ts +0 -1
- package/document_loaders/web/sonix_audio.js +0 -1
- package/document_loaders/web/sort_xyz_blockchain.cjs +0 -1
- package/document_loaders/web/sort_xyz_blockchain.d.cts +0 -1
- package/document_loaders/web/sort_xyz_blockchain.d.ts +0 -1
- package/document_loaders/web/sort_xyz_blockchain.js +0 -1
- package/document_loaders/web/youtube.cjs +0 -1
- package/document_loaders/web/youtube.d.cts +0 -1
- package/document_loaders/web/youtube.d.ts +0 -1
- package/document_loaders/web/youtube.js +0 -1
- package/experimental/tools/pyinterpreter.cjs +0 -1
- package/experimental/tools/pyinterpreter.d.cts +0 -1
- package/experimental/tools/pyinterpreter.d.ts +0 -1
- package/experimental/tools/pyinterpreter.js +0 -1
- package/memory/index.cjs +0 -1
- package/memory/index.d.cts +0 -1
- package/memory/index.d.ts +0 -1
- package/memory/index.js +0 -1
- package/retrievers/self_query/chroma.cjs +0 -1
- package/retrievers/self_query/chroma.d.cts +0 -1
- package/retrievers/self_query/chroma.d.ts +0 -1
- package/retrievers/self_query/chroma.js +0 -1
- package/retrievers/self_query/pinecone.cjs +0 -1
- package/retrievers/self_query/pinecone.d.cts +0 -1
- package/retrievers/self_query/pinecone.d.ts +0 -1
- package/retrievers/self_query/pinecone.js +0 -1
- package/retrievers/self_query/supabase.cjs +0 -1
- package/retrievers/self_query/supabase.d.cts +0 -1
- package/retrievers/self_query/supabase.d.ts +0 -1
- package/retrievers/self_query/supabase.js +0 -1
- package/retrievers/self_query/vectara.cjs +0 -1
- package/retrievers/self_query/vectara.d.cts +0 -1
- package/retrievers/self_query/vectara.d.ts +0 -1
- package/retrievers/self_query/vectara.js +0 -1
- package/retrievers/self_query/weaviate.cjs +0 -1
- package/retrievers/self_query/weaviate.d.cts +0 -1
- package/retrievers/self_query/weaviate.d.ts +0 -1
- package/retrievers/self_query/weaviate.js +0 -1
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
|
-
import { CheerioWebBaseLoader } from "./cheerio.js";
|
|
3
|
-
/**
|
|
4
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/hn" instead. This entrypoint will be removed in 0.3.0.
|
|
5
|
-
*
|
|
6
|
-
* A class that extends the CheerioWebBaseLoader class. It represents a
|
|
7
|
-
* loader for loading web pages from the Hacker News website.
|
|
8
|
-
*/
|
|
9
|
-
export declare class HNLoader extends CheerioWebBaseLoader {
|
|
10
|
-
webPath: string;
|
|
11
|
-
constructor(webPath: string);
|
|
12
|
-
/**
|
|
13
|
-
* An asynchronous method that loads the web page. If the webPath includes
|
|
14
|
-
* "item", it calls the loadComments() method to load the comments from
|
|
15
|
-
* the web page. Otherwise, it calls the loadResults() method to load the
|
|
16
|
-
* results from the web page.
|
|
17
|
-
* @returns A Promise that resolves to an array of Document instances.
|
|
18
|
-
*/
|
|
19
|
-
load(): Promise<Document[]>;
|
|
20
|
-
/**
|
|
21
|
-
* A private method that loads the comments from the web page. It selects
|
|
22
|
-
* the elements with the class "athing comtr" using the $ function
|
|
23
|
-
* provided by Cheerio. It also extracts the title of the web page from
|
|
24
|
-
* the element with the id "pagespace". It creates Document instances for
|
|
25
|
-
* each comment, with the comment text as the page content and the source
|
|
26
|
-
* and title as metadata.
|
|
27
|
-
* @param $ A CheerioAPI instance.
|
|
28
|
-
* @returns An array of Document instances.
|
|
29
|
-
*/
|
|
30
|
-
private loadComments;
|
|
31
|
-
/**
|
|
32
|
-
* A private method that loads the results from the web page. It selects
|
|
33
|
-
* the elements with the class "athing" using the $ function provided by
|
|
34
|
-
* Cheerio. It extracts the ranking, link, title, and other metadata from
|
|
35
|
-
* each result item. It creates Document instances for each result item,
|
|
36
|
-
* with the title as the page content and the source, title, link, and
|
|
37
|
-
* ranking as metadata.
|
|
38
|
-
* @param $ A CheerioAPI instance.
|
|
39
|
-
* @returns An array of Document instances.
|
|
40
|
-
*/
|
|
41
|
-
private loadResults;
|
|
42
|
-
}
|
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
|
-
import { CheerioWebBaseLoader } from "./cheerio.js";
|
|
3
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
4
|
-
/* #__PURE__ */ logVersion020MigrationWarning({
|
|
5
|
-
oldEntrypointName: "document_loaders/web/hn",
|
|
6
|
-
newPackageName: "@langchain/community",
|
|
7
|
-
});
|
|
8
|
-
/**
|
|
9
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/hn" instead. This entrypoint will be removed in 0.3.0.
|
|
10
|
-
*
|
|
11
|
-
* A class that extends the CheerioWebBaseLoader class. It represents a
|
|
12
|
-
* loader for loading web pages from the Hacker News website.
|
|
13
|
-
*/
|
|
14
|
-
export class HNLoader extends CheerioWebBaseLoader {
|
|
15
|
-
constructor(webPath) {
|
|
16
|
-
super(webPath);
|
|
17
|
-
Object.defineProperty(this, "webPath", {
|
|
18
|
-
enumerable: true,
|
|
19
|
-
configurable: true,
|
|
20
|
-
writable: true,
|
|
21
|
-
value: webPath
|
|
22
|
-
});
|
|
23
|
-
}
|
|
24
|
-
/**
|
|
25
|
-
* An asynchronous method that loads the web page. If the webPath includes
|
|
26
|
-
* "item", it calls the loadComments() method to load the comments from
|
|
27
|
-
* the web page. Otherwise, it calls the loadResults() method to load the
|
|
28
|
-
* results from the web page.
|
|
29
|
-
* @returns A Promise that resolves to an array of Document instances.
|
|
30
|
-
*/
|
|
31
|
-
async load() {
|
|
32
|
-
const $ = await this.scrape();
|
|
33
|
-
if (this.webPath.includes("item")) {
|
|
34
|
-
return this.loadComments($);
|
|
35
|
-
}
|
|
36
|
-
return this.loadResults($);
|
|
37
|
-
}
|
|
38
|
-
/**
|
|
39
|
-
* A private method that loads the comments from the web page. It selects
|
|
40
|
-
* the elements with the class "athing comtr" using the $ function
|
|
41
|
-
* provided by Cheerio. It also extracts the title of the web page from
|
|
42
|
-
* the element with the id "pagespace". It creates Document instances for
|
|
43
|
-
* each comment, with the comment text as the page content and the source
|
|
44
|
-
* and title as metadata.
|
|
45
|
-
* @param $ A CheerioAPI instance.
|
|
46
|
-
* @returns An array of Document instances.
|
|
47
|
-
*/
|
|
48
|
-
loadComments($) {
|
|
49
|
-
const comments = $("tr[class='athing comtr']");
|
|
50
|
-
const title = $("tr[id='pagespace']").attr("title");
|
|
51
|
-
const documents = [];
|
|
52
|
-
comments.each((_index, comment) => {
|
|
53
|
-
const text = $(comment).text().trim();
|
|
54
|
-
const metadata = { source: this.webPath, title };
|
|
55
|
-
documents.push(new Document({ pageContent: text, metadata }));
|
|
56
|
-
});
|
|
57
|
-
return documents;
|
|
58
|
-
}
|
|
59
|
-
/**
|
|
60
|
-
* A private method that loads the results from the web page. It selects
|
|
61
|
-
* the elements with the class "athing" using the $ function provided by
|
|
62
|
-
* Cheerio. It extracts the ranking, link, title, and other metadata from
|
|
63
|
-
* each result item. It creates Document instances for each result item,
|
|
64
|
-
* with the title as the page content and the source, title, link, and
|
|
65
|
-
* ranking as metadata.
|
|
66
|
-
* @param $ A CheerioAPI instance.
|
|
67
|
-
* @returns An array of Document instances.
|
|
68
|
-
*/
|
|
69
|
-
loadResults($) {
|
|
70
|
-
const items = $("tr[class='athing']");
|
|
71
|
-
const documents = [];
|
|
72
|
-
items.each((_index, item) => {
|
|
73
|
-
const ranking = $(item).find("span[class='rank']").text();
|
|
74
|
-
const link = $(item).find("span[class='titleline'] a").attr("href");
|
|
75
|
-
const title = $(item).find("span[class='titleline']").text().trim();
|
|
76
|
-
const metadata = {
|
|
77
|
-
source: this.webPath,
|
|
78
|
-
title,
|
|
79
|
-
link,
|
|
80
|
-
ranking,
|
|
81
|
-
};
|
|
82
|
-
documents.push(new Document({ pageContent: title, metadata }));
|
|
83
|
-
});
|
|
84
|
-
return documents;
|
|
85
|
-
}
|
|
86
|
-
}
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.IMSDBLoader = void 0;
|
|
4
|
-
const documents_1 = require("@langchain/core/documents");
|
|
5
|
-
const cheerio_js_1 = require("./cheerio.cjs");
|
|
6
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
7
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
8
|
-
oldEntrypointName: "document_loaders/web/imsdb",
|
|
9
|
-
newPackageName: "@langchain/community",
|
|
10
|
-
});
|
|
11
|
-
/**
|
|
12
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/imsdb" instead. This entrypoint will be removed in 0.3.0.
|
|
13
|
-
*
|
|
14
|
-
* A class that extends the CheerioWebBaseLoader class. It represents a
|
|
15
|
-
* loader for loading web pages from the IMSDB (Internet Movie Script
|
|
16
|
-
* Database) website.
|
|
17
|
-
*/
|
|
18
|
-
class IMSDBLoader extends cheerio_js_1.CheerioWebBaseLoader {
|
|
19
|
-
constructor(webPath) {
|
|
20
|
-
super(webPath);
|
|
21
|
-
Object.defineProperty(this, "webPath", {
|
|
22
|
-
enumerable: true,
|
|
23
|
-
configurable: true,
|
|
24
|
-
writable: true,
|
|
25
|
-
value: webPath
|
|
26
|
-
});
|
|
27
|
-
}
|
|
28
|
-
/**
|
|
29
|
-
* An asynchronous method that loads the web page using the scrape()
|
|
30
|
-
* method inherited from the base class. It selects the element with the
|
|
31
|
-
* class 'scrtext' using the $ function provided by Cheerio and extracts
|
|
32
|
-
* the text content. It creates a Document instance with the text content
|
|
33
|
-
* as the page content and the source as metadata. It returns an array
|
|
34
|
-
* containing the Document instance.
|
|
35
|
-
* @returns An array containing a Document instance.
|
|
36
|
-
*/
|
|
37
|
-
async load() {
|
|
38
|
-
const $ = await this.scrape();
|
|
39
|
-
const text = $("td[class='scrtext']").text().trim();
|
|
40
|
-
const metadata = { source: this.webPath };
|
|
41
|
-
return [new documents_1.Document({ pageContent: text, metadata })];
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
exports.IMSDBLoader = IMSDBLoader;
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
|
-
import { CheerioWebBaseLoader } from "./cheerio.js";
|
|
3
|
-
/**
|
|
4
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/imsdb" instead. This entrypoint will be removed in 0.3.0.
|
|
5
|
-
*
|
|
6
|
-
* A class that extends the CheerioWebBaseLoader class. It represents a
|
|
7
|
-
* loader for loading web pages from the IMSDB (Internet Movie Script
|
|
8
|
-
* Database) website.
|
|
9
|
-
*/
|
|
10
|
-
export declare class IMSDBLoader extends CheerioWebBaseLoader {
|
|
11
|
-
webPath: string;
|
|
12
|
-
constructor(webPath: string);
|
|
13
|
-
/**
|
|
14
|
-
* An asynchronous method that loads the web page using the scrape()
|
|
15
|
-
* method inherited from the base class. It selects the element with the
|
|
16
|
-
* class 'scrtext' using the $ function provided by Cheerio and extracts
|
|
17
|
-
* the text content. It creates a Document instance with the text content
|
|
18
|
-
* as the page content and the source as metadata. It returns an array
|
|
19
|
-
* containing the Document instance.
|
|
20
|
-
* @returns An array containing a Document instance.
|
|
21
|
-
*/
|
|
22
|
-
load(): Promise<Document[]>;
|
|
23
|
-
}
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
|
-
import { CheerioWebBaseLoader } from "./cheerio.js";
|
|
3
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
4
|
-
/* #__PURE__ */ logVersion020MigrationWarning({
|
|
5
|
-
oldEntrypointName: "document_loaders/web/imsdb",
|
|
6
|
-
newPackageName: "@langchain/community",
|
|
7
|
-
});
|
|
8
|
-
/**
|
|
9
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/imsdb" instead. This entrypoint will be removed in 0.3.0.
|
|
10
|
-
*
|
|
11
|
-
* A class that extends the CheerioWebBaseLoader class. It represents a
|
|
12
|
-
* loader for loading web pages from the IMSDB (Internet Movie Script
|
|
13
|
-
* Database) website.
|
|
14
|
-
*/
|
|
15
|
-
export class IMSDBLoader extends CheerioWebBaseLoader {
|
|
16
|
-
constructor(webPath) {
|
|
17
|
-
super(webPath);
|
|
18
|
-
Object.defineProperty(this, "webPath", {
|
|
19
|
-
enumerable: true,
|
|
20
|
-
configurable: true,
|
|
21
|
-
writable: true,
|
|
22
|
-
value: webPath
|
|
23
|
-
});
|
|
24
|
-
}
|
|
25
|
-
/**
|
|
26
|
-
* An asynchronous method that loads the web page using the scrape()
|
|
27
|
-
* method inherited from the base class. It selects the element with the
|
|
28
|
-
* class 'scrtext' using the $ function provided by Cheerio and extracts
|
|
29
|
-
* the text content. It creates a Document instance with the text content
|
|
30
|
-
* as the page content and the source as metadata. It returns an array
|
|
31
|
-
* containing the Document instance.
|
|
32
|
-
* @returns An array containing a Document instance.
|
|
33
|
-
*/
|
|
34
|
-
async load() {
|
|
35
|
-
const $ = await this.scrape();
|
|
36
|
-
const text = $("td[class='scrtext']").text().trim();
|
|
37
|
-
const metadata = { source: this.webPath };
|
|
38
|
-
return [new Document({ pageContent: text, metadata })];
|
|
39
|
-
}
|
|
40
|
-
}
|
|
@@ -1,404 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.NotionAPILoader = exports.isDatabase = exports.isPage = exports.isErrorResponse = exports.isDatabaseResponse = exports.isPageResponse = void 0;
|
|
7
|
-
const client_1 = require("@notionhq/client");
|
|
8
|
-
const notion_to_md_1 = require("notion-to-md");
|
|
9
|
-
const notion_js_1 = require("notion-to-md/build/utils/notion.js");
|
|
10
|
-
const js_yaml_1 = __importDefault(require("js-yaml"));
|
|
11
|
-
const documents_1 = require("@langchain/core/documents");
|
|
12
|
-
const async_caller_1 = require("@langchain/core/utils/async_caller");
|
|
13
|
-
const base_js_1 = require("../base.cjs");
|
|
14
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
15
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
16
|
-
oldEntrypointName: "document_loaders/web/notionapi",
|
|
17
|
-
newPackageName: "@langchain/community",
|
|
18
|
-
});
|
|
19
|
-
const isPageResponse = (res) => !(0, client_1.isNotionClientError)(res) && res.object === "page";
|
|
20
|
-
exports.isPageResponse = isPageResponse;
|
|
21
|
-
const isDatabaseResponse = (res) => !(0, client_1.isNotionClientError)(res) && res.object === "database";
|
|
22
|
-
exports.isDatabaseResponse = isDatabaseResponse;
|
|
23
|
-
const isErrorResponse = (res) => (0, client_1.isNotionClientError)(res);
|
|
24
|
-
exports.isErrorResponse = isErrorResponse;
|
|
25
|
-
const isPage = (res) => (0, exports.isPageResponse)(res) && (0, client_1.isFullPage)(res);
|
|
26
|
-
exports.isPage = isPage;
|
|
27
|
-
/**
 * Tells whether a Notion response is a *full* database object.
 * @param res A value returned (or caught) from a Notion API call.
 * @returns `false` unless `res` passes `isDatabaseResponse`; otherwise the
 * result of the client's `isFullDatabase` check.
 */
const isDatabase = (res) => {
    // Looked up through `exports` so the published guard is the one consulted.
    if (!(0, exports.isDatabaseResponse)(res)) {
        return false;
    }
    return (0, client_1.isFullDatabase)(res);
};
exports.isDatabase = isDatabase;
|
|
29
|
-
/**
 * @deprecated - Import from "@langchain/community/document_loaders/web/notionapi" instead. This entrypoint will be removed in 0.3.0.
 *
 * A class that extends the BaseDocumentLoader class. It represents a
 * document loader for loading documents from Notion using the Notion API.
 * @example
 * ```typescript
 * import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
 *
 * const pageLoader = new NotionAPILoader({
 *   clientOptions: { auth: "<NOTION_INTEGRATION_TOKEN>" },
 *   id: "<PAGE_ID>",
 *   type: "page",
 * });
 * const splitter = new RecursiveCharacterTextSplitter();
 * const pageDocs = await pageLoader.loadAndSplit(splitter);
 * const dbLoader = new NotionAPILoader({
 *   clientOptions: { auth: "<NOTION_INTEGRATION_TOKEN>" },
 *   id: "<DATABASE_ID>",
 *   type: "database",
 *   propertiesAsHeader: true,
 * });
 * const dbDocs = await dbLoader.load();
 * ```
 */
class NotionAPILoader extends base_js_1.BaseDocumentLoader {
    /**
     * @param options Loader options. Fields read here: `id` (page or database
     * id), `clientOptions` (spread into the Notion client options),
     * `callerOptions` (spread into the AsyncCaller options),
     * `onDocumentLoaded` (progress callback) and `propertiesAsHeader`.
     */
    constructor(options) {
        super();
        // Declare every instance field as an own, enumerable, writable data
        // property (initially undefined), in a fixed order, before assigning.
        const fieldNames = [
            "caller",
            "notionClient",
            "n2mClient",
            "id",
            "pageQueue",
            "pageCompleted",
            "pageQueueTotal",
            "documents",
            "rootTitle",
            "onDocumentLoaded",
            "propertiesAsHeader",
        ];
        for (const fieldName of fieldNames) {
            Object.defineProperty(this, fieldName, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0
            });
        }
        // Defaults come first so caller-supplied options can override them.
        this.caller = new async_caller_1.AsyncCaller({
            maxConcurrency: 64,
            ...options.callerOptions,
        });
        this.notionClient = new client_1.Client({
            logger: () => { },
            ...options.clientOptions,
        });
        this.n2mClient = new notion_to_md_1.NotionToMarkdown({
            notionClient: this.notionClient,
            config: { parseChildPages: false, convertImagesToBase64: false },
        });
        this.id = options.id;
        this.pageQueue = [];
        this.pageCompleted = [];
        this.pageQueueTotal = 0;
        this.documents = [];
        this.rootTitle = "";
        this.onDocumentLoaded = options.onDocumentLoaded ?? ((_ti, _cu) => { });
        this.propertiesAsHeader = options.propertiesAsHeader || false;
    }
    /**
     * Adds a selection of page ids to the pageQueue and removes duplicates.
     * An id is skipped when it is already completed, already queued, or
     * repeated earlier in the same call; `pageQueueTotal` grows only by the
     * number of ids actually enqueued.
     * @param items An array of string ids
     */
    addToQueue(...items) {
        // One Set lookup per id instead of re-concatenating both lists per item.
        const known = new Set([...this.pageCompleted, ...this.pageQueue]);
        const deDuped = [];
        for (const item of items) {
            if (known.has(item)) {
                continue;
            }
            known.add(item);
            deDuped.push(item);
        }
        this.pageQueue.push(...deDuped);
        this.pageQueueTotal += deDuped.length;
    }
    /**
     * Parses a Notion GetResponse object (page or database) and returns a string of the title.
     * @param obj The Notion GetResponse object to parse.
     * @returns The string of the title, or `null` when `obj` is neither a full
     * page with a title property nor a full database.
     */
    getTitle(obj) {
        if ((0, exports.isPage)(obj)) {
            const titleProp = Object.values(obj.properties).find((prop) => prop.type === "title");
            if (titleProp) {
                return this.getPropValue(titleProp);
            }
        }
        if ((0, exports.isDatabase)(obj)) {
            return obj.title
                .map((v) => this.n2mClient.annotatePlainText(v.plain_text, v.annotations))
                .join("");
        }
        return null;
    }
    /**
     * Parses the property type and returns a string
     * @param prop The Notion page property to parse.
     * @returns A string of parsed property. Property types without a case
     * below yield `Unsupported type: <type>`.
     */
    getPropValue(prop) {
        // The payload of a property lives under the key named by its type.
        const payload = prop[prop.type];
        switch (prop.type) {
            case "number":
                return payload !== null ? payload.toString() : "";
            case "url":
            case "email":
            case "phone_number":
                return payload || "";
            case "select":
            case "status":
                return payload?.name ?? "";
            case "multi_select":
            case "files":
                return `[${payload.map((v) => `"${v.name}"`).join(", ")}]`;
            case "date": {
                const start = payload?.start ?? "";
                const end = payload?.end ? ` - ${payload?.end}` : "";
                return `${start}${end}`;
            }
            case "checkbox":
                return payload.toString();
            case "created_by":
            case "last_edited_by":
                return `["${payload.object}", "${payload.id}"]`;
            case "created_time":
            case "last_edited_time":
                return payload;
            case "title":
            case "rich_text":
                return payload
                    .map((v) => this.n2mClient.annotatePlainText(v.plain_text, v.annotations))
                    .join("");
            case "people":
                return `[${payload
                    .map((v) => `["${v.object}", "${v.id}"]`)
                    .join(", ")}]`;
            case "unique_id":
                return `${payload.prefix || ""}${payload.number}`;
            case "relation":
                return `[${payload.map((v) => `"${v.id}"`).join(", ")}]`;
            default:
                return `Unsupported type: ${prop.type}`;
        }
    }
    /**
     * Parses the properties of a Notion page and returns them as key-value
     * pairs. The value of the page's title property is additionally exposed
     * under the `_title` key.
     * @param page The Notion page to parse.
     * @returns An object containing the parsed properties as key-value pairs.
     */
    parsePageProperties(page) {
        // Collect entries once; Object.fromEntries keeps first-seen key order
        // and last-written values, matching repeated object spreads.
        const entries = [];
        for (const [propName, prop] of Object.entries(page.properties)) {
            const value = this.getPropValue(prop);
            entries.push([propName, value]);
            if (prop.type === "title") {
                entries.push(["_title", value]);
            }
        }
        return Object.fromEntries(entries);
    }
    /**
     * Parses the details of a Notion page and returns them as an object.
     * The page's `id` is renamed to `notionId` and `properties` is replaced by
     * the parsed key-value form; all other fields are copied through.
     * @param page The Notion page to parse.
     * @returns An object containing the parsed details of the page.
     */
    parsePageDetails(page) {
        const { id, ...rest } = page;
        return {
            ...rest,
            notionId: id,
            properties: this.parsePageProperties(page),
        };
    }
    /**
     * Loads a Notion block and returns it as an MdBlock object.
     * @param block The Notion block to load.
     * @returns A Promise that resolves to an MdBlock object.
     */
    async loadBlock(block) {
        const mdBlock = {
            type: block.type,
            blockId: block.id,
            parent: await this.caller.call(() => this.n2mClient.blockToMarkdown(block)),
            children: [],
        };
        if (!block.has_children) {
            return mdBlock;
        }
        // A synced block that mirrors another block takes its children from
        // the block it is synced from.
        const syncedFromId = block.type === "synced_block"
            ? block.synced_block?.synced_from?.block_id
            : undefined;
        const block_id = syncedFromId ? syncedFromId : block.id;
        const childResponse = await this.caller.call(() => (0, notion_js_1.getBlockChildren)(this.notionClient, block_id, null));
        mdBlock.children = await this.loadBlocks(childResponse);
        return mdBlock;
    }
    /**
     * Loads Notion blocks and their children recursively. Child pages are
     * queued rather than converted, and the pages of child databases are
     * queued via `loadDatabase`.
     * @param blocksResponse The response from the Notion API containing the blocks to load.
     * @returns A Promise that resolves to an array containing the loaded MdBlocks.
     */
    async loadBlocks(blocksResponse) {
        const blocks = blocksResponse.filter(client_1.isFullBlock);
        // Add child pages to queue. Types are compared exactly so the three
        // partitions below (pages / databases / everything else) never overlap.
        const childPages = blocks
            .filter((block) => block.type === "child_page")
            .map((block) => block.id);
        if (childPages.length > 0) {
            this.addToQueue(...childPages);
        }
        // Add child database pages to queue
        const childDatabases = blocks
            .filter((block) => block.type === "child_database")
            .map((block) => this.caller.call(() => this.loadDatabase(block.id)));
        // Load this block and child blocks
        const loadingMdBlocks = blocks
            .filter((block) => block.type !== "child_page" && block.type !== "child_database")
            .map((block) => this.loadBlock(block));
        // Wait for the database queries too, but only the blocks are returned.
        const [mdBlocks] = await Promise.all([
            Promise.all(loadingMdBlocks),
            Promise.all(childDatabases),
        ]);
        return mdBlocks;
    }
    /**
     * Loads a Notion page and its child documents, then adds it to the completed documents array.
     * The page id is always recorded in `pageCompleted`; a Document is only
     * produced when the page is a full page with non-empty content.
     * @param page The Notion page or page ID to load.
     */
    async loadPage(page) {
        // Check page is a page ID or a PageObjectResponse
        const isId = typeof page === "string";
        const pageId = isId ? page : page.id;
        const pageData = isId
            ? this.caller.call(() => this.notionClient.pages.retrieve({ page_id: page }))
            : page;
        const [pageDetails, pageBlocks] = await Promise.all([
            pageData,
            this.caller.call(() => (0, notion_js_1.getBlockChildren)(this.notionClient, pageId, null)),
        ]);
        if (!(0, client_1.isFullPage)(pageDetails)) {
            this.pageCompleted.push(pageId);
            return;
        }
        const mdBlocks = await this.loadBlocks(pageBlocks);
        const mdStringObject = this.n2mClient.toMarkdownString(mdBlocks);
        let pageContent = mdStringObject.parent;
        const metadata = this.parsePageDetails(pageDetails);
        if (this.propertiesAsHeader) {
            // Prepend the parsed properties as a YAML front-matter header.
            pageContent =
                `---\n` +
                    `${js_yaml_1.default.dump(metadata.properties)}` +
                    `---\n\n` +
                    `${pageContent ?? ""}`;
        }
        if (!pageContent) {
            this.pageCompleted.push(pageId);
            return;
        }
        const pageDocument = new documents_1.Document({ pageContent, metadata });
        this.documents.push(pageDocument);
        this.pageCompleted.push(pageId);
        this.onDocumentLoaded(this.documents.length, this.pageQueueTotal, this.getTitle(pageDetails) || undefined, this.rootTitle);
    }
    /**
     * Loads a Notion database and adds its pages to the queue.
     * Query errors are logged and swallowed, so this method never rejects.
     * @param id The ID of the Notion database to load.
     */
    async loadDatabase(id) {
        try {
            const pages = (0, client_1.iteratePaginatedAPI)(this.notionClient.databases.query, {
                database_id: id,
                page_size: 50,
            });
            for await (const page of pages) {
                this.addToQueue(page.id);
            }
        }
        catch (e) {
            console.log(e);
            // TODO: Catch and report api request errors
        }
    }
    /**
     * Loads the documents from Notion based on the specified options.
     * The configured id is tried both as a page and as a database; whichever
     * lookup succeeds seeds the queue, which is then drained one page at a time.
     * @returns A Promise that resolves to an array of Documents.
     * @throws AggregateError when both the page and the database lookup fail.
     */
    async load() {
        // Failures are turned into resolved error values so both lookups can
        // be inspected together below.
        const resPagePromise = this.notionClient.pages
            .retrieve({ page_id: this.id })
            .then((res) => {
            this.addToQueue(this.id);
            return res;
        })
            .catch((error) => error);
        const resDatabasePromise = this.notionClient.databases
            .retrieve({ database_id: this.id })
            .then(async (res) => {
            await this.loadDatabase(this.id);
            return res;
        })
            .catch((error) => error);
        const [resPage, resDatabase] = await Promise.all([
            resPagePromise,
            resDatabasePromise,
        ]);
        // Check if both resPage and resDatabase resulted in error responses
        const errors = [resPage, resDatabase].filter(exports.isErrorResponse);
        if (errors.length === 2) {
            if (errors.every((e) => e.code === client_1.APIErrorCode.ObjectNotFound)) {
                throw new AggregateError([
                    Error(`Could not find object with ID: ${this.id}. Make sure the relevant pages and databases are shared with your integration.`),
                    ...errors,
                ]);
            }
            throw new AggregateError(errors);
        }
        this.rootTitle =
            this.getTitle(resPage) || this.getTitle(resDatabase) || this.id;
        // loadPage may enqueue further pages, so keep draining until empty.
        let pageId = this.pageQueue.shift();
        while (pageId) {
            await this.loadPage(pageId);
            pageId = this.pageQueue.shift();
        }
        return this.documents;
    }
}
|
|
404
|
-
exports.NotionAPILoader = NotionAPILoader;
|