langchain 0.2.18 → 0.3.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/openai_functions/index.cjs +2 -2
- package/dist/agents/openai_functions/index.js +2 -2
- package/dist/chains/combine_documents/stuff.cjs +2 -2
- package/dist/chains/combine_documents/stuff.js +2 -2
- package/dist/chains/openai_functions/openapi.cjs +3 -1
- package/dist/chains/openai_functions/openapi.js +3 -1
- package/dist/load/import_constants.cjs +2 -39
- package/dist/load/import_constants.js +2 -39
- package/dist/load/import_map.cjs +2 -3
- package/dist/load/import_map.d.ts +0 -1
- package/dist/load/import_map.js +0 -1
- package/dist/smith/config.d.ts +1 -5
- package/package.json +31 -854
- package/dist/document_loaders/fs/chatgpt.cjs +0 -90
- package/dist/document_loaders/fs/chatgpt.d.ts +0 -8
- package/dist/document_loaders/fs/chatgpt.js +0 -86
- package/dist/document_loaders/fs/csv.cjs +0 -73
- package/dist/document_loaders/fs/csv.d.ts +0 -65
- package/dist/document_loaders/fs/csv.js +0 -69
- package/dist/document_loaders/fs/docx.cjs +0 -58
- package/dist/document_loaders/fs/docx.d.ts +0 -25
- package/dist/document_loaders/fs/docx.js +0 -54
- package/dist/document_loaders/fs/epub.cjs +0 -103
- package/dist/document_loaders/fs/epub.d.ts +0 -33
- package/dist/document_loaders/fs/epub.js +0 -99
- package/dist/document_loaders/fs/notion.cjs +0 -26
- package/dist/document_loaders/fs/notion.d.ts +0 -12
- package/dist/document_loaders/fs/notion.js +0 -22
- package/dist/document_loaders/fs/obsidian.cjs +0 -247
- package/dist/document_loaders/fs/obsidian.d.ts +0 -28
- package/dist/document_loaders/fs/obsidian.js +0 -240
- package/dist/document_loaders/fs/openai_whisper_audio.cjs +0 -49
- package/dist/document_loaders/fs/openai_whisper_audio.d.ts +0 -23
- package/dist/document_loaders/fs/openai_whisper_audio.js +0 -45
- package/dist/document_loaders/fs/pdf.cjs +0 -148
- package/dist/document_loaders/fs/pdf.d.ts +0 -49
- package/dist/document_loaders/fs/pdf.js +0 -144
- package/dist/document_loaders/fs/pptx.cjs +0 -46
- package/dist/document_loaders/fs/pptx.d.ts +0 -25
- package/dist/document_loaders/fs/pptx.js +0 -42
- package/dist/document_loaders/fs/srt.cjs +0 -57
- package/dist/document_loaders/fs/srt.d.ts +0 -32
- package/dist/document_loaders/fs/srt.js +0 -50
- package/dist/document_loaders/fs/unstructured.cjs +0 -338
- package/dist/document_loaders/fs/unstructured.d.ts +0 -125
- package/dist/document_loaders/fs/unstructured.js +0 -333
- package/dist/document_loaders/web/apify_dataset.cjs +0 -130
- package/dist/document_loaders/web/apify_dataset.d.ts +0 -85
- package/dist/document_loaders/web/apify_dataset.js +0 -126
- package/dist/document_loaders/web/assemblyai.cjs +0 -200
- package/dist/document_loaders/web/assemblyai.d.ts +0 -95
- package/dist/document_loaders/web/assemblyai.js +0 -193
- package/dist/document_loaders/web/azure_blob_storage_container.cjs +0 -73
- package/dist/document_loaders/web/azure_blob_storage_container.d.ts +0 -46
- package/dist/document_loaders/web/azure_blob_storage_container.js +0 -69
- package/dist/document_loaders/web/azure_blob_storage_file.cjs +0 -124
- package/dist/document_loaders/web/azure_blob_storage_file.d.ts +0 -53
- package/dist/document_loaders/web/azure_blob_storage_file.js +0 -97
- package/dist/document_loaders/web/browserbase.cjs +0 -93
- package/dist/document_loaders/web/browserbase.d.ts +0 -48
- package/dist/document_loaders/web/browserbase.js +0 -86
- package/dist/document_loaders/web/cheerio.cjs +0 -118
- package/dist/document_loaders/web/cheerio.d.ts +0 -77
- package/dist/document_loaders/web/cheerio.js +0 -114
- package/dist/document_loaders/web/college_confidential.cjs +0 -41
- package/dist/document_loaders/web/college_confidential.d.ts +0 -25
- package/dist/document_loaders/web/college_confidential.js +0 -37
- package/dist/document_loaders/web/confluence.cjs +0 -190
- package/dist/document_loaders/web/confluence.d.ts +0 -114
- package/dist/document_loaders/web/confluence.js +0 -186
- package/dist/document_loaders/web/couchbase.cjs +0 -95
- package/dist/document_loaders/web/couchbase.d.ts +0 -32
- package/dist/document_loaders/web/couchbase.js +0 -91
- package/dist/document_loaders/web/figma.cjs +0 -102
- package/dist/document_loaders/web/figma.d.ts +0 -82
- package/dist/document_loaders/web/figma.js +0 -98
- package/dist/document_loaders/web/firecrawl.cjs +0 -95
- package/dist/document_loaders/web/firecrawl.d.ts +0 -50
- package/dist/document_loaders/web/firecrawl.js +0 -88
- package/dist/document_loaders/web/gitbook.cjs +0 -110
- package/dist/document_loaders/web/gitbook.d.ts +0 -55
- package/dist/document_loaders/web/gitbook.js +0 -106
- package/dist/document_loaders/web/github.cjs +0 -615
- package/dist/document_loaders/web/github.d.ts +0 -203
- package/dist/document_loaders/web/github.js +0 -608
- package/dist/document_loaders/web/hn.cjs +0 -90
- package/dist/document_loaders/web/hn.d.ts +0 -42
- package/dist/document_loaders/web/hn.js +0 -86
- package/dist/document_loaders/web/imsdb.cjs +0 -44
- package/dist/document_loaders/web/imsdb.d.ts +0 -23
- package/dist/document_loaders/web/imsdb.js +0 -40
- package/dist/document_loaders/web/notionapi.cjs +0 -404
- package/dist/document_loaders/web/notionapi.d.ts +0 -133
- package/dist/document_loaders/web/notionapi.js +0 -392
- package/dist/document_loaders/web/notiondb.cjs +0 -199
- package/dist/document_loaders/web/notiondb.d.ts +0 -56
- package/dist/document_loaders/web/notiondb.js +0 -195
- package/dist/document_loaders/web/pdf.cjs +0 -140
- package/dist/document_loaders/web/pdf.d.ts +0 -35
- package/dist/document_loaders/web/pdf.js +0 -136
- package/dist/document_loaders/web/playwright.cjs +0 -89
- package/dist/document_loaders/web/playwright.d.ts +0 -58
- package/dist/document_loaders/web/playwright.js +0 -85
- package/dist/document_loaders/web/puppeteer.cjs +0 -139
- package/dist/document_loaders/web/puppeteer.d.ts +0 -82
- package/dist/document_loaders/web/puppeteer.js +0 -135
- package/dist/document_loaders/web/recursive_url.cjs +0 -198
- package/dist/document_loaders/web/recursive_url.d.ts +0 -33
- package/dist/document_loaders/web/recursive_url.js +0 -194
- package/dist/document_loaders/web/s3.cjs +0 -164
- package/dist/document_loaders/web/s3.d.ts +0 -78
- package/dist/document_loaders/web/s3.js +0 -137
- package/dist/document_loaders/web/searchapi.cjs +0 -150
- package/dist/document_loaders/web/searchapi.d.ts +0 -76
- package/dist/document_loaders/web/searchapi.js +0 -146
- package/dist/document_loaders/web/serpapi.cjs +0 -127
- package/dist/document_loaders/web/serpapi.d.ts +0 -62
- package/dist/document_loaders/web/serpapi.js +0 -123
- package/dist/document_loaders/web/sitemap.cjs +0 -118
- package/dist/document_loaders/web/sitemap.d.ts +0 -41
- package/dist/document_loaders/web/sitemap.js +0 -114
- package/dist/document_loaders/web/sonix_audio.cjs +0 -68
- package/dist/document_loaders/web/sonix_audio.d.ts +0 -36
- package/dist/document_loaders/web/sonix_audio.js +0 -64
- package/dist/document_loaders/web/sort_xyz_blockchain.cjs +0 -157
- package/dist/document_loaders/web/sort_xyz_blockchain.d.ts +0 -78
- package/dist/document_loaders/web/sort_xyz_blockchain.js +0 -153
- package/dist/document_loaders/web/youtube.cjs +0 -116
- package/dist/document_loaders/web/youtube.d.ts +0 -55
- package/dist/document_loaders/web/youtube.js +0 -112
- package/dist/experimental/tools/pyinterpreter.cjs +0 -248
- package/dist/experimental/tools/pyinterpreter.d.ts +0 -18
- package/dist/experimental/tools/pyinterpreter.js +0 -244
- package/dist/retrievers/self_query/chroma.cjs +0 -48
- package/dist/retrievers/self_query/chroma.d.ts +0 -26
- package/dist/retrievers/self_query/chroma.js +0 -44
- package/dist/retrievers/self_query/pinecone.cjs +0 -47
- package/dist/retrievers/self_query/pinecone.d.ts +0 -26
- package/dist/retrievers/self_query/pinecone.js +0 -43
- package/dist/retrievers/self_query/supabase.cjs +0 -278
- package/dist/retrievers/self_query/supabase.d.ts +0 -109
- package/dist/retrievers/self_query/supabase.js +0 -274
- package/dist/retrievers/self_query/supabase_utils.cjs +0 -264
- package/dist/retrievers/self_query/supabase_utils.d.ts +0 -101
- package/dist/retrievers/self_query/supabase_utils.js +0 -259
- package/dist/retrievers/self_query/vectara.cjs +0 -143
- package/dist/retrievers/self_query/vectara.d.ts +0 -42
- package/dist/retrievers/self_query/vectara.js +0 -139
- package/dist/retrievers/self_query/weaviate.cjs +0 -201
- package/dist/retrievers/self_query/weaviate.d.ts +0 -99
- package/dist/retrievers/self_query/weaviate.js +0 -197
- package/dist/types/assemblyai-types.cjs +0 -2
- package/dist/types/assemblyai-types.d.ts +0 -4
- package/dist/types/assemblyai-types.js +0 -1
- package/document_loaders/fs/chatgpt.cjs +0 -1
- package/document_loaders/fs/chatgpt.d.cts +0 -1
- package/document_loaders/fs/chatgpt.d.ts +0 -1
- package/document_loaders/fs/chatgpt.js +0 -1
- package/document_loaders/fs/csv.cjs +0 -1
- package/document_loaders/fs/csv.d.cts +0 -1
- package/document_loaders/fs/csv.d.ts +0 -1
- package/document_loaders/fs/csv.js +0 -1
- package/document_loaders/fs/docx.cjs +0 -1
- package/document_loaders/fs/docx.d.cts +0 -1
- package/document_loaders/fs/docx.d.ts +0 -1
- package/document_loaders/fs/docx.js +0 -1
- package/document_loaders/fs/epub.cjs +0 -1
- package/document_loaders/fs/epub.d.cts +0 -1
- package/document_loaders/fs/epub.d.ts +0 -1
- package/document_loaders/fs/epub.js +0 -1
- package/document_loaders/fs/notion.cjs +0 -1
- package/document_loaders/fs/notion.d.cts +0 -1
- package/document_loaders/fs/notion.d.ts +0 -1
- package/document_loaders/fs/notion.js +0 -1
- package/document_loaders/fs/obsidian.cjs +0 -1
- package/document_loaders/fs/obsidian.d.cts +0 -1
- package/document_loaders/fs/obsidian.d.ts +0 -1
- package/document_loaders/fs/obsidian.js +0 -1
- package/document_loaders/fs/openai_whisper_audio.cjs +0 -1
- package/document_loaders/fs/openai_whisper_audio.d.cts +0 -1
- package/document_loaders/fs/openai_whisper_audio.d.ts +0 -1
- package/document_loaders/fs/openai_whisper_audio.js +0 -1
- package/document_loaders/fs/pdf.cjs +0 -1
- package/document_loaders/fs/pdf.d.cts +0 -1
- package/document_loaders/fs/pdf.d.ts +0 -1
- package/document_loaders/fs/pdf.js +0 -1
- package/document_loaders/fs/pptx.cjs +0 -1
- package/document_loaders/fs/pptx.d.cts +0 -1
- package/document_loaders/fs/pptx.d.ts +0 -1
- package/document_loaders/fs/pptx.js +0 -1
- package/document_loaders/fs/srt.cjs +0 -1
- package/document_loaders/fs/srt.d.cts +0 -1
- package/document_loaders/fs/srt.d.ts +0 -1
- package/document_loaders/fs/srt.js +0 -1
- package/document_loaders/fs/unstructured.cjs +0 -1
- package/document_loaders/fs/unstructured.d.cts +0 -1
- package/document_loaders/fs/unstructured.d.ts +0 -1
- package/document_loaders/fs/unstructured.js +0 -1
- package/document_loaders/web/apify_dataset.cjs +0 -1
- package/document_loaders/web/apify_dataset.d.cts +0 -1
- package/document_loaders/web/apify_dataset.d.ts +0 -1
- package/document_loaders/web/apify_dataset.js +0 -1
- package/document_loaders/web/assemblyai.cjs +0 -1
- package/document_loaders/web/assemblyai.d.cts +0 -1
- package/document_loaders/web/assemblyai.d.ts +0 -1
- package/document_loaders/web/assemblyai.js +0 -1
- package/document_loaders/web/azure_blob_storage_container.cjs +0 -1
- package/document_loaders/web/azure_blob_storage_container.d.cts +0 -1
- package/document_loaders/web/azure_blob_storage_container.d.ts +0 -1
- package/document_loaders/web/azure_blob_storage_container.js +0 -1
- package/document_loaders/web/azure_blob_storage_file.cjs +0 -1
- package/document_loaders/web/azure_blob_storage_file.d.cts +0 -1
- package/document_loaders/web/azure_blob_storage_file.d.ts +0 -1
- package/document_loaders/web/azure_blob_storage_file.js +0 -1
- package/document_loaders/web/browserbase.cjs +0 -1
- package/document_loaders/web/browserbase.d.cts +0 -1
- package/document_loaders/web/browserbase.d.ts +0 -1
- package/document_loaders/web/browserbase.js +0 -1
- package/document_loaders/web/cheerio.cjs +0 -1
- package/document_loaders/web/cheerio.d.cts +0 -1
- package/document_loaders/web/cheerio.d.ts +0 -1
- package/document_loaders/web/cheerio.js +0 -1
- package/document_loaders/web/college_confidential.cjs +0 -1
- package/document_loaders/web/college_confidential.d.cts +0 -1
- package/document_loaders/web/college_confidential.d.ts +0 -1
- package/document_loaders/web/college_confidential.js +0 -1
- package/document_loaders/web/confluence.cjs +0 -1
- package/document_loaders/web/confluence.d.cts +0 -1
- package/document_loaders/web/confluence.d.ts +0 -1
- package/document_loaders/web/confluence.js +0 -1
- package/document_loaders/web/couchbase.cjs +0 -1
- package/document_loaders/web/couchbase.d.cts +0 -1
- package/document_loaders/web/couchbase.d.ts +0 -1
- package/document_loaders/web/couchbase.js +0 -1
- package/document_loaders/web/figma.cjs +0 -1
- package/document_loaders/web/figma.d.cts +0 -1
- package/document_loaders/web/figma.d.ts +0 -1
- package/document_loaders/web/figma.js +0 -1
- package/document_loaders/web/firecrawl.cjs +0 -1
- package/document_loaders/web/firecrawl.d.cts +0 -1
- package/document_loaders/web/firecrawl.d.ts +0 -1
- package/document_loaders/web/firecrawl.js +0 -1
- package/document_loaders/web/gitbook.cjs +0 -1
- package/document_loaders/web/gitbook.d.cts +0 -1
- package/document_loaders/web/gitbook.d.ts +0 -1
- package/document_loaders/web/gitbook.js +0 -1
- package/document_loaders/web/github.cjs +0 -1
- package/document_loaders/web/github.d.cts +0 -1
- package/document_loaders/web/github.d.ts +0 -1
- package/document_loaders/web/github.js +0 -1
- package/document_loaders/web/hn.cjs +0 -1
- package/document_loaders/web/hn.d.cts +0 -1
- package/document_loaders/web/hn.d.ts +0 -1
- package/document_loaders/web/hn.js +0 -1
- package/document_loaders/web/imsdb.cjs +0 -1
- package/document_loaders/web/imsdb.d.cts +0 -1
- package/document_loaders/web/imsdb.d.ts +0 -1
- package/document_loaders/web/imsdb.js +0 -1
- package/document_loaders/web/notionapi.cjs +0 -1
- package/document_loaders/web/notionapi.d.cts +0 -1
- package/document_loaders/web/notionapi.d.ts +0 -1
- package/document_loaders/web/notionapi.js +0 -1
- package/document_loaders/web/notiondb.cjs +0 -1
- package/document_loaders/web/notiondb.d.cts +0 -1
- package/document_loaders/web/notiondb.d.ts +0 -1
- package/document_loaders/web/notiondb.js +0 -1
- package/document_loaders/web/pdf.cjs +0 -1
- package/document_loaders/web/pdf.d.cts +0 -1
- package/document_loaders/web/pdf.d.ts +0 -1
- package/document_loaders/web/pdf.js +0 -1
- package/document_loaders/web/playwright.cjs +0 -1
- package/document_loaders/web/playwright.d.cts +0 -1
- package/document_loaders/web/playwright.d.ts +0 -1
- package/document_loaders/web/playwright.js +0 -1
- package/document_loaders/web/puppeteer.cjs +0 -1
- package/document_loaders/web/puppeteer.d.cts +0 -1
- package/document_loaders/web/puppeteer.d.ts +0 -1
- package/document_loaders/web/puppeteer.js +0 -1
- package/document_loaders/web/recursive_url.cjs +0 -1
- package/document_loaders/web/recursive_url.d.cts +0 -1
- package/document_loaders/web/recursive_url.d.ts +0 -1
- package/document_loaders/web/recursive_url.js +0 -1
- package/document_loaders/web/s3.cjs +0 -1
- package/document_loaders/web/s3.d.cts +0 -1
- package/document_loaders/web/s3.d.ts +0 -1
- package/document_loaders/web/s3.js +0 -1
- package/document_loaders/web/searchapi.cjs +0 -1
- package/document_loaders/web/searchapi.d.cts +0 -1
- package/document_loaders/web/searchapi.d.ts +0 -1
- package/document_loaders/web/searchapi.js +0 -1
- package/document_loaders/web/serpapi.cjs +0 -1
- package/document_loaders/web/serpapi.d.cts +0 -1
- package/document_loaders/web/serpapi.d.ts +0 -1
- package/document_loaders/web/serpapi.js +0 -1
- package/document_loaders/web/sitemap.cjs +0 -1
- package/document_loaders/web/sitemap.d.cts +0 -1
- package/document_loaders/web/sitemap.d.ts +0 -1
- package/document_loaders/web/sitemap.js +0 -1
- package/document_loaders/web/sonix_audio.cjs +0 -1
- package/document_loaders/web/sonix_audio.d.cts +0 -1
- package/document_loaders/web/sonix_audio.d.ts +0 -1
- package/document_loaders/web/sonix_audio.js +0 -1
- package/document_loaders/web/sort_xyz_blockchain.cjs +0 -1
- package/document_loaders/web/sort_xyz_blockchain.d.cts +0 -1
- package/document_loaders/web/sort_xyz_blockchain.d.ts +0 -1
- package/document_loaders/web/sort_xyz_blockchain.js +0 -1
- package/document_loaders/web/youtube.cjs +0 -1
- package/document_loaders/web/youtube.d.cts +0 -1
- package/document_loaders/web/youtube.d.ts +0 -1
- package/document_loaders/web/youtube.js +0 -1
- package/experimental/tools/pyinterpreter.cjs +0 -1
- package/experimental/tools/pyinterpreter.d.cts +0 -1
- package/experimental/tools/pyinterpreter.d.ts +0 -1
- package/experimental/tools/pyinterpreter.js +0 -1
- package/memory/index.cjs +0 -1
- package/memory/index.d.cts +0 -1
- package/memory/index.d.ts +0 -1
- package/memory/index.js +0 -1
- package/retrievers/self_query/chroma.cjs +0 -1
- package/retrievers/self_query/chroma.d.cts +0 -1
- package/retrievers/self_query/chroma.d.ts +0 -1
- package/retrievers/self_query/chroma.js +0 -1
- package/retrievers/self_query/pinecone.cjs +0 -1
- package/retrievers/self_query/pinecone.d.cts +0 -1
- package/retrievers/self_query/pinecone.d.ts +0 -1
- package/retrievers/self_query/pinecone.js +0 -1
- package/retrievers/self_query/supabase.cjs +0 -1
- package/retrievers/self_query/supabase.d.cts +0 -1
- package/retrievers/self_query/supabase.d.ts +0 -1
- package/retrievers/self_query/supabase.js +0 -1
- package/retrievers/self_query/vectara.cjs +0 -1
- package/retrievers/self_query/vectara.d.cts +0 -1
- package/retrievers/self_query/vectara.d.ts +0 -1
- package/retrievers/self_query/vectara.js +0 -1
- package/retrievers/self_query/weaviate.cjs +0 -1
- package/retrievers/self_query/weaviate.d.cts +0 -1
- package/retrievers/self_query/weaviate.d.ts +0 -1
- package/retrievers/self_query/weaviate.js +0 -1
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
import type { CheerioAPI, CheerioOptions, load as LoadT, SelectorType } from "cheerio";
|
|
2
|
-
import { Document } from "@langchain/core/documents";
|
|
3
|
-
import { AsyncCaller, AsyncCallerParams } from "@langchain/core/utils/async_caller";
|
|
4
|
-
import { BaseDocumentLoader } from "../base.js";
|
|
5
|
-
import type { DocumentLoader } from "../base.js";
|
|
6
|
-
/**
|
|
7
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/cheerio" instead. This entrypoint will be removed in 0.3.0.
|
|
8
|
-
*
|
|
9
|
-
* Represents the parameters for configuring the CheerioWebBaseLoader. It
|
|
10
|
-
* extends the AsyncCallerParams interface and adds additional parameters
|
|
11
|
-
* specific to web-based loaders.
|
|
12
|
-
*/
|
|
13
|
-
export interface WebBaseLoaderParams extends AsyncCallerParams {
|
|
14
|
-
/**
|
|
15
|
-
* The timeout in milliseconds for the fetch request. Defaults to 10s.
|
|
16
|
-
*/
|
|
17
|
-
timeout?: number;
|
|
18
|
-
/**
|
|
19
|
-
* The selector to use to extract the text from the document. Defaults to
|
|
20
|
-
* "body".
|
|
21
|
-
*/
|
|
22
|
-
selector?: SelectorType;
|
|
23
|
-
/**
|
|
24
|
-
* The text decoder to use to decode the response. Defaults to UTF-8.
|
|
25
|
-
*/
|
|
26
|
-
textDecoder?: TextDecoder;
|
|
27
|
-
}
|
|
28
|
-
/**
|
|
29
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/cheerio" instead. This entrypoint will be removed in 0.3.0.
|
|
30
|
-
*
|
|
31
|
-
* A class that extends the BaseDocumentLoader and implements the
|
|
32
|
-
* DocumentLoader interface. It represents a document loader for loading
|
|
33
|
-
* web-based documents using Cheerio.
|
|
34
|
-
* @example
|
|
35
|
-
* ```typescript
|
|
36
|
-
* const loader = new CheerioWebBaseLoader("https://exampleurl.com");
|
|
37
|
-
* const docs = await loader.load();
|
|
38
|
-
* console.log({ docs });
|
|
39
|
-
* ```
|
|
40
|
-
*/
|
|
41
|
-
export declare class CheerioWebBaseLoader extends BaseDocumentLoader implements DocumentLoader {
|
|
42
|
-
webPath: string;
|
|
43
|
-
timeout: number;
|
|
44
|
-
caller: AsyncCaller;
|
|
45
|
-
selector?: SelectorType;
|
|
46
|
-
textDecoder?: TextDecoder;
|
|
47
|
-
constructor(webPath: string, fields?: WebBaseLoaderParams);
|
|
48
|
-
/**
|
|
49
|
-
* Fetches web documents from the given array of URLs and loads them using Cheerio.
|
|
50
|
-
* It returns an array of CheerioAPI instances.
|
|
51
|
-
* @param urls An array of URLs to fetch and load.
|
|
52
|
-
* @returns A Promise that resolves to an array of CheerioAPI instances.
|
|
53
|
-
*/
|
|
54
|
-
static scrapeAll(urls: string[], caller: AsyncCaller, timeout: number | undefined, textDecoder?: TextDecoder, options?: CheerioOptions): Promise<CheerioAPI[]>;
|
|
55
|
-
static _scrape(url: string, caller: AsyncCaller, timeout: number | undefined, textDecoder?: TextDecoder, options?: CheerioOptions): Promise<CheerioAPI>;
|
|
56
|
-
/**
|
|
57
|
-
* Fetches the web document from the webPath and loads it using Cheerio.
|
|
58
|
-
* It returns a CheerioAPI instance.
|
|
59
|
-
* @returns A Promise that resolves to a CheerioAPI instance.
|
|
60
|
-
*/
|
|
61
|
-
scrape(): Promise<CheerioAPI>;
|
|
62
|
-
/**
|
|
63
|
-
* Extracts the text content from the loaded document using the selector
|
|
64
|
-
* and creates a Document instance with the extracted text and metadata.
|
|
65
|
-
* It returns an array of Document instances.
|
|
66
|
-
* @returns A Promise that resolves to an array of Document instances.
|
|
67
|
-
*/
|
|
68
|
-
load(): Promise<Document[]>;
|
|
69
|
-
/**
|
|
70
|
-
* A static method that dynamically imports the Cheerio library and
|
|
71
|
-
* returns the load function. If the import fails, it throws an error.
|
|
72
|
-
* @returns A Promise that resolves to an object containing the load function from the Cheerio library.
|
|
73
|
-
*/
|
|
74
|
-
static imports(): Promise<{
|
|
75
|
-
load: typeof LoadT;
|
|
76
|
-
}>;
|
|
77
|
-
}
|
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
|
-
import { AsyncCaller, } from "@langchain/core/utils/async_caller";
|
|
3
|
-
import { BaseDocumentLoader } from "../base.js";
|
|
4
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
5
|
-
/* #__PURE__ */ logVersion020MigrationWarning({
|
|
6
|
-
oldEntrypointName: "document_loaders/web/cheerio",
|
|
7
|
-
newPackageName: "@langchain/community",
|
|
8
|
-
});
|
|
9
|
-
/**
|
|
10
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/cheerio" instead. This entrypoint will be removed in 0.3.0.
|
|
11
|
-
*
|
|
12
|
-
* A class that extends the BaseDocumentLoader and implements the
|
|
13
|
-
* DocumentLoader interface. It represents a document loader for loading
|
|
14
|
-
* web-based documents using Cheerio.
|
|
15
|
-
* @example
|
|
16
|
-
* ```typescript
|
|
17
|
-
* const loader = new CheerioWebBaseLoader("https://exampleurl.com");
|
|
18
|
-
* const docs = await loader.load();
|
|
19
|
-
* console.log({ docs });
|
|
20
|
-
* ```
|
|
21
|
-
*/
|
|
22
|
-
export class CheerioWebBaseLoader extends BaseDocumentLoader {
|
|
23
|
-
constructor(webPath, fields) {
|
|
24
|
-
super();
|
|
25
|
-
Object.defineProperty(this, "webPath", {
|
|
26
|
-
enumerable: true,
|
|
27
|
-
configurable: true,
|
|
28
|
-
writable: true,
|
|
29
|
-
value: webPath
|
|
30
|
-
});
|
|
31
|
-
Object.defineProperty(this, "timeout", {
|
|
32
|
-
enumerable: true,
|
|
33
|
-
configurable: true,
|
|
34
|
-
writable: true,
|
|
35
|
-
value: void 0
|
|
36
|
-
});
|
|
37
|
-
Object.defineProperty(this, "caller", {
|
|
38
|
-
enumerable: true,
|
|
39
|
-
configurable: true,
|
|
40
|
-
writable: true,
|
|
41
|
-
value: void 0
|
|
42
|
-
});
|
|
43
|
-
Object.defineProperty(this, "selector", {
|
|
44
|
-
enumerable: true,
|
|
45
|
-
configurable: true,
|
|
46
|
-
writable: true,
|
|
47
|
-
value: void 0
|
|
48
|
-
});
|
|
49
|
-
Object.defineProperty(this, "textDecoder", {
|
|
50
|
-
enumerable: true,
|
|
51
|
-
configurable: true,
|
|
52
|
-
writable: true,
|
|
53
|
-
value: void 0
|
|
54
|
-
});
|
|
55
|
-
const { timeout, selector, textDecoder, ...rest } = fields ?? {};
|
|
56
|
-
this.timeout = timeout ?? 10000;
|
|
57
|
-
this.caller = new AsyncCaller(rest);
|
|
58
|
-
this.selector = selector ?? "body";
|
|
59
|
-
this.textDecoder = textDecoder;
|
|
60
|
-
}
|
|
61
|
-
/**
|
|
62
|
-
* Fetches web documents from the given array of URLs and loads them using Cheerio.
|
|
63
|
-
* It returns an array of CheerioAPI instances.
|
|
64
|
-
* @param urls An array of URLs to fetch and load.
|
|
65
|
-
* @returns A Promise that resolves to an array of CheerioAPI instances.
|
|
66
|
-
*/
|
|
67
|
-
static async scrapeAll(urls, caller, timeout, textDecoder, options) {
|
|
68
|
-
return Promise.all(urls.map((url) => CheerioWebBaseLoader._scrape(url, caller, timeout, textDecoder, options)));
|
|
69
|
-
}
|
|
70
|
-
static async _scrape(url, caller, timeout, textDecoder, options) {
|
|
71
|
-
const { load } = await CheerioWebBaseLoader.imports();
|
|
72
|
-
const response = await caller.call(fetch, url, {
|
|
73
|
-
signal: timeout ? AbortSignal.timeout(timeout) : undefined,
|
|
74
|
-
});
|
|
75
|
-
const html = textDecoder?.decode(await response.arrayBuffer()) ??
|
|
76
|
-
(await response.text());
|
|
77
|
-
return load(html, options);
|
|
78
|
-
}
|
|
79
|
-
/**
|
|
80
|
-
* Fetches the web document from the webPath and loads it using Cheerio.
|
|
81
|
-
* It returns a CheerioAPI instance.
|
|
82
|
-
* @returns A Promise that resolves to a CheerioAPI instance.
|
|
83
|
-
*/
|
|
84
|
-
async scrape() {
|
|
85
|
-
return CheerioWebBaseLoader._scrape(this.webPath, this.caller, this.timeout, this.textDecoder);
|
|
86
|
-
}
|
|
87
|
-
/**
|
|
88
|
-
* Extracts the text content from the loaded document using the selector
|
|
89
|
-
* and creates a Document instance with the extracted text and metadata.
|
|
90
|
-
* It returns an array of Document instances.
|
|
91
|
-
* @returns A Promise that resolves to an array of Document instances.
|
|
92
|
-
*/
|
|
93
|
-
async load() {
|
|
94
|
-
const $ = await this.scrape();
|
|
95
|
-
const text = $(this.selector).text();
|
|
96
|
-
const metadata = { source: this.webPath };
|
|
97
|
-
return [new Document({ pageContent: text, metadata })];
|
|
98
|
-
}
|
|
99
|
-
/**
|
|
100
|
-
* A static method that dynamically imports the Cheerio library and
|
|
101
|
-
* returns the load function. If the import fails, it throws an error.
|
|
102
|
-
* @returns A Promise that resolves to an object containing the load function from the Cheerio library.
|
|
103
|
-
*/
|
|
104
|
-
static async imports() {
|
|
105
|
-
try {
|
|
106
|
-
const { load } = await import("cheerio");
|
|
107
|
-
return { load };
|
|
108
|
-
}
|
|
109
|
-
catch (e) {
|
|
110
|
-
console.error(e);
|
|
111
|
-
throw new Error("Please install cheerio as a dependency with, e.g. `yarn add cheerio`");
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
}
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.CollegeConfidentialLoader = void 0;
|
|
4
|
-
const documents_1 = require("@langchain/core/documents");
|
|
5
|
-
const cheerio_js_1 = require("./cheerio.cjs");
|
|
6
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
7
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
8
|
-
oldEntrypointName: "document_loaders/web/college_confidential",
|
|
9
|
-
newPackageName: "@langchain/community",
|
|
10
|
-
});
|
|
11
|
-
/**
|
|
12
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/college_confidential" instead. This entrypoint will be removed in 0.3.0.
|
|
13
|
-
*
|
|
14
|
-
* A document loader specifically designed for loading documents from the
|
|
15
|
-
* College Confidential website. It extends the CheerioWebBaseLoader.
|
|
16
|
-
* @example
|
|
17
|
-
* ```typescript
|
|
18
|
-
* const loader = new CollegeConfidentialLoader("https://exampleurl.com");
|
|
19
|
-
* const docs = await loader.load();
|
|
20
|
-
* console.log({ docs });
|
|
21
|
-
* ```
|
|
22
|
-
*/
|
|
23
|
-
class CollegeConfidentialLoader extends cheerio_js_1.CheerioWebBaseLoader {
|
|
24
|
-
constructor(webPath) {
|
|
25
|
-
super(webPath);
|
|
26
|
-
}
|
|
27
|
-
/**
|
|
28
|
-
* Overrides the base load() method to extract the text content from the
|
|
29
|
-
* loaded document using a specific selector for the College Confidential
|
|
30
|
-
* website. It creates a Document instance with the extracted text and
|
|
31
|
-
* metadata, and returns an array containing the Document instance.
|
|
32
|
-
* @returns An array containing a Document instance with the extracted text and metadata from the loaded College Confidential web document.
|
|
33
|
-
*/
|
|
34
|
-
async load() {
|
|
35
|
-
const $ = await this.scrape();
|
|
36
|
-
const text = $("main[class='skin-handler']").text();
|
|
37
|
-
const metadata = { source: this.webPath };
|
|
38
|
-
return [new documents_1.Document({ pageContent: text, metadata })];
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
exports.CollegeConfidentialLoader = CollegeConfidentialLoader;
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
|
-
import { CheerioWebBaseLoader } from "./cheerio.js";
|
|
3
|
-
/**
|
|
4
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/college_confidential" instead. This entrypoint will be removed in 0.3.0.
|
|
5
|
-
*
|
|
6
|
-
* A document loader specifically designed for loading documents from the
|
|
7
|
-
* College Confidential website. It extends the CheerioWebBaseLoader.
|
|
8
|
-
* @example
|
|
9
|
-
* ```typescript
|
|
10
|
-
* const loader = new CollegeConfidentialLoader("https://exampleurl.com");
|
|
11
|
-
* const docs = await loader.load();
|
|
12
|
-
* console.log({ docs });
|
|
13
|
-
* ```
|
|
14
|
-
*/
|
|
15
|
-
export declare class CollegeConfidentialLoader extends CheerioWebBaseLoader {
|
|
16
|
-
constructor(webPath: string);
|
|
17
|
-
/**
|
|
18
|
-
* Overrides the base load() method to extract the text content from the
|
|
19
|
-
* loaded document using a specific selector for the College Confidential
|
|
20
|
-
* website. It creates a Document instance with the extracted text and
|
|
21
|
-
* metadata, and returns an array containing the Document instance.
|
|
22
|
-
* @returns An array containing a Document instance with the extracted text and metadata from the loaded College Confidential web document.
|
|
23
|
-
*/
|
|
24
|
-
load(): Promise<Document[]>;
|
|
25
|
-
}
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
|
-
import { CheerioWebBaseLoader } from "./cheerio.js";
|
|
3
|
-
import { logVersion020MigrationWarning } from "../../util/entrypoint_deprecation.js";
|
|
4
|
-
/* #__PURE__ */ logVersion020MigrationWarning({
|
|
5
|
-
oldEntrypointName: "document_loaders/web/college_confidential",
|
|
6
|
-
newPackageName: "@langchain/community",
|
|
7
|
-
});
|
|
8
|
-
/**
|
|
9
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/college_confidential" instead. This entrypoint will be removed in 0.3.0.
|
|
10
|
-
*
|
|
11
|
-
* A document loader specifically designed for loading documents from the
|
|
12
|
-
* College Confidential website. It extends the CheerioWebBaseLoader.
|
|
13
|
-
* @example
|
|
14
|
-
* ```typescript
|
|
15
|
-
* const loader = new CollegeConfidentialLoader("https://exampleurl.com");
|
|
16
|
-
* const docs = await loader.load();
|
|
17
|
-
* console.log({ docs });
|
|
18
|
-
* ```
|
|
19
|
-
*/
|
|
20
|
-
export class CollegeConfidentialLoader extends CheerioWebBaseLoader {
|
|
21
|
-
constructor(webPath) {
|
|
22
|
-
super(webPath);
|
|
23
|
-
}
|
|
24
|
-
/**
|
|
25
|
-
* Overrides the base load() method to extract the text content from the
|
|
26
|
-
* loaded document using a specific selector for the College Confidential
|
|
27
|
-
* website. It creates a Document instance with the extracted text and
|
|
28
|
-
* metadata, and returns an array containing the Document instance.
|
|
29
|
-
* @returns An array containing a Document instance with the extracted text and metadata from the loaded College Confidential web document.
|
|
30
|
-
*/
|
|
31
|
-
async load() {
|
|
32
|
-
const $ = await this.scrape();
|
|
33
|
-
const text = $("main[class='skin-handler']").text();
|
|
34
|
-
const metadata = { source: this.webPath };
|
|
35
|
-
return [new Document({ pageContent: text, metadata })];
|
|
36
|
-
}
|
|
37
|
-
}
|
|
@@ -1,190 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.ConfluencePagesLoader = void 0;
|
|
4
|
-
const html_to_text_1 = require("html-to-text");
|
|
5
|
-
const documents_1 = require("@langchain/core/documents");
|
|
6
|
-
const base_js_1 = require("../base.cjs");
|
|
7
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
8
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
9
|
-
oldEntrypointName: "document_loaders/web/confluence",
|
|
10
|
-
newPackageName: "@langchain/community",
|
|
11
|
-
});
|
|
12
|
-
/**
|
|
13
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/confluence" instead. This entrypoint will be removed in 0.3.0.
|
|
14
|
-
*
|
|
15
|
-
* Class representing a document loader for loading pages from Confluence.
|
|
16
|
-
* @example
|
|
17
|
-
* ```typescript
|
|
18
|
-
* const loader = new ConfluencePagesLoader({
|
|
19
|
-
* baseUrl: "https:
|
|
20
|
-
* spaceKey: "~EXAMPLE362906de5d343d49dcdbae5dEXAMPLE",
|
|
21
|
-
* username: "your-username",
|
|
22
|
-
* accessToken: "your-access-token",
|
|
23
|
-
* });
|
|
24
|
-
* const documents = await loader.load();
|
|
25
|
-
* console.log(documents);
|
|
26
|
-
* ```
|
|
27
|
-
*/
|
|
28
|
-
class ConfluencePagesLoader extends base_js_1.BaseDocumentLoader {
|
|
29
|
-
constructor({ baseUrl, spaceKey, username, accessToken, limit = 25, expand = "body.storage,version", personalAccessToken, }) {
|
|
30
|
-
super();
|
|
31
|
-
Object.defineProperty(this, "baseUrl", {
|
|
32
|
-
enumerable: true,
|
|
33
|
-
configurable: true,
|
|
34
|
-
writable: true,
|
|
35
|
-
value: void 0
|
|
36
|
-
});
|
|
37
|
-
Object.defineProperty(this, "spaceKey", {
|
|
38
|
-
enumerable: true,
|
|
39
|
-
configurable: true,
|
|
40
|
-
writable: true,
|
|
41
|
-
value: void 0
|
|
42
|
-
});
|
|
43
|
-
Object.defineProperty(this, "username", {
|
|
44
|
-
enumerable: true,
|
|
45
|
-
configurable: true,
|
|
46
|
-
writable: true,
|
|
47
|
-
value: void 0
|
|
48
|
-
});
|
|
49
|
-
Object.defineProperty(this, "accessToken", {
|
|
50
|
-
enumerable: true,
|
|
51
|
-
configurable: true,
|
|
52
|
-
writable: true,
|
|
53
|
-
value: void 0
|
|
54
|
-
});
|
|
55
|
-
Object.defineProperty(this, "limit", {
|
|
56
|
-
enumerable: true,
|
|
57
|
-
configurable: true,
|
|
58
|
-
writable: true,
|
|
59
|
-
value: void 0
|
|
60
|
-
});
|
|
61
|
-
/**
|
|
62
|
-
* expand parameter for confluence rest api
|
|
63
|
-
* description can be found at https://developer.atlassian.com/server/confluence/expansions-in-the-rest-api/
|
|
64
|
-
*/
|
|
65
|
-
Object.defineProperty(this, "expand", {
|
|
66
|
-
enumerable: true,
|
|
67
|
-
configurable: true,
|
|
68
|
-
writable: true,
|
|
69
|
-
value: void 0
|
|
70
|
-
});
|
|
71
|
-
Object.defineProperty(this, "personalAccessToken", {
|
|
72
|
-
enumerable: true,
|
|
73
|
-
configurable: true,
|
|
74
|
-
writable: true,
|
|
75
|
-
value: void 0
|
|
76
|
-
});
|
|
77
|
-
this.baseUrl = baseUrl;
|
|
78
|
-
this.spaceKey = spaceKey;
|
|
79
|
-
this.username = username;
|
|
80
|
-
this.accessToken = accessToken;
|
|
81
|
-
this.limit = limit;
|
|
82
|
-
this.expand = expand;
|
|
83
|
-
this.personalAccessToken = personalAccessToken;
|
|
84
|
-
}
|
|
85
|
-
/**
|
|
86
|
-
* Returns the authorization header for the request.
|
|
87
|
-
* @returns The authorization header as a string, or undefined if no credentials were provided.
|
|
88
|
-
*/
|
|
89
|
-
get authorizationHeader() {
|
|
90
|
-
if (this.personalAccessToken) {
|
|
91
|
-
return `Bearer ${this.personalAccessToken}`;
|
|
92
|
-
}
|
|
93
|
-
else if (this.username && this.accessToken) {
|
|
94
|
-
const authToken = Buffer.from(`${this.username}:${this.accessToken}`).toString("base64");
|
|
95
|
-
return `Basic ${authToken}`;
|
|
96
|
-
}
|
|
97
|
-
return undefined;
|
|
98
|
-
}
|
|
99
|
-
/**
|
|
100
|
-
* Fetches all the pages in the specified space and converts each page to
|
|
101
|
-
* a Document instance.
|
|
102
|
-
* @param options the extra options of the load function
|
|
103
|
-
* @param options.limit The limit parameter to overwrite the size to fetch pages.
|
|
104
|
-
* @param options.start The start parameter to set inital offset to fetch pages.
|
|
105
|
-
* @returns Promise resolving to an array of Document instances.
|
|
106
|
-
*/
|
|
107
|
-
async load(options) {
|
|
108
|
-
try {
|
|
109
|
-
const pages = await this.fetchAllPagesInSpace(options?.start, options?.limit);
|
|
110
|
-
return pages.map((page) => this.createDocumentFromPage(page));
|
|
111
|
-
}
|
|
112
|
-
catch (error) {
|
|
113
|
-
console.error("Error:", error);
|
|
114
|
-
return [];
|
|
115
|
-
}
|
|
116
|
-
}
|
|
117
|
-
/**
|
|
118
|
-
* Fetches data from the Confluence API using the provided URL.
|
|
119
|
-
* @param url The URL to fetch data from.
|
|
120
|
-
* @returns Promise resolving to the JSON response from the API.
|
|
121
|
-
*/
|
|
122
|
-
async fetchConfluenceData(url) {
|
|
123
|
-
try {
|
|
124
|
-
const initialHeaders = {
|
|
125
|
-
"Content-Type": "application/json",
|
|
126
|
-
Accept: "application/json",
|
|
127
|
-
};
|
|
128
|
-
const authHeader = this.authorizationHeader;
|
|
129
|
-
if (authHeader) {
|
|
130
|
-
initialHeaders.Authorization = authHeader;
|
|
131
|
-
}
|
|
132
|
-
const response = await fetch(url, {
|
|
133
|
-
headers: initialHeaders,
|
|
134
|
-
});
|
|
135
|
-
if (!response.ok) {
|
|
136
|
-
throw new Error(`Failed to fetch ${url} from Confluence: ${response.status}`);
|
|
137
|
-
}
|
|
138
|
-
return await response.json();
|
|
139
|
-
}
|
|
140
|
-
catch (error) {
|
|
141
|
-
throw new Error(`Failed to fetch ${url} from Confluence: ${error}`);
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
/**
|
|
145
|
-
* Recursively fetches all the pages in the specified space.
|
|
146
|
-
* @param start The start parameter to paginate through the results.
|
|
147
|
-
* @returns Promise resolving to an array of ConfluencePage objects.
|
|
148
|
-
*/
|
|
149
|
-
async fetchAllPagesInSpace(start = 0, limit = this.limit) {
|
|
150
|
-
const url = `${this.baseUrl}/rest/api/content?spaceKey=${this.spaceKey}&limit=${limit}&start=${start}&expand=${this.expand}`;
|
|
151
|
-
const data = await this.fetchConfluenceData(url);
|
|
152
|
-
if (data.size === 0) {
|
|
153
|
-
return [];
|
|
154
|
-
}
|
|
155
|
-
const nextPageStart = start + data.size;
|
|
156
|
-
const nextPageResults = await this.fetchAllPagesInSpace(nextPageStart, limit);
|
|
157
|
-
return data.results.concat(nextPageResults);
|
|
158
|
-
}
|
|
159
|
-
/**
|
|
160
|
-
* Creates a Document instance from a ConfluencePage object.
|
|
161
|
-
* @param page The ConfluencePage object to convert.
|
|
162
|
-
* @returns A Document instance.
|
|
163
|
-
*/
|
|
164
|
-
createDocumentFromPage(page) {
|
|
165
|
-
// Convert the HTML content to plain text
|
|
166
|
-
const plainTextContent = (0, html_to_text_1.htmlToText)(page.body.storage.value, {
|
|
167
|
-
wordwrap: false,
|
|
168
|
-
preserveNewlines: false,
|
|
169
|
-
});
|
|
170
|
-
// Remove empty lines
|
|
171
|
-
const textWithoutEmptyLines = plainTextContent.replace(/^\s*[\r\n]/gm, "");
|
|
172
|
-
// Generate the URL
|
|
173
|
-
const pageUrl = `${this.baseUrl}/spaces/${this.spaceKey}/pages/${page.id}`;
|
|
174
|
-
// Return a langchain document
|
|
175
|
-
return new documents_1.Document({
|
|
176
|
-
pageContent: textWithoutEmptyLines,
|
|
177
|
-
metadata: {
|
|
178
|
-
id: page.id,
|
|
179
|
-
status: page.status,
|
|
180
|
-
title: page.title,
|
|
181
|
-
type: page.type,
|
|
182
|
-
url: pageUrl,
|
|
183
|
-
version: page.version?.number,
|
|
184
|
-
updated_by: page.version?.by?.displayName,
|
|
185
|
-
updated_at: page.version?.when,
|
|
186
|
-
},
|
|
187
|
-
});
|
|
188
|
-
}
|
|
189
|
-
}
|
|
190
|
-
exports.ConfluencePagesLoader = ConfluencePagesLoader;
|
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
import { Document } from "@langchain/core/documents";
|
|
2
|
-
import { BaseDocumentLoader } from "../base.js";
|
|
3
|
-
/**
|
|
4
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/confluence" instead. This entrypoint will be removed in 0.3.0.
|
|
5
|
-
*
|
|
6
|
-
* Interface representing the parameters for configuring the
|
|
7
|
-
* ConfluencePagesLoader.
|
|
8
|
-
*/
|
|
9
|
-
export interface ConfluencePagesLoaderParams {
|
|
10
|
-
baseUrl: string;
|
|
11
|
-
spaceKey: string;
|
|
12
|
-
username?: string;
|
|
13
|
-
accessToken?: string;
|
|
14
|
-
personalAccessToken?: string;
|
|
15
|
-
limit?: number;
|
|
16
|
-
expand?: string;
|
|
17
|
-
}
|
|
18
|
-
/**
|
|
19
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/confluence" instead. This entrypoint will be removed in 0.3.0.
|
|
20
|
-
*
|
|
21
|
-
* Interface representing a Confluence page.
|
|
22
|
-
*/
|
|
23
|
-
export interface ConfluencePage {
|
|
24
|
-
id: string;
|
|
25
|
-
title: string;
|
|
26
|
-
type: string;
|
|
27
|
-
body: {
|
|
28
|
-
storage: {
|
|
29
|
-
value: string;
|
|
30
|
-
};
|
|
31
|
-
};
|
|
32
|
-
status: string;
|
|
33
|
-
version?: {
|
|
34
|
-
number: number;
|
|
35
|
-
when: string;
|
|
36
|
-
by: {
|
|
37
|
-
displayName: string;
|
|
38
|
-
};
|
|
39
|
-
};
|
|
40
|
-
}
|
|
41
|
-
/**
|
|
42
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/confluence" instead. This entrypoint will be removed in 0.3.0.
|
|
43
|
-
*
|
|
44
|
-
* Interface representing the response from the Confluence API.
|
|
45
|
-
*/
|
|
46
|
-
export interface ConfluenceAPIResponse {
|
|
47
|
-
size: number;
|
|
48
|
-
results: ConfluencePage[];
|
|
49
|
-
}
|
|
50
|
-
/**
|
|
51
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/confluence" instead. This entrypoint will be removed in 0.3.0.
|
|
52
|
-
*
|
|
53
|
-
* Class representing a document loader for loading pages from Confluence.
|
|
54
|
-
* @example
|
|
55
|
-
* ```typescript
|
|
56
|
-
* const loader = new ConfluencePagesLoader({
|
|
57
|
-
* baseUrl: "https:
|
|
58
|
-
* spaceKey: "~EXAMPLE362906de5d343d49dcdbae5dEXAMPLE",
|
|
59
|
-
* username: "your-username",
|
|
60
|
-
* accessToken: "your-access-token",
|
|
61
|
-
* });
|
|
62
|
-
* const documents = await loader.load();
|
|
63
|
-
* console.log(documents);
|
|
64
|
-
* ```
|
|
65
|
-
*/
|
|
66
|
-
export declare class ConfluencePagesLoader extends BaseDocumentLoader {
|
|
67
|
-
readonly baseUrl: string;
|
|
68
|
-
readonly spaceKey: string;
|
|
69
|
-
readonly username?: string;
|
|
70
|
-
readonly accessToken?: string;
|
|
71
|
-
readonly limit: number;
|
|
72
|
-
/**
|
|
73
|
-
* expand parameter for confluence rest api
|
|
74
|
-
* description can be found at https://developer.atlassian.com/server/confluence/expansions-in-the-rest-api/
|
|
75
|
-
*/
|
|
76
|
-
readonly expand?: string;
|
|
77
|
-
readonly personalAccessToken?: string;
|
|
78
|
-
constructor({ baseUrl, spaceKey, username, accessToken, limit, expand, personalAccessToken, }: ConfluencePagesLoaderParams);
|
|
79
|
-
/**
|
|
80
|
-
* Returns the authorization header for the request.
|
|
81
|
-
* @returns The authorization header as a string, or undefined if no credentials were provided.
|
|
82
|
-
*/
|
|
83
|
-
private get authorizationHeader();
|
|
84
|
-
/**
|
|
85
|
-
* Fetches all the pages in the specified space and converts each page to
|
|
86
|
-
* a Document instance.
|
|
87
|
-
* @param options the extra options of the load function
|
|
88
|
-
* @param options.limit The limit parameter to overwrite the size to fetch pages.
|
|
89
|
-
* @param options.start The start parameter to set inital offset to fetch pages.
|
|
90
|
-
* @returns Promise resolving to an array of Document instances.
|
|
91
|
-
*/
|
|
92
|
-
load(options?: {
|
|
93
|
-
start?: number;
|
|
94
|
-
limit?: number;
|
|
95
|
-
}): Promise<Document[]>;
|
|
96
|
-
/**
|
|
97
|
-
* Fetches data from the Confluence API using the provided URL.
|
|
98
|
-
* @param url The URL to fetch data from.
|
|
99
|
-
* @returns Promise resolving to the JSON response from the API.
|
|
100
|
-
*/
|
|
101
|
-
protected fetchConfluenceData(url: string): Promise<ConfluenceAPIResponse>;
|
|
102
|
-
/**
|
|
103
|
-
* Recursively fetches all the pages in the specified space.
|
|
104
|
-
* @param start The start parameter to paginate through the results.
|
|
105
|
-
* @returns Promise resolving to an array of ConfluencePage objects.
|
|
106
|
-
*/
|
|
107
|
-
private fetchAllPagesInSpace;
|
|
108
|
-
/**
|
|
109
|
-
* Creates a Document instance from a ConfluencePage object.
|
|
110
|
-
* @param page The ConfluencePage object to convert.
|
|
111
|
-
* @returns A Document instance.
|
|
112
|
-
*/
|
|
113
|
-
private createDocumentFromPage;
|
|
114
|
-
}
|