langchain 0.0.132 → 0.0.134

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/dist/agents/chat/outputParser.cjs +2 -1
  2. package/dist/agents/chat/outputParser.js +2 -1
  3. package/dist/agents/executor.cjs +106 -7
  4. package/dist/agents/executor.d.ts +23 -0
  5. package/dist/agents/executor.js +104 -6
  6. package/dist/agents/mrkl/outputParser.cjs +2 -1
  7. package/dist/agents/mrkl/outputParser.js +2 -1
  8. package/dist/callbacks/index.cjs +2 -1
  9. package/dist/callbacks/index.d.ts +1 -1
  10. package/dist/callbacks/index.js +1 -1
  11. package/dist/chains/sql_db/sql_db_chain.d.ts +1 -1
  12. package/dist/chains/sql_db/sql_db_prompt.d.ts +6 -6
  13. package/dist/chat_models/googlevertexai.cjs +1 -1
  14. package/dist/chat_models/googlevertexai.d.ts +2 -2
  15. package/dist/chat_models/googlevertexai.js +2 -2
  16. package/dist/chat_models/ollama.cjs +8 -8
  17. package/dist/chat_models/ollama.js +8 -8
  18. package/dist/document_loaders/web/notionapi.cjs +153 -74
  19. package/dist/document_loaders/web/notionapi.d.ts +19 -10
  20. package/dist/document_loaders/web/notionapi.js +154 -75
  21. package/dist/document_loaders/web/recursive_url.cjs +177 -0
  22. package/dist/document_loaders/web/recursive_url.d.ts +27 -0
  23. package/dist/document_loaders/web/recursive_url.js +173 -0
  24. package/dist/embeddings/googlevertexai.cjs +1 -1
  25. package/dist/embeddings/googlevertexai.d.ts +2 -2
  26. package/dist/embeddings/googlevertexai.js +2 -2
  27. package/dist/experimental/multimodal_embeddings/googlevertexai.cjs +1 -1
  28. package/dist/experimental/multimodal_embeddings/googlevertexai.d.ts +2 -2
  29. package/dist/experimental/multimodal_embeddings/googlevertexai.js +2 -2
  30. package/dist/hub.cjs +16 -0
  31. package/dist/hub.d.ts +4 -0
  32. package/dist/hub.js +11 -0
  33. package/dist/llms/bedrock.cjs +63 -19
  34. package/dist/llms/bedrock.d.ts +9 -1
  35. package/dist/llms/bedrock.js +63 -19
  36. package/dist/llms/googlevertexai.cjs +1 -1
  37. package/dist/llms/googlevertexai.js +2 -2
  38. package/dist/load/import_constants.cjs +3 -0
  39. package/dist/load/import_constants.js +3 -0
  40. package/dist/schema/output_parser.cjs +2 -2
  41. package/dist/schema/output_parser.js +2 -2
  42. package/dist/tools/base.cjs +26 -2
  43. package/dist/tools/base.d.ts +9 -0
  44. package/dist/tools/base.js +24 -1
  45. package/dist/tools/sql.cjs +9 -3
  46. package/dist/tools/sql.d.ts +0 -1
  47. package/dist/tools/sql.js +9 -3
  48. package/dist/types/googlevertexai-types.d.ts +8 -3
  49. package/dist/util/googlevertexai-connection.cjs +49 -15
  50. package/dist/util/googlevertexai-connection.d.ts +12 -4
  51. package/dist/util/googlevertexai-connection.js +46 -13
  52. package/dist/vectorstores/googlevertexai.cjs +551 -0
  53. package/dist/vectorstores/googlevertexai.d.ts +180 -0
  54. package/dist/vectorstores/googlevertexai.js +520 -0
  55. package/dist/vectorstores/myscale.cjs +2 -2
  56. package/dist/vectorstores/myscale.d.ts +1 -1
  57. package/dist/vectorstores/myscale.js +2 -2
  58. package/dist/vectorstores/vectara.cjs +11 -2
  59. package/dist/vectorstores/vectara.d.ts +10 -1
  60. package/dist/vectorstores/vectara.js +11 -2
  61. package/document_loaders/web/recursive_url.cjs +1 -0
  62. package/document_loaders/web/recursive_url.d.ts +1 -0
  63. package/document_loaders/web/recursive_url.js +1 -0
  64. package/hub.cjs +1 -0
  65. package/hub.d.ts +1 -0
  66. package/hub.js +1 -0
  67. package/package.json +41 -2
  68. package/vectorstores/googlevertexai.cjs +1 -0
  69. package/vectorstores/googlevertexai.d.ts +1 -0
  70. package/vectorstores/googlevertexai.js +1 -0
@@ -1,8 +1,22 @@
1
- import { Client, isFullBlock, isFullPage, iteratePaginatedAPI, } from "@notionhq/client";
1
+ import { Client, isFullBlock, isFullPage, iteratePaginatedAPI, APIErrorCode, isNotionClientError, isFullDatabase, } from "@notionhq/client";
2
2
  import { NotionToMarkdown } from "notion-to-md";
3
3
  import { getBlockChildren } from "notion-to-md/build/utils/notion.js";
4
- import { BaseDocumentLoader } from "../base.js";
5
4
  import { Document } from "../../document.js";
5
+ import { BaseDocumentLoader } from "../base.js";
6
+ import { AsyncCaller } from "../../util/async_caller.js";
7
+ const isPageResponse = (res) => !isNotionClientError(res) && res.object === "page";
8
+ const isDatabaseResponse = (res) => !isNotionClientError(res) && res.object === "database";
9
+ const isErrorResponse = (res) => isNotionClientError(res);
10
+ const isPage = (res) => isPageResponse(res) && isFullPage(res);
11
+ const isDatabase = (res) => isDatabaseResponse(res) && isFullDatabase(res);
12
+ const getTitle = (obj) => {
13
+ if (isPage(obj) && obj.properties.title.type === "title") {
14
+ return obj.properties.title.title[0]?.plain_text;
15
+ }
16
+ if (isDatabase(obj))
17
+ return obj.title[0]?.plain_text;
18
+ return null;
19
+ };
6
20
  /**
7
21
  * A class that extends the BaseDocumentLoader class. It represents a
8
22
  * document loader for loading documents from Notion using the Notion API.
@@ -10,6 +24,12 @@ import { Document } from "../../document.js";
10
24
  export class NotionAPILoader extends BaseDocumentLoader {
11
25
  constructor(options) {
12
26
  super();
27
+ Object.defineProperty(this, "caller", {
28
+ enumerable: true,
29
+ configurable: true,
30
+ writable: true,
31
+ value: void 0
32
+ });
13
33
  Object.defineProperty(this, "notionClient", {
14
34
  enumerable: true,
15
35
  configurable: true,
@@ -28,19 +48,66 @@ export class NotionAPILoader extends BaseDocumentLoader {
28
48
  writable: true,
29
49
  value: void 0
30
50
  });
31
- Object.defineProperty(this, "type", {
51
+ Object.defineProperty(this, "pageQueue", {
52
+ enumerable: true,
53
+ configurable: true,
54
+ writable: true,
55
+ value: void 0
56
+ });
57
+ Object.defineProperty(this, "pageCompleted", {
58
+ enumerable: true,
59
+ configurable: true,
60
+ writable: true,
61
+ value: void 0
62
+ });
63
+ Object.defineProperty(this, "pageQueueTotal", {
64
+ enumerable: true,
65
+ configurable: true,
66
+ writable: true,
67
+ value: void 0
68
+ });
69
+ Object.defineProperty(this, "documents", {
32
70
  enumerable: true,
33
71
  configurable: true,
34
72
  writable: true,
35
73
  value: void 0
36
74
  });
37
- this.notionClient = new Client(options.clientOptions);
75
+ Object.defineProperty(this, "rootTitle", {
76
+ enumerable: true,
77
+ configurable: true,
78
+ writable: true,
79
+ value: void 0
80
+ });
81
+ Object.defineProperty(this, "onDocumentLoaded", {
82
+ enumerable: true,
83
+ configurable: true,
84
+ writable: true,
85
+ value: void 0
86
+ });
87
+ this.caller = new AsyncCaller({
88
+ maxConcurrency: 64,
89
+ ...options.callerOptions,
90
+ });
91
+ this.notionClient = new Client({
92
+ logger: () => { },
93
+ ...options.clientOptions,
94
+ });
38
95
  this.n2mClient = new NotionToMarkdown({
39
96
  notionClient: this.notionClient,
40
97
  config: { parseChildPages: false, convertImagesToBase64: false },
41
98
  });
42
99
  this.id = options.id;
43
- this.type = options.type;
100
+ this.pageQueue = [];
101
+ this.pageCompleted = [];
102
+ this.pageQueueTotal = 0;
103
+ this.documents = [];
104
+ this.rootTitle = "";
105
+ this.onDocumentLoaded = options.onDocumentLoaded ?? ((_ti, _cu) => { });
106
+ }
107
+ addToQueue(...items) {
108
+ const deDuped = items.filter((item) => !this.pageCompleted.concat(this.pageQueue).includes(item));
109
+ this.pageQueue.push(...deDuped);
110
+ this.pageQueueTotal += deDuped.length;
44
111
  }
45
112
  /**
46
113
  * Parses the properties of a Notion page and returns them as key-value
@@ -123,123 +190,135 @@ export class NotionAPILoader extends BaseDocumentLoader {
123
190
  * @returns A Promise that resolves to an MdBlock object.
124
191
  */
125
192
  async loadBlock(block) {
126
- return {
193
+ const mdBlock = {
127
194
  type: block.type,
128
195
  blockId: block.id,
129
- parent: await this.n2mClient.blockToMarkdown(block),
196
+ parent: await this.caller.call(() => this.n2mClient.blockToMarkdown(block)),
130
197
  children: [],
131
198
  };
199
+ if (block.has_children) {
200
+ const block_id = block.type === "synced_block" &&
201
+ block.synced_block?.synced_from?.block_id
202
+ ? block.synced_block.synced_from.block_id
203
+ : block.id;
204
+ const childBlocks = await this.loadBlocks(await this.caller.call(() => getBlockChildren(this.notionClient, block_id, null)));
205
+ mdBlock.children = childBlocks;
206
+ }
207
+ return mdBlock;
132
208
  }
133
209
  /**
134
- * Loads Notion blocks and their child documents recursively.
210
+ * Loads Notion blocks and their children recursively.
135
211
  * @param blocksResponse The response from the Notion API containing the blocks to load.
136
- * @returns A Promise that resolves to an object containing the loaded MdBlocks and child Documents.
212
+ * @returns A Promise that resolves to an array containing the loaded MdBlocks.
137
213
  */
138
- async loadBlocksAndDocs(blocksResponse) {
214
+ async loadBlocks(blocksResponse) {
139
215
  const blocks = blocksResponse.filter(isFullBlock);
140
- const [childPageDocuments, childDatabaseDocuments, blocksDocsArray] = await Promise.all([
141
- Promise.all(blocks
142
- .filter((block) => block.type.includes("child_page"))
143
- .map((block) => this.loadPage(block.id))),
144
- Promise.all(blocks
145
- .filter((block) => block.type.includes("child_database"))
146
- .map((block) => this.loadDatabase(block.id))),
147
- Promise.all(blocks
148
- .filter((block) => !["child_page", "child_database"].includes(block.type))
149
- .map(async (block) => {
150
- const mdBlock = await this.loadBlock(block);
151
- let childDocuments = [];
152
- if (block.has_children) {
153
- const block_id = block.type === "synced_block" &&
154
- block.synced_block?.synced_from?.block_id
155
- ? block.synced_block.synced_from.block_id
156
- : block.id;
157
- const childBlocksDocs = await this.loadBlocksAndDocs(await getBlockChildren(this.notionClient, block_id, null));
158
- mdBlock.children = childBlocksDocs.mdBlocks;
159
- childDocuments = childBlocksDocs.childDocuments;
160
- }
161
- return {
162
- mdBlocks: [mdBlock],
163
- childDocuments,
164
- };
165
- })),
216
+ // Add child pages to queue
217
+ const childPages = blocks
218
+ .filter((block) => block.type.includes("child_page"))
219
+ .map((block) => block.id);
220
+ if (childPages.length > 0)
221
+ this.addToQueue(...childPages);
222
+ // Add child database pages to queue
223
+ const childDatabases = blocks
224
+ .filter((block) => block.type.includes("child_database"))
225
+ .map((block) => this.caller.call(() => this.loadDatabase(block.id)));
226
+ // Load this block and child blocks
227
+ const loadingMdBlocks = blocks
228
+ .filter((block) => !["child_page", "child_database"].includes(block.type))
229
+ .map((block) => this.loadBlock(block));
230
+ const [mdBlocks] = await Promise.all([
231
+ Promise.all(loadingMdBlocks),
232
+ Promise.all(childDatabases),
166
233
  ]);
167
- const allMdBlocks = blocksDocsArray
168
- .flat()
169
- .map((blockDoc) => blockDoc.mdBlocks);
170
- const childDocuments = blocksDocsArray
171
- .flat()
172
- .map((blockDoc) => blockDoc.childDocuments);
173
- return {
174
- mdBlocks: [...allMdBlocks.flat()],
175
- childDocuments: [
176
- ...childPageDocuments.flat(),
177
- ...childDatabaseDocuments.flat(),
178
- ...childDocuments.flat(),
179
- ],
180
- };
234
+ return mdBlocks;
181
235
  }
182
236
  /**
183
- * Loads a Notion page and its child documents.
237
+ * Loads a Notion page and its child documents, then adds it to the completed documents array.
184
238
  * @param page The Notion page or page ID to load.
185
- * @returns A Promise that resolves to an array of Documents.
186
239
  */
187
240
  async loadPage(page) {
188
- // Check page is a page ID or a GetPageResponse
241
+ // Check page is a page ID or a PageObjectResponse
189
242
  const [pageData, pageId] = typeof page === "string"
190
- ? [this.notionClient.pages.retrieve({ page_id: page }), page]
243
+ ? [
244
+ this.caller.call(() => this.notionClient.pages.retrieve({ page_id: page })),
245
+ page,
246
+ ]
191
247
  : [page, page.id];
192
248
  const [pageDetails, pageBlocks] = await Promise.all([
193
249
  pageData,
194
- getBlockChildren(this.notionClient, pageId, null),
250
+ this.caller.call(() => getBlockChildren(this.notionClient, pageId, null)),
195
251
  ]);
196
252
  if (!isFullPage(pageDetails))
197
- return [];
198
- const { mdBlocks, childDocuments } = await this.loadBlocksAndDocs(pageBlocks);
253
+ return;
254
+ const mdBlocks = await this.loadBlocks(pageBlocks);
199
255
  const mdStringObject = this.n2mClient.toMarkdownString(mdBlocks);
200
256
  const pageDocument = new Document({
201
257
  pageContent: mdStringObject.parent,
202
258
  metadata: this.parsePageDetails(pageDetails),
203
259
  });
204
- return [pageDocument, ...childDocuments];
260
+ this.documents.push(pageDocument);
261
+ this.pageCompleted.push(pageId);
262
+ this.onDocumentLoaded(this.documents.length, this.pageQueueTotal, pageDocument.metadata.properties.title, this.rootTitle);
205
263
  }
206
264
  /**
207
- * Loads a Notion database and its documents.
265
+ * Loads a Notion database and adds it's pages to the queue.
208
266
  * @param id The ID of the Notion database to load.
209
- * @returns A Promise that resolves to an array of Documents.
210
267
  */
211
268
  async loadDatabase(id) {
212
- const documents = [];
213
269
  try {
214
270
  for await (const page of iteratePaginatedAPI(this.notionClient.databases.query, {
215
271
  database_id: id,
272
+ page_size: 50,
216
273
  })) {
217
- if (!isFullPage(page))
218
- continue;
219
- documents.push(...(await this.loadPage(page)));
274
+ this.addToQueue(page.id);
220
275
  }
221
276
  }
222
277
  catch (e) {
223
278
  console.log(e);
224
279
  // TODO: Catch and report api request errors
225
280
  }
226
- return documents;
227
281
  }
228
282
  /**
229
283
  * Loads the documents from Notion based on the specified options.
230
284
  * @returns A Promise that resolves to an array of Documents.
231
285
  */
232
286
  async load() {
233
- const documents = [];
234
- switch (this.type) {
235
- case "page":
236
- documents.push(...(await this.loadPage(this.id)));
237
- break;
238
- case "database":
239
- documents.push(...(await this.loadDatabase(this.id)));
240
- break;
241
- default:
287
+ const resPagePromise = this.notionClient.pages
288
+ .retrieve({ page_id: this.id })
289
+ .then((res) => {
290
+ this.addToQueue(this.id);
291
+ return res;
292
+ })
293
+ .catch((error) => error);
294
+ const resDatabasePromise = this.notionClient.databases
295
+ .retrieve({ database_id: this.id })
296
+ .then(async (res) => {
297
+ await this.loadDatabase(this.id);
298
+ return res;
299
+ })
300
+ .catch((error) => error);
301
+ const [resPage, resDatabase] = await Promise.all([
302
+ resPagePromise,
303
+ resDatabasePromise,
304
+ ]);
305
+ // Check if both resPage and resDatabase resulted in error responses
306
+ const errors = [resPage, resDatabase].filter(isErrorResponse);
307
+ if (errors.length === 2) {
308
+ if (errors.every((e) => e.code === APIErrorCode.ObjectNotFound)) {
309
+ throw new AggregateError([
310
+ Error(`Could not find object with ID: ${this.id}. Make sure the relevant pages and databases are shared with your integration.`),
311
+ ...errors,
312
+ ]);
313
+ }
314
+ throw new AggregateError(errors);
315
+ }
316
+ this.rootTitle = getTitle(resPage) || getTitle(resDatabase) || this.id;
317
+ let pageId = this.pageQueue.shift();
318
+ while (pageId) {
319
+ await this.loadPage(pageId);
320
+ pageId = this.pageQueue.shift();
242
321
  }
243
- return documents;
322
+ return this.documents;
244
323
  }
245
324
  }
@@ -0,0 +1,177 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.RecursiveUrlLoader = void 0;
4
+ const jsdom_1 = require("jsdom");
5
+ const async_caller_js_1 = require("../../util/async_caller.cjs");
6
+ const base_js_1 = require("../base.cjs");
7
+ class RecursiveUrlLoader extends base_js_1.BaseDocumentLoader {
8
+ constructor(url, options) {
9
+ super();
10
+ Object.defineProperty(this, "caller", {
11
+ enumerable: true,
12
+ configurable: true,
13
+ writable: true,
14
+ value: void 0
15
+ });
16
+ Object.defineProperty(this, "url", {
17
+ enumerable: true,
18
+ configurable: true,
19
+ writable: true,
20
+ value: void 0
21
+ });
22
+ Object.defineProperty(this, "excludeDirs", {
23
+ enumerable: true,
24
+ configurable: true,
25
+ writable: true,
26
+ value: void 0
27
+ });
28
+ Object.defineProperty(this, "extractor", {
29
+ enumerable: true,
30
+ configurable: true,
31
+ writable: true,
32
+ value: void 0
33
+ });
34
+ Object.defineProperty(this, "maxDepth", {
35
+ enumerable: true,
36
+ configurable: true,
37
+ writable: true,
38
+ value: void 0
39
+ });
40
+ Object.defineProperty(this, "timeout", {
41
+ enumerable: true,
42
+ configurable: true,
43
+ writable: true,
44
+ value: void 0
45
+ });
46
+ Object.defineProperty(this, "preventOutside", {
47
+ enumerable: true,
48
+ configurable: true,
49
+ writable: true,
50
+ value: void 0
51
+ });
52
+ this.caller = new async_caller_js_1.AsyncCaller({
53
+ maxConcurrency: 64,
54
+ maxRetries: 0,
55
+ ...options.callerOptions,
56
+ });
57
+ this.url = url;
58
+ this.excludeDirs = options.excludeDirs ?? [];
59
+ this.extractor = options.extractor ?? ((s) => s);
60
+ this.maxDepth = options.maxDepth ?? 2;
61
+ this.timeout = options.timeout ?? 10000;
62
+ this.preventOutside = options.preventOutside ?? true;
63
+ }
64
+ async fetchWithTimeout(resource, options) {
65
+ const { timeout, ...rest } = options;
66
+ return this.caller.call(() => fetch(resource, { ...rest, signal: AbortSignal.timeout(timeout) }));
67
+ }
68
+ getChildLinks(html, baseUrl) {
69
+ const allLinks = Array.from(new jsdom_1.JSDOM(html).window.document.querySelectorAll("a")).map((a) => a.href);
70
+ const absolutePaths = [];
71
+ // eslint-disable-next-line no-script-url
72
+ const invalidPrefixes = ["javascript:", "mailto:", "#"];
73
+ const invalidSuffixes = [
74
+ ".css",
75
+ ".js",
76
+ ".ico",
77
+ ".png",
78
+ ".jpg",
79
+ ".jpeg",
80
+ ".gif",
81
+ ".svg",
82
+ ];
83
+ for (const link of allLinks) {
84
+ if (invalidPrefixes.some((prefix) => link.startsWith(prefix)) ||
85
+ invalidSuffixes.some((suffix) => link.endsWith(suffix)))
86
+ continue;
87
+ if (link.startsWith("http")) {
88
+ const isAllowed = !this.preventOutside || link.startsWith(baseUrl);
89
+ if (isAllowed)
90
+ absolutePaths.push(link);
91
+ }
92
+ else if (link.startsWith("//")) {
93
+ const base = new URL(baseUrl);
94
+ absolutePaths.push(base.protocol + link);
95
+ }
96
+ else {
97
+ const newLink = new URL(link, baseUrl).href;
98
+ absolutePaths.push(newLink);
99
+ }
100
+ }
101
+ return Array.from(new Set(absolutePaths));
102
+ }
103
+ extractMetadata(rawHtml, url) {
104
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
105
+ const metadata = { source: url };
106
+ const { document } = new jsdom_1.JSDOM(rawHtml).window;
107
+ const title = document.getElementsByTagName("title")[0];
108
+ if (title) {
109
+ metadata.title = title.textContent;
110
+ }
111
+ const description = document.querySelector("meta[name=description]");
112
+ if (description) {
113
+ metadata.description = description.getAttribute("content");
114
+ }
115
+ const html = document.getElementsByTagName("html")[0];
116
+ if (html) {
117
+ metadata.language = html.getAttribute("lang");
118
+ }
119
+ return metadata;
120
+ }
121
+ async getUrlAsDoc(url) {
122
+ let res;
123
+ try {
124
+ res = await this.fetchWithTimeout(url, { timeout: this.timeout });
125
+ res = await res.text();
126
+ }
127
+ catch (e) {
128
+ return null;
129
+ }
130
+ return {
131
+ pageContent: this.extractor(res),
132
+ metadata: this.extractMetadata(res, url),
133
+ };
134
+ }
135
+ async getChildUrlsRecursive(inputUrl, visited = new Set(), depth = 0) {
136
+ if (depth > this.maxDepth)
137
+ return [];
138
+ let url = inputUrl;
139
+ if (!inputUrl.endsWith("/"))
140
+ url += "/";
141
+ const isExcluded = this.excludeDirs.some((exDir) => url.startsWith(exDir));
142
+ if (isExcluded)
143
+ return [];
144
+ let res;
145
+ try {
146
+ res = await this.fetchWithTimeout(url, { timeout: this.timeout });
147
+ res = await res.text();
148
+ }
149
+ catch (e) {
150
+ return [];
151
+ }
152
+ const childUrls = this.getChildLinks(res, url);
153
+ const results = await Promise.all(childUrls.map((childUrl) => (async () => {
154
+ if (visited.has(childUrl))
155
+ return null;
156
+ visited.add(childUrl);
157
+ const childDoc = await this.getUrlAsDoc(childUrl);
158
+ if (!childDoc)
159
+ return null;
160
+ if (childUrl.endsWith("/")) {
161
+ const childUrlResponses = await this.getChildUrlsRecursive(childUrl, visited, depth + 1);
162
+ return [childDoc, ...childUrlResponses];
163
+ }
164
+ return [childDoc];
165
+ })()));
166
+ return results.flat().filter((docs) => docs !== null);
167
+ }
168
+ async load() {
169
+ const rootDoc = await this.getUrlAsDoc(this.url);
170
+ if (!rootDoc)
171
+ return [];
172
+ const docs = [rootDoc];
173
+ docs.push(...(await this.getChildUrlsRecursive(this.url, new Set([this.url]))));
174
+ return docs;
175
+ }
176
+ }
177
+ exports.RecursiveUrlLoader = RecursiveUrlLoader;
@@ -0,0 +1,27 @@
1
+ import { Document } from "../../document.js";
2
+ import { AsyncCaller } from "../../util/async_caller.js";
3
+ import { BaseDocumentLoader, DocumentLoader } from "../base.js";
4
+ export interface RecursiveUrlLoaderOptions {
5
+ excludeDirs?: string[];
6
+ extractor?: (text: string) => string;
7
+ maxDepth?: number;
8
+ timeout?: number;
9
+ preventOutside?: boolean;
10
+ callerOptions?: ConstructorParameters<typeof AsyncCaller>[0];
11
+ }
12
+ export declare class RecursiveUrlLoader extends BaseDocumentLoader implements DocumentLoader {
13
+ private caller;
14
+ private url;
15
+ private excludeDirs;
16
+ private extractor;
17
+ private maxDepth;
18
+ private timeout;
19
+ private preventOutside;
20
+ constructor(url: string, options: RecursiveUrlLoaderOptions);
21
+ private fetchWithTimeout;
22
+ private getChildLinks;
23
+ private extractMetadata;
24
+ private getUrlAsDoc;
25
+ private getChildUrlsRecursive;
26
+ load(): Promise<Document[]>;
27
+ }