langchain 0.1.26 → 0.1.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,6 +23,13 @@ class GitbookLoader extends cheerio_js_1.CheerioWebBaseLoader {
23
23
  writable: true,
24
24
  value: false
25
25
  });
26
+ Object.defineProperty(this, "baseUrl", {
27
+ enumerable: true,
28
+ configurable: true,
29
+ writable: true,
30
+ value: void 0
31
+ });
32
+ this.baseUrl = webPath;
26
33
  this.webPath = path;
27
34
  this.shouldLoadAllPaths =
28
35
  params.shouldLoadAllPaths ?? this.shouldLoadAllPaths;
@@ -84,9 +91,10 @@ class GitbookLoader extends cheerio_js_1.CheerioWebBaseLoader {
84
91
  .map((element) => $(element).text());
85
92
  const documents = [];
86
93
  for (const url of urls) {
87
- console.log(`Fetching text from ${url}`);
88
- const html = await GitbookLoader._scrape(url, this.caller, this.timeout);
89
- documents.push(...this.loadPath(html, url));
94
+ const buildUrl = url.includes(this.baseUrl) ? url : this.baseUrl + url;
95
+ console.log(`Fetching text from ${buildUrl}`);
96
+ const html = await GitbookLoader._scrape(buildUrl, this.caller, this.timeout);
97
+ documents.push(...this.loadPath(html, buildUrl));
90
98
  }
91
99
  console.log(`Fetched ${documents.length} documents.`);
92
100
  return documents;
@@ -15,6 +15,7 @@ interface GitbookLoaderParams {
15
15
  export declare class GitbookLoader extends CheerioWebBaseLoader {
16
16
  webPath: string;
17
17
  shouldLoadAllPaths: boolean;
18
+ private readonly baseUrl;
18
19
  constructor(webPath: string, params?: GitbookLoaderParams);
19
20
  /**
20
21
  * Method that scrapes the web document using Cheerio and loads the
@@ -20,6 +20,13 @@ export class GitbookLoader extends CheerioWebBaseLoader {
20
20
  writable: true,
21
21
  value: false
22
22
  });
23
+ Object.defineProperty(this, "baseUrl", {
24
+ enumerable: true,
25
+ configurable: true,
26
+ writable: true,
27
+ value: void 0
28
+ });
29
+ this.baseUrl = webPath;
23
30
  this.webPath = path;
24
31
  this.shouldLoadAllPaths =
25
32
  params.shouldLoadAllPaths ?? this.shouldLoadAllPaths;
@@ -81,9 +88,10 @@ export class GitbookLoader extends CheerioWebBaseLoader {
81
88
  .map((element) => $(element).text());
82
89
  const documents = [];
83
90
  for (const url of urls) {
84
- console.log(`Fetching text from ${url}`);
85
- const html = await GitbookLoader._scrape(url, this.caller, this.timeout);
86
- documents.push(...this.loadPath(html, url));
91
+ const buildUrl = url.includes(this.baseUrl) ? url : this.baseUrl + url;
92
+ console.log(`Fetching text from ${buildUrl}`);
93
+ const html = await GitbookLoader._scrape(buildUrl, this.caller, this.timeout);
94
+ documents.push(...this.loadPath(html, buildUrl));
87
95
  }
88
96
  console.log(`Fetched ${documents.length} documents.`);
89
97
  return documents;
@@ -153,9 +153,10 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
153
153
  * This can be false if and only if `ids` are provided. You may want
154
154
  * to set this to False if the documents are already in the docstore
155
155
  * and you don't want to re-add them.
156
+ * @param config.childDocChunkHeaderOptions Object with options for adding contextual chunk headers, passed to the child splitter
156
157
  */
157
158
  async addDocuments(docs, config) {
158
- const { ids, addToDocstore = true } = config ?? {};
159
+ const { ids, addToDocstore = true, childDocChunkHeaderOptions = {}, } = config ?? {};
159
160
  const parentDocs = this.parentSplitter
160
161
  ? await this.parentSplitter.splitDocuments(docs)
161
162
  : docs;
@@ -177,7 +178,7 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
177
178
  for (let i = 0; i < parentDocs.length; i += 1) {
178
179
  const parentDoc = parentDocs[i];
179
180
  const parentDocId = parentDocIds[i];
180
- const subDocs = await this.childSplitter.splitDocuments([parentDoc]);
181
+ const subDocs = await this.childSplitter.splitDocuments([parentDoc], childDocChunkHeaderOptions);
181
182
  const taggedSubDocs = subDocs.map((subDoc) => new documents_1.Document({
182
183
  pageContent: subDoc.pageContent,
183
184
  metadata: { ...subDoc.metadata, [this.idKey]: parentDocId },
@@ -1,6 +1,6 @@
1
1
  import { type VectorStoreInterface, type VectorStoreRetrieverInterface } from "@langchain/core/vectorstores";
2
2
  import { Document } from "@langchain/core/documents";
3
- import { TextSplitter } from "../text_splitter.js";
3
+ import { TextSplitter, TextSplitterChunkHeaderOptions } from "../text_splitter.js";
4
4
  import { MultiVectorRetriever, type MultiVectorRetrieverInput } from "./multi_vector.js";
5
5
  /**
6
6
  * Interface for the fields required to initialize a
@@ -69,9 +69,11 @@ export declare class ParentDocumentRetriever extends MultiVectorRetriever {
69
69
  * This can be false if and only if `ids` are provided. You may want
70
70
  * to set this to False if the documents are already in the docstore
71
71
  * and you don't want to re-add them.
72
+ * @param config.childDocChunkHeaderOptions Object with options for adding contextual chunk headers to the child documents
72
73
  */
73
74
  addDocuments(docs: Document[], config?: {
74
75
  ids?: string[];
75
76
  addToDocstore?: boolean;
77
+ childDocChunkHeaderOptions?: TextSplitterChunkHeaderOptions;
76
78
  }): Promise<void>;
77
79
  }
@@ -127,9 +127,10 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
127
127
  * This can be false if and only if `ids` are provided. You may want
128
128
  * to set this to False if the documents are already in the docstore
129
129
  * and you don't want to re-add them.
130
+ * @param config.childDocChunkHeaderOptions Object with options for adding contextual chunk headers, passed to the child splitter
130
131
  */
131
132
  async addDocuments(docs, config) {
132
- const { ids, addToDocstore = true } = config ?? {};
133
+ const { ids, addToDocstore = true, childDocChunkHeaderOptions = {}, } = config ?? {};
133
134
  const parentDocs = this.parentSplitter
134
135
  ? await this.parentSplitter.splitDocuments(docs)
135
136
  : docs;
@@ -151,7 +152,7 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
151
152
  for (let i = 0; i < parentDocs.length; i += 1) {
152
153
  const parentDoc = parentDocs[i];
153
154
  const parentDocId = parentDocIds[i];
154
- const subDocs = await this.childSplitter.splitDocuments([parentDoc]);
155
+ const subDocs = await this.childSplitter.splitDocuments([parentDoc], childDocChunkHeaderOptions);
155
156
  const taggedSubDocs = subDocs.map((subDoc) => new Document({
156
157
  pageContent: subDoc.pageContent,
157
158
  metadata: { ...subDoc.metadata, [this.idKey]: parentDocId },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "langchain",
3
- "version": "0.1.26",
3
+ "version": "0.1.27",
4
4
  "description": "Typescript bindings for langchain",
5
5
  "type": "module",
6
6
  "engines": {