npm - langchain - Versions diffs - 0.1.26 → 0.1.27 - Mend

langchain 0.1.26 → 0.1.27

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/document_loaders/web/gitbook.cjs +11 -3
package/dist/document_loaders/web/gitbook.d.ts +1 -0
package/dist/document_loaders/web/gitbook.js +11 -3
package/dist/retrievers/parent_document.cjs +3 -2
package/dist/retrievers/parent_document.d.ts +3 -1
package/dist/retrievers/parent_document.js +3 -2
package/package.json +1 -1

package/dist/document_loaders/web/gitbook.cjs CHANGED Viewed

@@ -23,6 +23,13 @@ class GitbookLoader extends cheerio_js_1.CheerioWebBaseLoader {
             writable: true,
             value: false
         });
+        Object.defineProperty(this, "baseUrl", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.baseUrl = webPath;
         this.webPath = path;
         this.shouldLoadAllPaths =
             params.shouldLoadAllPaths ?? this.shouldLoadAllPaths;
@@ -84,9 +91,10 @@ class GitbookLoader extends cheerio_js_1.CheerioWebBaseLoader {
             .map((element) => $(element).text());
         const documents = [];
         for (const url of urls) {
-            console.log(`Fetching text from ${url}`);
-            const html = await GitbookLoader._scrape(url, this.caller, this.timeout);
-            documents.push(...this.loadPath(html, url));
+            const buildUrl = url.includes(this.baseUrl) ? url : this.baseUrl + url;
+            console.log(`Fetching text from ${buildUrl}`);
+            const html = await GitbookLoader._scrape(buildUrl, this.caller, this.timeout);
+            documents.push(...this.loadPath(html, buildUrl));
         }
         console.log(`Fetched ${documents.length} documents.`);
         return documents;

package/dist/document_loaders/web/gitbook.d.ts CHANGED Viewed

@@ -15,6 +15,7 @@ interface GitbookLoaderParams {
 export declare class GitbookLoader extends CheerioWebBaseLoader {
     webPath: string;
     shouldLoadAllPaths: boolean;
+    private readonly baseUrl;
     constructor(webPath: string, params?: GitbookLoaderParams);
     /**
      * Method that scrapes the web document using Cheerio and loads the

package/dist/document_loaders/web/gitbook.js CHANGED Viewed

@@ -20,6 +20,13 @@ export class GitbookLoader extends CheerioWebBaseLoader {
             writable: true,
             value: false
         });
+        Object.defineProperty(this, "baseUrl", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.baseUrl = webPath;
         this.webPath = path;
         this.shouldLoadAllPaths =
             params.shouldLoadAllPaths ?? this.shouldLoadAllPaths;
@@ -81,9 +88,10 @@ export class GitbookLoader extends CheerioWebBaseLoader {
             .map((element) => $(element).text());
         const documents = [];
         for (const url of urls) {
-            console.log(`Fetching text from ${url}`);
-            const html = await GitbookLoader._scrape(url, this.caller, this.timeout);
-            documents.push(...this.loadPath(html, url));
+            const buildUrl = url.includes(this.baseUrl) ? url : this.baseUrl + url;
+            console.log(`Fetching text from ${buildUrl}`);
+            const html = await GitbookLoader._scrape(buildUrl, this.caller, this.timeout);
+            documents.push(...this.loadPath(html, buildUrl));
         }
         console.log(`Fetched ${documents.length} documents.`);
         return documents;

package/dist/retrievers/parent_document.cjs CHANGED Viewed

@@ -153,9 +153,10 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
      * This can be false if and only if `ids` are provided. You may want
      *   to set this to False if the documents are already in the docstore
      *   and you don't want to re-add them.
+     * @param config.chunkHeaderOptions Object with options for adding Contextual chunk headers
      */
     async addDocuments(docs, config) {
-        const { ids, addToDocstore = true } = config ?? {};
+        const { ids, addToDocstore = true, childDocChunkHeaderOptions = {}, } = config ?? {};
         const parentDocs = this.parentSplitter
             ? await this.parentSplitter.splitDocuments(docs)
             : docs;
@@ -177,7 +178,7 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
         for (let i = 0; i < parentDocs.length; i += 1) {
             const parentDoc = parentDocs[i];
             const parentDocId = parentDocIds[i];
-            const subDocs = await this.childSplitter.splitDocuments([parentDoc]);
+            const subDocs = await this.childSplitter.splitDocuments([parentDoc], childDocChunkHeaderOptions);
             const taggedSubDocs = subDocs.map((subDoc) => new documents_1.Document({
                 pageContent: subDoc.pageContent,
                 metadata: { ...subDoc.metadata, [this.idKey]: parentDocId },

package/dist/retrievers/parent_document.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import { type VectorStoreInterface, type VectorStoreRetrieverInterface } from "@langchain/core/vectorstores";
 import { Document } from "@langchain/core/documents";
-import { TextSplitter } from "../text_splitter.js";
+import { TextSplitter, TextSplitterChunkHeaderOptions } from "../text_splitter.js";
 import { MultiVectorRetriever, type MultiVectorRetrieverInput } from "./multi_vector.js";
 /**
  * Interface for the fields required to initialize a
@@ -69,9 +69,11 @@ export declare class ParentDocumentRetriever extends MultiVectorRetriever {
      * This can be false if and only if `ids` are provided. You may want
      *   to set this to False if the documents are already in the docstore
      *   and you don't want to re-add them.
+     * @param config.chunkHeaderOptions Object with options for adding Contextual chunk headers
      */
     addDocuments(docs: Document[], config?: {
         ids?: string[];
         addToDocstore?: boolean;
+        childDocChunkHeaderOptions?: TextSplitterChunkHeaderOptions;
     }): Promise<void>;
 }

package/dist/retrievers/parent_document.js CHANGED Viewed

@@ -127,9 +127,10 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
      * This can be false if and only if `ids` are provided. You may want
      *   to set this to False if the documents are already in the docstore
      *   and you don't want to re-add them.
+     * @param config.chunkHeaderOptions Object with options for adding Contextual chunk headers
      */
     async addDocuments(docs, config) {
-        const { ids, addToDocstore = true } = config ?? {};
+        const { ids, addToDocstore = true, childDocChunkHeaderOptions = {}, } = config ?? {};
         const parentDocs = this.parentSplitter
             ? await this.parentSplitter.splitDocuments(docs)
             : docs;
@@ -151,7 +152,7 @@ export class ParentDocumentRetriever extends MultiVectorRetriever {
         for (let i = 0; i < parentDocs.length; i += 1) {
             const parentDoc = parentDocs[i];
             const parentDocId = parentDocIds[i];
-            const subDocs = await this.childSplitter.splitDocuments([parentDoc]);
+            const subDocs = await this.childSplitter.splitDocuments([parentDoc], childDocChunkHeaderOptions);
             const taggedSubDocs = subDocs.map((subDoc) => new Document({
                 pageContent: subDoc.pageContent,
                 metadata: { ...subDoc.metadata, [this.idKey]: parentDocId },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "langchain",
-  "version": "0.1.26",
+  "version": "0.1.27",
   "description": "Typescript bindings for langchain",
   "type": "module",
   "engines": {