langchain 0.0.79 → 0.0.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/document_loaders/index.cjs +1 -3
- package/dist/document_loaders/index.d.ts +0 -1
- package/dist/document_loaders/index.js +0 -1
- package/dist/document_loaders/web/github.cjs +38 -23
- package/dist/document_loaders/web/github.d.ts +5 -2
- package/dist/document_loaders/web/github.js +38 -23
- package/dist/llms/googlevertexai.cjs +97 -0
- package/dist/llms/googlevertexai.d.ts +43 -0
- package/dist/llms/googlevertexai.js +93 -0
- package/dist/text_splitter.cjs +11 -4
- package/dist/text_splitter.d.ts +7 -2
- package/dist/text_splitter.js +11 -4
- package/dist/types/googlevertexai-types.cjs +2 -0
- package/dist/types/googlevertexai-types.d.ts +47 -0
- package/dist/types/googlevertexai-types.js +1 -0
- package/dist/util/googlevertexai-connection.cjs +66 -0
- package/dist/util/googlevertexai-connection.d.ts +13 -0
- package/dist/util/googlevertexai-connection.js +62 -0
- package/dist/vectorstores/chroma.cjs +34 -7
- package/dist/vectorstores/chroma.d.ts +5 -1
- package/dist/vectorstores/chroma.js +34 -7
- package/llms/googlevertexai.cjs +1 -0
- package/llms/googlevertexai.d.ts +1 -0
- package/llms/googlevertexai.js +1 -0
- package/package.json +18 -3
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.UnstructuredLoader = exports.
|
|
3
|
+
exports.UnstructuredLoader = exports.NotionLoader = exports.CSVLoader = exports.JSONLinesLoader = exports.JSONLoader = exports.TextLoader = exports.EPubLoader = exports.DocxLoader = exports.PDFLoader = exports.SRTLoader = exports.UnknownHandling = exports.DirectoryLoader = exports.IMSDBLoader = exports.HNLoader = exports.GitbookLoader = exports.CollegeConfidentialLoader = exports.PuppeteerWebBaseLoader = exports.CheerioWebBaseLoader = exports.BaseDocumentLoader = void 0;
|
|
4
4
|
/* #__PURE__ */ console.error("[WARN] Importing from 'langchain/document_loaders' is deprecated. Import from eg. 'langchain/document_loaders/fs/text' or 'langchain/document_loaders/web/cheerio' instead. See https://js.langchain.com/docs/getting-started/install#updating-from-0052 for upgrade instructions.");
|
|
5
5
|
var base_js_1 = require("./base.cjs");
|
|
6
6
|
Object.defineProperty(exports, "BaseDocumentLoader", { enumerable: true, get: function () { return base_js_1.BaseDocumentLoader; } });
|
|
@@ -36,7 +36,5 @@ var csv_js_1 = require("./fs/csv.cjs");
|
|
|
36
36
|
Object.defineProperty(exports, "CSVLoader", { enumerable: true, get: function () { return csv_js_1.CSVLoader; } });
|
|
37
37
|
var notion_js_1 = require("./fs/notion.cjs");
|
|
38
38
|
Object.defineProperty(exports, "NotionLoader", { enumerable: true, get: function () { return notion_js_1.NotionLoader; } });
|
|
39
|
-
var github_js_1 = require("./web/github.cjs");
|
|
40
|
-
Object.defineProperty(exports, "GithubRepoLoader", { enumerable: true, get: function () { return github_js_1.GithubRepoLoader; } });
|
|
41
39
|
var unstructured_js_1 = require("./fs/unstructured.cjs");
|
|
42
40
|
Object.defineProperty(exports, "UnstructuredLoader", { enumerable: true, get: function () { return unstructured_js_1.UnstructuredLoader; } });
|
|
@@ -15,5 +15,4 @@ export { TextLoader } from "./fs/text.js";
|
|
|
15
15
|
export { JSONLoader, JSONLinesLoader } from "./fs/json.js";
|
|
16
16
|
export { CSVLoader } from "./fs/csv.js";
|
|
17
17
|
export { NotionLoader } from "./fs/notion.js";
|
|
18
|
-
export { GithubRepoLoader, GithubRepoLoaderParams } from "./web/github.js";
|
|
19
18
|
export { UnstructuredLoader } from "./fs/unstructured.js";
|
|
@@ -15,5 +15,4 @@ export { TextLoader } from "./fs/text.js";
|
|
|
15
15
|
export { JSONLoader, JSONLinesLoader } from "./fs/json.js";
|
|
16
16
|
export { CSVLoader } from "./fs/csv.js";
|
|
17
17
|
export { NotionLoader } from "./fs/notion.js";
|
|
18
|
-
export { GithubRepoLoader } from "./web/github.js";
|
|
19
18
|
export { UnstructuredLoader } from "./fs/unstructured.js";
|
|
@@ -4,6 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.GithubRepoLoader = void 0;
|
|
7
|
+
const ignore_1 = __importDefault(require("ignore"));
|
|
7
8
|
const binary_extensions_1 = __importDefault(require("binary-extensions"));
|
|
8
9
|
const document_js_1 = require("../../document.cjs");
|
|
9
10
|
const base_js_1 = require("../base.cjs");
|
|
@@ -17,7 +18,7 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
17
18
|
constructor(githubUrl, { accessToken = typeof process !== "undefined"
|
|
18
19
|
? // eslint-disable-next-line no-process-env
|
|
19
20
|
process.env?.GITHUB_ACCESS_TOKEN
|
|
20
|
-
: undefined, branch = "main", recursive = true, unknown = directory_js_1.UnknownHandling.Warn, ignoreFiles = [], } = {}) {
|
|
21
|
+
: undefined, branch = "main", recursive = true, unknown = directory_js_1.UnknownHandling.Warn, ignoreFiles = [], ignorePaths, } = {}) {
|
|
21
22
|
super();
|
|
22
23
|
Object.defineProperty(this, "owner", {
|
|
23
24
|
enumerable: true,
|
|
@@ -73,6 +74,12 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
73
74
|
writable: true,
|
|
74
75
|
value: void 0
|
|
75
76
|
});
|
|
77
|
+
Object.defineProperty(this, "ignore", {
|
|
78
|
+
enumerable: true,
|
|
79
|
+
configurable: true,
|
|
80
|
+
writable: true,
|
|
81
|
+
value: void 0
|
|
82
|
+
});
|
|
76
83
|
const { owner, repo, path } = this.extractOwnerAndRepoAndPath(githubUrl);
|
|
77
84
|
this.owner = owner;
|
|
78
85
|
this.repo = repo;
|
|
@@ -82,6 +89,9 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
82
89
|
this.unknown = unknown;
|
|
83
90
|
this.accessToken = accessToken;
|
|
84
91
|
this.ignoreFiles = ignoreFiles;
|
|
92
|
+
if (ignorePaths) {
|
|
93
|
+
this.ignore = ignore_1.default.default().add(ignorePaths);
|
|
94
|
+
}
|
|
85
95
|
if (this.accessToken) {
|
|
86
96
|
this.headers = {
|
|
87
97
|
Authorization: `Bearer ${this.accessToken}`,
|
|
@@ -100,38 +110,43 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
100
110
|
await this.processDirectory(this.initialPath, documents);
|
|
101
111
|
return documents;
|
|
102
112
|
}
|
|
103
|
-
shouldIgnore(path) {
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
113
|
+
async shouldIgnore(path, fileType) {
|
|
114
|
+
if (fileType !== "dir" && isBinaryPath(path)) {
|
|
115
|
+
return true;
|
|
116
|
+
}
|
|
117
|
+
if (this.ignore !== undefined) {
|
|
118
|
+
return this.ignore.ignores(path);
|
|
119
|
+
}
|
|
120
|
+
return (fileType !== "dir" &&
|
|
121
|
+
this.ignoreFiles.some((pattern) => {
|
|
122
|
+
if (typeof pattern === "string") {
|
|
123
|
+
return path === pattern;
|
|
124
|
+
}
|
|
125
|
+
try {
|
|
126
|
+
return pattern.test(path);
|
|
127
|
+
}
|
|
128
|
+
catch {
|
|
129
|
+
throw new Error(`Unknown ignore file pattern: ${pattern}`);
|
|
130
|
+
}
|
|
131
|
+
}));
|
|
115
132
|
}
|
|
116
133
|
async processDirectory(path, documents) {
|
|
117
134
|
try {
|
|
118
135
|
const files = await this.fetchRepoFiles(path);
|
|
119
136
|
for (const file of files) {
|
|
120
|
-
if (file.type
|
|
121
|
-
if (
|
|
122
|
-
|
|
123
|
-
}
|
|
124
|
-
}
|
|
125
|
-
else {
|
|
126
|
-
try {
|
|
127
|
-
if (!isBinaryPath(file.name) && !this.shouldIgnore(file.path)) {
|
|
137
|
+
if (!(await this.shouldIgnore(file.path, file.type))) {
|
|
138
|
+
if (file.type !== "dir") {
|
|
139
|
+
try {
|
|
128
140
|
const fileContent = await this.fetchFileContent(file);
|
|
129
141
|
const metadata = { source: file.path };
|
|
130
142
|
documents.push(new document_js_1.Document({ pageContent: fileContent, metadata }));
|
|
131
143
|
}
|
|
144
|
+
catch (e) {
|
|
145
|
+
this.handleError(`Failed to fetch file content: ${file.path}, ${e}`);
|
|
146
|
+
}
|
|
132
147
|
}
|
|
133
|
-
|
|
134
|
-
this.
|
|
148
|
+
else if (this.recursive) {
|
|
149
|
+
await this.processDirectory(file.path, documents);
|
|
135
150
|
}
|
|
136
151
|
}
|
|
137
152
|
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { Ignore } from "ignore";
|
|
1
2
|
import { Document } from "../../document.js";
|
|
2
3
|
import { BaseDocumentLoader } from "../base.js";
|
|
3
4
|
import { UnknownHandling } from "../fs/directory.js";
|
|
@@ -7,6 +8,7 @@ export interface GithubRepoLoaderParams {
|
|
|
7
8
|
unknown?: UnknownHandling;
|
|
8
9
|
accessToken?: string;
|
|
9
10
|
ignoreFiles?: (string | RegExp)[];
|
|
11
|
+
ignorePaths?: string[];
|
|
10
12
|
}
|
|
11
13
|
export declare class GithubRepoLoader extends BaseDocumentLoader implements GithubRepoLoaderParams {
|
|
12
14
|
private readonly owner;
|
|
@@ -18,10 +20,11 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
|
|
|
18
20
|
unknown: UnknownHandling;
|
|
19
21
|
accessToken?: string;
|
|
20
22
|
ignoreFiles: (string | RegExp)[];
|
|
21
|
-
|
|
23
|
+
ignore?: Ignore;
|
|
24
|
+
constructor(githubUrl: string, { accessToken, branch, recursive, unknown, ignoreFiles, ignorePaths, }?: GithubRepoLoaderParams);
|
|
22
25
|
private extractOwnerAndRepoAndPath;
|
|
23
26
|
load(): Promise<Document[]>;
|
|
24
|
-
|
|
27
|
+
protected shouldIgnore(path: string, fileType: string): Promise<boolean>;
|
|
25
28
|
private processDirectory;
|
|
26
29
|
private fetchRepoFiles;
|
|
27
30
|
private fetchFileContent;
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import ignore from "ignore";
|
|
1
2
|
import binaryExtensions from "binary-extensions";
|
|
2
3
|
import { Document } from "../../document.js";
|
|
3
4
|
import { BaseDocumentLoader } from "../base.js";
|
|
@@ -11,7 +12,7 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
11
12
|
constructor(githubUrl, { accessToken = typeof process !== "undefined"
|
|
12
13
|
? // eslint-disable-next-line no-process-env
|
|
13
14
|
process.env?.GITHUB_ACCESS_TOKEN
|
|
14
|
-
: undefined, branch = "main", recursive = true, unknown = UnknownHandling.Warn, ignoreFiles = [], } = {}) {
|
|
15
|
+
: undefined, branch = "main", recursive = true, unknown = UnknownHandling.Warn, ignoreFiles = [], ignorePaths, } = {}) {
|
|
15
16
|
super();
|
|
16
17
|
Object.defineProperty(this, "owner", {
|
|
17
18
|
enumerable: true,
|
|
@@ -67,6 +68,12 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
67
68
|
writable: true,
|
|
68
69
|
value: void 0
|
|
69
70
|
});
|
|
71
|
+
Object.defineProperty(this, "ignore", {
|
|
72
|
+
enumerable: true,
|
|
73
|
+
configurable: true,
|
|
74
|
+
writable: true,
|
|
75
|
+
value: void 0
|
|
76
|
+
});
|
|
70
77
|
const { owner, repo, path } = this.extractOwnerAndRepoAndPath(githubUrl);
|
|
71
78
|
this.owner = owner;
|
|
72
79
|
this.repo = repo;
|
|
@@ -76,6 +83,9 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
76
83
|
this.unknown = unknown;
|
|
77
84
|
this.accessToken = accessToken;
|
|
78
85
|
this.ignoreFiles = ignoreFiles;
|
|
86
|
+
if (ignorePaths) {
|
|
87
|
+
this.ignore = ignore.default().add(ignorePaths);
|
|
88
|
+
}
|
|
79
89
|
if (this.accessToken) {
|
|
80
90
|
this.headers = {
|
|
81
91
|
Authorization: `Bearer ${this.accessToken}`,
|
|
@@ -94,38 +104,43 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
94
104
|
await this.processDirectory(this.initialPath, documents);
|
|
95
105
|
return documents;
|
|
96
106
|
}
|
|
97
|
-
shouldIgnore(path) {
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
107
|
+
async shouldIgnore(path, fileType) {
|
|
108
|
+
if (fileType !== "dir" && isBinaryPath(path)) {
|
|
109
|
+
return true;
|
|
110
|
+
}
|
|
111
|
+
if (this.ignore !== undefined) {
|
|
112
|
+
return this.ignore.ignores(path);
|
|
113
|
+
}
|
|
114
|
+
return (fileType !== "dir" &&
|
|
115
|
+
this.ignoreFiles.some((pattern) => {
|
|
116
|
+
if (typeof pattern === "string") {
|
|
117
|
+
return path === pattern;
|
|
118
|
+
}
|
|
119
|
+
try {
|
|
120
|
+
return pattern.test(path);
|
|
121
|
+
}
|
|
122
|
+
catch {
|
|
123
|
+
throw new Error(`Unknown ignore file pattern: ${pattern}`);
|
|
124
|
+
}
|
|
125
|
+
}));
|
|
109
126
|
}
|
|
110
127
|
async processDirectory(path, documents) {
|
|
111
128
|
try {
|
|
112
129
|
const files = await this.fetchRepoFiles(path);
|
|
113
130
|
for (const file of files) {
|
|
114
|
-
if (file.type
|
|
115
|
-
if (
|
|
116
|
-
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
else {
|
|
120
|
-
try {
|
|
121
|
-
if (!isBinaryPath(file.name) && !this.shouldIgnore(file.path)) {
|
|
131
|
+
if (!(await this.shouldIgnore(file.path, file.type))) {
|
|
132
|
+
if (file.type !== "dir") {
|
|
133
|
+
try {
|
|
122
134
|
const fileContent = await this.fetchFileContent(file);
|
|
123
135
|
const metadata = { source: file.path };
|
|
124
136
|
documents.push(new Document({ pageContent: fileContent, metadata }));
|
|
125
137
|
}
|
|
138
|
+
catch (e) {
|
|
139
|
+
this.handleError(`Failed to fetch file content: ${file.path}, ${e}`);
|
|
140
|
+
}
|
|
126
141
|
}
|
|
127
|
-
|
|
128
|
-
this.
|
|
142
|
+
else if (this.recursive) {
|
|
143
|
+
await this.processDirectory(file.path, documents);
|
|
129
144
|
}
|
|
130
145
|
}
|
|
131
146
|
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.GoogleVertexAI = void 0;
|
|
4
|
+
const base_js_1 = require("./base.cjs");
|
|
5
|
+
const googlevertexai_connection_js_1 = require("../util/googlevertexai-connection.cjs");
|
|
6
|
+
/**
|
|
7
|
+
* Enables calls to the Google Cloud's Vertex AI API to access
|
|
8
|
+
* Large Language Models.
|
|
9
|
+
*
|
|
10
|
+
* To use, you will need to have one of the following authentication
|
|
11
|
+
* methods in place:
|
|
12
|
+
* - You are logged into an account permitted to the Google Cloud project
|
|
13
|
+
* using Vertex AI.
|
|
14
|
+
* - You are running this on a machine using a service account permitted to
|
|
15
|
+
* the Google Cloud project using Vertex AI.
|
|
16
|
+
* - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the
|
|
17
|
+
* path of a credentials file for a service account permitted to the
|
|
18
|
+
* Google Cloud project using Vertex AI.
|
|
19
|
+
*/
|
|
20
|
+
class GoogleVertexAI extends base_js_1.BaseLLM {
|
|
21
|
+
constructor(fields) {
|
|
22
|
+
super(fields ?? {});
|
|
23
|
+
Object.defineProperty(this, "model", {
|
|
24
|
+
enumerable: true,
|
|
25
|
+
configurable: true,
|
|
26
|
+
writable: true,
|
|
27
|
+
value: "text-bison"
|
|
28
|
+
});
|
|
29
|
+
Object.defineProperty(this, "temperature", {
|
|
30
|
+
enumerable: true,
|
|
31
|
+
configurable: true,
|
|
32
|
+
writable: true,
|
|
33
|
+
value: 0.7
|
|
34
|
+
});
|
|
35
|
+
Object.defineProperty(this, "maxOutputTokens", {
|
|
36
|
+
enumerable: true,
|
|
37
|
+
configurable: true,
|
|
38
|
+
writable: true,
|
|
39
|
+
value: 1024
|
|
40
|
+
});
|
|
41
|
+
Object.defineProperty(this, "topP", {
|
|
42
|
+
enumerable: true,
|
|
43
|
+
configurable: true,
|
|
44
|
+
writable: true,
|
|
45
|
+
value: 0.8
|
|
46
|
+
});
|
|
47
|
+
Object.defineProperty(this, "topK", {
|
|
48
|
+
enumerable: true,
|
|
49
|
+
configurable: true,
|
|
50
|
+
writable: true,
|
|
51
|
+
value: 40
|
|
52
|
+
});
|
|
53
|
+
Object.defineProperty(this, "connection", {
|
|
54
|
+
enumerable: true,
|
|
55
|
+
configurable: true,
|
|
56
|
+
writable: true,
|
|
57
|
+
value: void 0
|
|
58
|
+
});
|
|
59
|
+
this.model = fields?.model ?? this.model;
|
|
60
|
+
this.temperature = fields?.temperature ?? this.temperature;
|
|
61
|
+
this.maxOutputTokens = fields?.maxOutputTokens ?? this.maxOutputTokens;
|
|
62
|
+
this.topP = fields?.topP ?? this.topP;
|
|
63
|
+
this.topK = fields?.topK ?? this.topK;
|
|
64
|
+
this.connection = new googlevertexai_connection_js_1.GoogleVertexAIConnection({ ...fields, ...this }, this.caller);
|
|
65
|
+
}
|
|
66
|
+
_llmType() {
|
|
67
|
+
return "googlevertexai";
|
|
68
|
+
}
|
|
69
|
+
async _generate(prompts, options) {
|
|
70
|
+
const generations = await Promise.all(prompts.map((prompt) => this._generatePrompt(prompt, options)));
|
|
71
|
+
return { generations };
|
|
72
|
+
}
|
|
73
|
+
async _generatePrompt(prompt, options) {
|
|
74
|
+
const instance = this.formatInstance(prompt);
|
|
75
|
+
const parameters = {
|
|
76
|
+
temperature: this.temperature,
|
|
77
|
+
topK: this.topK,
|
|
78
|
+
topP: this.topP,
|
|
79
|
+
maxOutputTokens: this.maxOutputTokens,
|
|
80
|
+
};
|
|
81
|
+
const result = await this.connection.request([instance], parameters, options);
|
|
82
|
+
const prediction = this.extractPredictionFromResponse(result);
|
|
83
|
+
return [
|
|
84
|
+
{
|
|
85
|
+
text: prediction.content,
|
|
86
|
+
generationInfo: prediction,
|
|
87
|
+
},
|
|
88
|
+
];
|
|
89
|
+
}
|
|
90
|
+
formatInstance(prompt) {
|
|
91
|
+
return { content: prompt };
|
|
92
|
+
}
|
|
93
|
+
extractPredictionFromResponse(result) {
|
|
94
|
+
return result?.data?.predictions[0];
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
exports.GoogleVertexAI = GoogleVertexAI;
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { BaseLLM } from "./base.js";
|
|
2
|
+
import { Generation, LLMResult } from "../schema/index.js";
|
|
3
|
+
import { GoogleVertexAIBaseLLMInput, GoogleVertexAIBasePrediction, GoogleVertexAILLMResponse } from "../types/googlevertexai-types.js";
|
|
4
|
+
export interface GoogleVertexAITextInput extends GoogleVertexAIBaseLLMInput {
|
|
5
|
+
}
|
|
6
|
+
interface GoogleVertexAILLMTextInstance {
|
|
7
|
+
content: string;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Models the data returned from the API call
|
|
11
|
+
*/
|
|
12
|
+
interface TextPrediction extends GoogleVertexAIBasePrediction {
|
|
13
|
+
content: string;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Enables calls to the Google Cloud's Vertex AI API to access
|
|
17
|
+
* Large Language Models.
|
|
18
|
+
*
|
|
19
|
+
* To use, you will need to have one of the following authentication
|
|
20
|
+
* methods in place:
|
|
21
|
+
* - You are logged into an account permitted to the Google Cloud project
|
|
22
|
+
* using Vertex AI.
|
|
23
|
+
* - You are running this on a machine using a service account permitted to
|
|
24
|
+
* the Google Cloud project using Vertex AI.
|
|
25
|
+
* - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the
|
|
26
|
+
* path of a credentials file for a service account permitted to the
|
|
27
|
+
* Google Cloud project using Vertex AI.
|
|
28
|
+
*/
|
|
29
|
+
export declare class GoogleVertexAI extends BaseLLM implements GoogleVertexAITextInput {
|
|
30
|
+
model: string;
|
|
31
|
+
temperature: number;
|
|
32
|
+
maxOutputTokens: number;
|
|
33
|
+
topP: number;
|
|
34
|
+
topK: number;
|
|
35
|
+
private connection;
|
|
36
|
+
constructor(fields?: GoogleVertexAITextInput);
|
|
37
|
+
_llmType(): string;
|
|
38
|
+
_generate(prompts: string[], options: this["ParsedCallOptions"]): Promise<LLMResult>;
|
|
39
|
+
_generatePrompt(prompt: string, options: this["ParsedCallOptions"]): Promise<Generation[]>;
|
|
40
|
+
formatInstance(prompt: string): GoogleVertexAILLMTextInstance;
|
|
41
|
+
extractPredictionFromResponse(result: GoogleVertexAILLMResponse<TextPrediction>): TextPrediction;
|
|
42
|
+
}
|
|
43
|
+
export {};
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { BaseLLM } from "./base.js";
|
|
2
|
+
import { GoogleVertexAIConnection } from "../util/googlevertexai-connection.js";
|
|
3
|
+
/**
|
|
4
|
+
* Enables calls to the Google Cloud's Vertex AI API to access
|
|
5
|
+
* Large Language Models.
|
|
6
|
+
*
|
|
7
|
+
* To use, you will need to have one of the following authentication
|
|
8
|
+
* methods in place:
|
|
9
|
+
* - You are logged into an account permitted to the Google Cloud project
|
|
10
|
+
* using Vertex AI.
|
|
11
|
+
* - You are running this on a machine using a service account permitted to
|
|
12
|
+
* the Google Cloud project using Vertex AI.
|
|
13
|
+
* - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the
|
|
14
|
+
* path of a credentials file for a service account permitted to the
|
|
15
|
+
* Google Cloud project using Vertex AI.
|
|
16
|
+
*/
|
|
17
|
+
export class GoogleVertexAI extends BaseLLM {
|
|
18
|
+
constructor(fields) {
|
|
19
|
+
super(fields ?? {});
|
|
20
|
+
Object.defineProperty(this, "model", {
|
|
21
|
+
enumerable: true,
|
|
22
|
+
configurable: true,
|
|
23
|
+
writable: true,
|
|
24
|
+
value: "text-bison"
|
|
25
|
+
});
|
|
26
|
+
Object.defineProperty(this, "temperature", {
|
|
27
|
+
enumerable: true,
|
|
28
|
+
configurable: true,
|
|
29
|
+
writable: true,
|
|
30
|
+
value: 0.7
|
|
31
|
+
});
|
|
32
|
+
Object.defineProperty(this, "maxOutputTokens", {
|
|
33
|
+
enumerable: true,
|
|
34
|
+
configurable: true,
|
|
35
|
+
writable: true,
|
|
36
|
+
value: 1024
|
|
37
|
+
});
|
|
38
|
+
Object.defineProperty(this, "topP", {
|
|
39
|
+
enumerable: true,
|
|
40
|
+
configurable: true,
|
|
41
|
+
writable: true,
|
|
42
|
+
value: 0.8
|
|
43
|
+
});
|
|
44
|
+
Object.defineProperty(this, "topK", {
|
|
45
|
+
enumerable: true,
|
|
46
|
+
configurable: true,
|
|
47
|
+
writable: true,
|
|
48
|
+
value: 40
|
|
49
|
+
});
|
|
50
|
+
Object.defineProperty(this, "connection", {
|
|
51
|
+
enumerable: true,
|
|
52
|
+
configurable: true,
|
|
53
|
+
writable: true,
|
|
54
|
+
value: void 0
|
|
55
|
+
});
|
|
56
|
+
this.model = fields?.model ?? this.model;
|
|
57
|
+
this.temperature = fields?.temperature ?? this.temperature;
|
|
58
|
+
this.maxOutputTokens = fields?.maxOutputTokens ?? this.maxOutputTokens;
|
|
59
|
+
this.topP = fields?.topP ?? this.topP;
|
|
60
|
+
this.topK = fields?.topK ?? this.topK;
|
|
61
|
+
this.connection = new GoogleVertexAIConnection({ ...fields, ...this }, this.caller);
|
|
62
|
+
}
|
|
63
|
+
_llmType() {
|
|
64
|
+
return "googlevertexai";
|
|
65
|
+
}
|
|
66
|
+
async _generate(prompts, options) {
|
|
67
|
+
const generations = await Promise.all(prompts.map((prompt) => this._generatePrompt(prompt, options)));
|
|
68
|
+
return { generations };
|
|
69
|
+
}
|
|
70
|
+
async _generatePrompt(prompt, options) {
|
|
71
|
+
const instance = this.formatInstance(prompt);
|
|
72
|
+
const parameters = {
|
|
73
|
+
temperature: this.temperature,
|
|
74
|
+
topK: this.topK,
|
|
75
|
+
topP: this.topP,
|
|
76
|
+
maxOutputTokens: this.maxOutputTokens,
|
|
77
|
+
};
|
|
78
|
+
const result = await this.connection.request([instance], parameters, options);
|
|
79
|
+
const prediction = this.extractPredictionFromResponse(result);
|
|
80
|
+
return [
|
|
81
|
+
{
|
|
82
|
+
text: prediction.content,
|
|
83
|
+
generationInfo: prediction,
|
|
84
|
+
},
|
|
85
|
+
];
|
|
86
|
+
}
|
|
87
|
+
formatInstance(prompt) {
|
|
88
|
+
return { content: prompt };
|
|
89
|
+
}
|
|
90
|
+
extractPredictionFromResponse(result) {
|
|
91
|
+
return result?.data?.predictions[0];
|
|
92
|
+
}
|
|
93
|
+
}
|
package/dist/text_splitter.cjs
CHANGED
|
@@ -25,14 +25,17 @@ class TextSplitter {
|
|
|
25
25
|
}
|
|
26
26
|
async createDocuments(texts,
|
|
27
27
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
28
|
-
metadatas = []) {
|
|
28
|
+
metadatas = [], chunkHeaderOptions = {}) {
|
|
29
|
+
// if no metadata is provided, we create an empty one for each text
|
|
29
30
|
const _metadatas = metadatas.length > 0 ? metadatas : new Array(texts.length).fill({});
|
|
31
|
+
const { chunkHeader = "", chunkOverlapHeader = "(cont'd) ", appendChunkOverlapHeader = false, } = chunkHeaderOptions;
|
|
30
32
|
const documents = new Array();
|
|
31
33
|
for (let i = 0; i < texts.length; i += 1) {
|
|
32
34
|
const text = texts[i];
|
|
33
35
|
let lineCounterIndex = 1;
|
|
34
36
|
let prevChunk = null;
|
|
35
37
|
for (const chunk of await this.splitText(text)) {
|
|
38
|
+
let pageContent = chunkHeader;
|
|
36
39
|
// we need to count the \n that are in the text before getting removed by the splitting
|
|
37
40
|
let numberOfIntermediateNewLines = 0;
|
|
38
41
|
if (prevChunk) {
|
|
@@ -40,6 +43,9 @@ class TextSplitter {
|
|
|
40
43
|
const indexEndPrevChunk = text.indexOf(prevChunk) + prevChunk.length;
|
|
41
44
|
const removedNewlinesFromSplittingText = text.slice(indexEndPrevChunk, indexChunk);
|
|
42
45
|
numberOfIntermediateNewLines = (removedNewlinesFromSplittingText.match(/\n/g) || []).length;
|
|
46
|
+
if (appendChunkOverlapHeader) {
|
|
47
|
+
pageContent += chunkOverlapHeader;
|
|
48
|
+
}
|
|
43
49
|
}
|
|
44
50
|
lineCounterIndex += numberOfIntermediateNewLines;
|
|
45
51
|
const newLinesCount = (chunk.match(/\n/g) || []).length;
|
|
@@ -54,8 +60,9 @@ class TextSplitter {
|
|
|
54
60
|
..._metadatas[i],
|
|
55
61
|
loc,
|
|
56
62
|
};
|
|
63
|
+
pageContent += chunk;
|
|
57
64
|
documents.push(new document_js_1.Document({
|
|
58
|
-
pageContent
|
|
65
|
+
pageContent,
|
|
59
66
|
metadata: metadataWithLinesNumber,
|
|
60
67
|
}));
|
|
61
68
|
lineCounterIndex += newLinesCount;
|
|
@@ -64,11 +71,11 @@ class TextSplitter {
|
|
|
64
71
|
}
|
|
65
72
|
return documents;
|
|
66
73
|
}
|
|
67
|
-
async splitDocuments(documents) {
|
|
74
|
+
async splitDocuments(documents, chunkHeaderOptions = {}) {
|
|
68
75
|
const selectedDocuments = documents.filter((doc) => doc.pageContent !== undefined);
|
|
69
76
|
const texts = selectedDocuments.map((doc) => doc.pageContent);
|
|
70
77
|
const metadatas = selectedDocuments.map((doc) => doc.metadata);
|
|
71
|
-
return this.createDocuments(texts, metadatas);
|
|
78
|
+
return this.createDocuments(texts, metadatas, chunkHeaderOptions);
|
|
72
79
|
}
|
|
73
80
|
joinDocs(docs, separator) {
|
|
74
81
|
const text = docs.join(separator).trim();
|
package/dist/text_splitter.d.ts
CHANGED
|
@@ -4,13 +4,18 @@ export interface TextSplitterParams {
|
|
|
4
4
|
chunkSize: number;
|
|
5
5
|
chunkOverlap: number;
|
|
6
6
|
}
|
|
7
|
+
export type TextSplitterChunkHeaderOptions = {
|
|
8
|
+
chunkHeader?: string;
|
|
9
|
+
chunkOverlapHeader?: string;
|
|
10
|
+
appendChunkOverlapHeader?: boolean;
|
|
11
|
+
};
|
|
7
12
|
export declare abstract class TextSplitter implements TextSplitterParams {
|
|
8
13
|
chunkSize: number;
|
|
9
14
|
chunkOverlap: number;
|
|
10
15
|
constructor(fields?: Partial<TextSplitterParams>);
|
|
11
16
|
abstract splitText(text: string): Promise<string[]>;
|
|
12
|
-
createDocuments(texts: string[], metadatas?: Record<string, any>[]): Promise<Document[]>;
|
|
13
|
-
splitDocuments(documents: Document[]): Promise<Document[]>;
|
|
17
|
+
createDocuments(texts: string[], metadatas?: Record<string, any>[], chunkHeaderOptions?: TextSplitterChunkHeaderOptions): Promise<Document[]>;
|
|
18
|
+
splitDocuments(documents: Document[], chunkHeaderOptions?: TextSplitterChunkHeaderOptions): Promise<Document[]>;
|
|
14
19
|
private joinDocs;
|
|
15
20
|
mergeSplits(splits: string[], separator: string): string[];
|
|
16
21
|
}
|
package/dist/text_splitter.js
CHANGED
|
@@ -22,14 +22,17 @@ export class TextSplitter {
|
|
|
22
22
|
}
|
|
23
23
|
async createDocuments(texts,
|
|
24
24
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
25
|
-
metadatas = []) {
|
|
25
|
+
metadatas = [], chunkHeaderOptions = {}) {
|
|
26
|
+
// if no metadata is provided, we create an empty one for each text
|
|
26
27
|
const _metadatas = metadatas.length > 0 ? metadatas : new Array(texts.length).fill({});
|
|
28
|
+
const { chunkHeader = "", chunkOverlapHeader = "(cont'd) ", appendChunkOverlapHeader = false, } = chunkHeaderOptions;
|
|
27
29
|
const documents = new Array();
|
|
28
30
|
for (let i = 0; i < texts.length; i += 1) {
|
|
29
31
|
const text = texts[i];
|
|
30
32
|
let lineCounterIndex = 1;
|
|
31
33
|
let prevChunk = null;
|
|
32
34
|
for (const chunk of await this.splitText(text)) {
|
|
35
|
+
let pageContent = chunkHeader;
|
|
33
36
|
// we need to count the \n that are in the text before getting removed by the splitting
|
|
34
37
|
let numberOfIntermediateNewLines = 0;
|
|
35
38
|
if (prevChunk) {
|
|
@@ -37,6 +40,9 @@ export class TextSplitter {
|
|
|
37
40
|
const indexEndPrevChunk = text.indexOf(prevChunk) + prevChunk.length;
|
|
38
41
|
const removedNewlinesFromSplittingText = text.slice(indexEndPrevChunk, indexChunk);
|
|
39
42
|
numberOfIntermediateNewLines = (removedNewlinesFromSplittingText.match(/\n/g) || []).length;
|
|
43
|
+
if (appendChunkOverlapHeader) {
|
|
44
|
+
pageContent += chunkOverlapHeader;
|
|
45
|
+
}
|
|
40
46
|
}
|
|
41
47
|
lineCounterIndex += numberOfIntermediateNewLines;
|
|
42
48
|
const newLinesCount = (chunk.match(/\n/g) || []).length;
|
|
@@ -51,8 +57,9 @@ export class TextSplitter {
|
|
|
51
57
|
..._metadatas[i],
|
|
52
58
|
loc,
|
|
53
59
|
};
|
|
60
|
+
pageContent += chunk;
|
|
54
61
|
documents.push(new Document({
|
|
55
|
-
pageContent
|
|
62
|
+
pageContent,
|
|
56
63
|
metadata: metadataWithLinesNumber,
|
|
57
64
|
}));
|
|
58
65
|
lineCounterIndex += newLinesCount;
|
|
@@ -61,11 +68,11 @@ export class TextSplitter {
|
|
|
61
68
|
}
|
|
62
69
|
return documents;
|
|
63
70
|
}
|
|
64
|
-
async splitDocuments(documents) {
|
|
71
|
+
async splitDocuments(documents, chunkHeaderOptions = {}) {
|
|
65
72
|
const selectedDocuments = documents.filter((doc) => doc.pageContent !== undefined);
|
|
66
73
|
const texts = selectedDocuments.map((doc) => doc.pageContent);
|
|
67
74
|
const metadatas = selectedDocuments.map((doc) => doc.metadata);
|
|
68
|
-
return this.createDocuments(texts, metadatas);
|
|
75
|
+
return this.createDocuments(texts, metadatas, chunkHeaderOptions);
|
|
69
76
|
}
|
|
70
77
|
joinDocs(docs, separator) {
|
|
71
78
|
const text = docs.join(separator).trim();
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { BaseLLMParams } from "../llms/index.js";
|
|
2
|
+
export interface GoogleVertexAIConnectionParams {
|
|
3
|
+
/** Hostname for the API call */
|
|
4
|
+
endpoint?: string;
|
|
5
|
+
/** Region where the LLM is stored */
|
|
6
|
+
location?: string;
|
|
7
|
+
/** Model to use */
|
|
8
|
+
model?: string;
|
|
9
|
+
}
|
|
10
|
+
export interface GoogleVertexAIModelParams {
|
|
11
|
+
/** Sampling temperature to use */
|
|
12
|
+
temperature?: number;
|
|
13
|
+
/**
|
|
14
|
+
* Maximum number of tokens to generate in the completion.
|
|
15
|
+
*/
|
|
16
|
+
maxOutputTokens?: number;
|
|
17
|
+
/**
|
|
18
|
+
* Top-p changes how the model selects tokens for output.
|
|
19
|
+
*
|
|
20
|
+
* Tokens are selected from most probable to least until the sum
|
|
21
|
+
* of their probabilities equals the top-p value.
|
|
22
|
+
*
|
|
23
|
+
* For example, if tokens A, B, and C have a probability of
|
|
24
|
+
* .3, .2, and .1 and the top-p value is .5, then the model will
|
|
25
|
+
* select either A or B as the next token (using temperature).
|
|
26
|
+
*/
|
|
27
|
+
topP?: number;
|
|
28
|
+
/**
|
|
29
|
+
* Top-k changes how the model selects tokens for output.
|
|
30
|
+
*
|
|
31
|
+
* A top-k of 1 means the selected token is the most probable among
|
|
32
|
+
* all tokens in the model’s vocabulary (also called greedy decoding),
|
|
33
|
+
* while a top-k of 3 means that the next token is selected from
|
|
34
|
+
* among the 3 most probable tokens (using temperature).
|
|
35
|
+
*/
|
|
36
|
+
topK?: number;
|
|
37
|
+
}
|
|
38
|
+
export interface GoogleVertexAIBaseLLMInput extends BaseLLMParams, GoogleVertexAIConnectionParams, GoogleVertexAIModelParams {
|
|
39
|
+
}
|
|
40
|
+
export interface GoogleVertexAIBasePrediction {
|
|
41
|
+
safetyAttributes?: any;
|
|
42
|
+
}
|
|
43
|
+
export interface GoogleVertexAILLMResponse<PredictionType extends GoogleVertexAIBasePrediction> {
|
|
44
|
+
data: {
|
|
45
|
+
predictions: PredictionType[];
|
|
46
|
+
};
|
|
47
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.GoogleVertexAIConnection = void 0;
|
|
4
|
+
const google_auth_library_1 = require("google-auth-library");
|
|
5
|
+
class GoogleVertexAIConnection {
|
|
6
|
+
constructor(fields, caller) {
|
|
7
|
+
Object.defineProperty(this, "caller", {
|
|
8
|
+
enumerable: true,
|
|
9
|
+
configurable: true,
|
|
10
|
+
writable: true,
|
|
11
|
+
value: void 0
|
|
12
|
+
});
|
|
13
|
+
Object.defineProperty(this, "endpoint", {
|
|
14
|
+
enumerable: true,
|
|
15
|
+
configurable: true,
|
|
16
|
+
writable: true,
|
|
17
|
+
value: "us-central1-aiplatform.googleapis.com"
|
|
18
|
+
});
|
|
19
|
+
Object.defineProperty(this, "location", {
|
|
20
|
+
enumerable: true,
|
|
21
|
+
configurable: true,
|
|
22
|
+
writable: true,
|
|
23
|
+
value: "us-central1"
|
|
24
|
+
});
|
|
25
|
+
Object.defineProperty(this, "model", {
|
|
26
|
+
enumerable: true,
|
|
27
|
+
configurable: true,
|
|
28
|
+
writable: true,
|
|
29
|
+
value: void 0
|
|
30
|
+
});
|
|
31
|
+
Object.defineProperty(this, "auth", {
|
|
32
|
+
enumerable: true,
|
|
33
|
+
configurable: true,
|
|
34
|
+
writable: true,
|
|
35
|
+
value: void 0
|
|
36
|
+
});
|
|
37
|
+
this.caller = caller;
|
|
38
|
+
this.endpoint = fields?.endpoint ?? this.endpoint;
|
|
39
|
+
this.location = fields?.location ?? this.location;
|
|
40
|
+
this.model = fields?.model ?? this.model;
|
|
41
|
+
this.auth = new google_auth_library_1.GoogleAuth({
|
|
42
|
+
scopes: "https://www.googleapis.com/auth/cloud-platform",
|
|
43
|
+
});
|
|
44
|
+
}
|
|
45
|
+
async request(instances, parameters, options) {
|
|
46
|
+
const client = await this.auth.getClient();
|
|
47
|
+
const projectId = await this.auth.getProjectId();
|
|
48
|
+
const url = `https://${this.endpoint}/v1/projects/${projectId}/locations/${this.location}/publishers/google/models/${this.model}:predict`;
|
|
49
|
+
const method = "POST";
|
|
50
|
+
const data = {
|
|
51
|
+
instances,
|
|
52
|
+
parameters,
|
|
53
|
+
};
|
|
54
|
+
const opts = {
|
|
55
|
+
url,
|
|
56
|
+
method,
|
|
57
|
+
data,
|
|
58
|
+
};
|
|
59
|
+
async function _request() {
|
|
60
|
+
return client.request(opts);
|
|
61
|
+
}
|
|
62
|
+
const response = await this.caller.callWithOptions({ signal: options.signal }, _request.bind(client));
|
|
63
|
+
return response;
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
exports.GoogleVertexAIConnection = GoogleVertexAIConnection;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { GoogleAuth } from "google-auth-library";
|
|
2
|
+
import { BaseLanguageModelCallOptions } from "../base_language/index.js";
|
|
3
|
+
import { AsyncCaller } from "./async_caller.js";
|
|
4
|
+
import { GoogleVertexAIBasePrediction, GoogleVertexAIConnectionParams, GoogleVertexAILLMResponse, GoogleVertexAIModelParams } from "../types/googlevertexai-types.js";
|
|
5
|
+
export declare class GoogleVertexAIConnection<CallOptions extends BaseLanguageModelCallOptions, InstanceType, PredictionType extends GoogleVertexAIBasePrediction> implements GoogleVertexAIConnectionParams {
|
|
6
|
+
caller: AsyncCaller;
|
|
7
|
+
endpoint: string;
|
|
8
|
+
location: string;
|
|
9
|
+
model: string;
|
|
10
|
+
auth: GoogleAuth;
|
|
11
|
+
constructor(fields: GoogleVertexAIConnectionParams | undefined, caller: AsyncCaller);
|
|
12
|
+
request(instances: [InstanceType], parameters: GoogleVertexAIModelParams, options: CallOptions): Promise<GoogleVertexAILLMResponse<PredictionType>>;
|
|
13
|
+
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { GoogleAuth } from "google-auth-library";
|
|
2
|
+
export class GoogleVertexAIConnection {
|
|
3
|
+
constructor(fields, caller) {
|
|
4
|
+
Object.defineProperty(this, "caller", {
|
|
5
|
+
enumerable: true,
|
|
6
|
+
configurable: true,
|
|
7
|
+
writable: true,
|
|
8
|
+
value: void 0
|
|
9
|
+
});
|
|
10
|
+
Object.defineProperty(this, "endpoint", {
|
|
11
|
+
enumerable: true,
|
|
12
|
+
configurable: true,
|
|
13
|
+
writable: true,
|
|
14
|
+
value: "us-central1-aiplatform.googleapis.com"
|
|
15
|
+
});
|
|
16
|
+
Object.defineProperty(this, "location", {
|
|
17
|
+
enumerable: true,
|
|
18
|
+
configurable: true,
|
|
19
|
+
writable: true,
|
|
20
|
+
value: "us-central1"
|
|
21
|
+
});
|
|
22
|
+
Object.defineProperty(this, "model", {
|
|
23
|
+
enumerable: true,
|
|
24
|
+
configurable: true,
|
|
25
|
+
writable: true,
|
|
26
|
+
value: void 0
|
|
27
|
+
});
|
|
28
|
+
Object.defineProperty(this, "auth", {
|
|
29
|
+
enumerable: true,
|
|
30
|
+
configurable: true,
|
|
31
|
+
writable: true,
|
|
32
|
+
value: void 0
|
|
33
|
+
});
|
|
34
|
+
this.caller = caller;
|
|
35
|
+
this.endpoint = fields?.endpoint ?? this.endpoint;
|
|
36
|
+
this.location = fields?.location ?? this.location;
|
|
37
|
+
this.model = fields?.model ?? this.model;
|
|
38
|
+
this.auth = new GoogleAuth({
|
|
39
|
+
scopes: "https://www.googleapis.com/auth/cloud-platform",
|
|
40
|
+
});
|
|
41
|
+
}
|
|
42
|
+
async request(instances, parameters, options) {
|
|
43
|
+
const client = await this.auth.getClient();
|
|
44
|
+
const projectId = await this.auth.getProjectId();
|
|
45
|
+
const url = `https://${this.endpoint}/v1/projects/${projectId}/locations/${this.location}/publishers/google/models/${this.model}:predict`;
|
|
46
|
+
const method = "POST";
|
|
47
|
+
const data = {
|
|
48
|
+
instances,
|
|
49
|
+
parameters,
|
|
50
|
+
};
|
|
51
|
+
const opts = {
|
|
52
|
+
url,
|
|
53
|
+
method,
|
|
54
|
+
data,
|
|
55
|
+
};
|
|
56
|
+
async function _request() {
|
|
57
|
+
return client.request(opts);
|
|
58
|
+
}
|
|
59
|
+
const response = await this.caller.callWithOptions({ signal: options.signal }, _request.bind(client));
|
|
60
|
+
return response;
|
|
61
|
+
}
|
|
62
|
+
}
|
|
@@ -60,6 +60,12 @@ class Chroma extends base_js_1.VectorStore {
|
|
|
60
60
|
writable: true,
|
|
61
61
|
value: void 0
|
|
62
62
|
});
|
|
63
|
+
Object.defineProperty(this, "filter", {
|
|
64
|
+
enumerable: true,
|
|
65
|
+
configurable: true,
|
|
66
|
+
writable: true,
|
|
67
|
+
value: void 0
|
|
68
|
+
});
|
|
63
69
|
this.numDimensions = args.numDimensions;
|
|
64
70
|
this.embeddings = embeddings;
|
|
65
71
|
this.collectionName = ensureCollectionName(args.collectionName);
|
|
@@ -69,6 +75,7 @@ class Chroma extends base_js_1.VectorStore {
|
|
|
69
75
|
else if ("url" in args) {
|
|
70
76
|
this.url = args.url || "http://localhost:8000";
|
|
71
77
|
}
|
|
78
|
+
this.filter = args.filter;
|
|
72
79
|
}
|
|
73
80
|
async addDocuments(documents) {
|
|
74
81
|
const texts = documents.map(({ pageContent }) => pageContent);
|
|
@@ -78,9 +85,16 @@ class Chroma extends base_js_1.VectorStore {
|
|
|
78
85
|
if (!this.collection) {
|
|
79
86
|
if (!this.index) {
|
|
80
87
|
const { ChromaClient } = await Chroma.imports();
|
|
81
|
-
this.index = new ChromaClient(this.url);
|
|
88
|
+
this.index = new ChromaClient({ path: this.url });
|
|
89
|
+
}
|
|
90
|
+
try {
|
|
91
|
+
this.collection = await this.index.getOrCreateCollection({
|
|
92
|
+
name: this.collectionName,
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
catch (err) {
|
|
96
|
+
throw new Error(`Chroma getOrCreateCollection error: ${err}`);
|
|
82
97
|
}
|
|
83
|
-
this.collection = await this.index.getOrCreateCollection(this.collectionName);
|
|
84
98
|
}
|
|
85
99
|
return this.collection;
|
|
86
100
|
}
|
|
@@ -99,13 +113,26 @@ class Chroma extends base_js_1.VectorStore {
|
|
|
99
113
|
}
|
|
100
114
|
const collection = await this.ensureCollection();
|
|
101
115
|
const docstoreSize = await collection.count();
|
|
102
|
-
await collection.add(
|
|
116
|
+
await collection.add({
|
|
117
|
+
ids: Array.from({ length: vectors.length }, (_, i) => (docstoreSize + i).toString()),
|
|
118
|
+
embeddings: vectors,
|
|
119
|
+
metadatas: documents.map(({ metadata }) => metadata),
|
|
120
|
+
documents: documents.map(({ pageContent }) => pageContent),
|
|
121
|
+
});
|
|
103
122
|
}
|
|
104
|
-
async similaritySearchVectorWithScore(query, k) {
|
|
123
|
+
async similaritySearchVectorWithScore(query, k, filter) {
|
|
124
|
+
if (filter && this.filter) {
|
|
125
|
+
throw new Error("cannot provide both `filter` and `this.filter`");
|
|
126
|
+
}
|
|
127
|
+
const _filter = filter ?? this.filter;
|
|
105
128
|
const collection = await this.ensureCollection();
|
|
106
129
|
// similaritySearchVectorWithScore supports one query vector at a time
|
|
107
130
|
// chroma supports multiple query vectors at a time
|
|
108
|
-
const result = await collection.query(
|
|
131
|
+
const result = await collection.query({
|
|
132
|
+
query_embeddings: query,
|
|
133
|
+
n_results: k,
|
|
134
|
+
where: { ..._filter },
|
|
135
|
+
});
|
|
109
136
|
const { ids, distances, documents, metadatas } = result;
|
|
110
137
|
if (!ids || !distances || !documents || !metadatas) {
|
|
111
138
|
return [];
|
|
@@ -119,8 +146,8 @@ class Chroma extends base_js_1.VectorStore {
|
|
|
119
146
|
for (let i = 0; i < firstIds.length; i += 1) {
|
|
120
147
|
results.push([
|
|
121
148
|
new document_js_1.Document({
|
|
122
|
-
pageContent: firstDocuments[i],
|
|
123
|
-
metadata: firstMetadatas[i],
|
|
149
|
+
pageContent: firstDocuments?.[i] ?? "",
|
|
150
|
+
metadata: firstMetadatas?.[i] ?? {},
|
|
124
151
|
}),
|
|
125
152
|
firstDistances[i],
|
|
126
153
|
]);
|
|
@@ -6,22 +6,26 @@ export type ChromaLibArgs = {
|
|
|
6
6
|
url?: string;
|
|
7
7
|
numDimensions?: number;
|
|
8
8
|
collectionName?: string;
|
|
9
|
+
filter?: object;
|
|
9
10
|
} | {
|
|
10
11
|
index?: ChromaClientT;
|
|
11
12
|
numDimensions?: number;
|
|
12
13
|
collectionName?: string;
|
|
14
|
+
filter?: object;
|
|
13
15
|
};
|
|
14
16
|
export declare class Chroma extends VectorStore {
|
|
17
|
+
FilterType: object;
|
|
15
18
|
index?: ChromaClientT;
|
|
16
19
|
collection?: Collection;
|
|
17
20
|
collectionName: string;
|
|
18
21
|
numDimensions?: number;
|
|
19
22
|
url: string;
|
|
23
|
+
filter?: object;
|
|
20
24
|
constructor(embeddings: Embeddings, args: ChromaLibArgs);
|
|
21
25
|
addDocuments(documents: Document[]): Promise<void>;
|
|
22
26
|
ensureCollection(): Promise<Collection>;
|
|
23
27
|
addVectors(vectors: number[][], documents: Document[]): Promise<void>;
|
|
24
|
-
similaritySearchVectorWithScore(query: number[], k: number): Promise<[Document<Record<string, any>>, number][]>;
|
|
28
|
+
similaritySearchVectorWithScore(query: number[], k: number, filter?: this["FilterType"]): Promise<[Document<Record<string, any>>, number][]>;
|
|
25
29
|
static fromTexts(texts: string[], metadatas: object[] | object, embeddings: Embeddings, dbConfig: {
|
|
26
30
|
collectionName?: string;
|
|
27
31
|
url?: string;
|
|
@@ -34,6 +34,12 @@ export class Chroma extends VectorStore {
|
|
|
34
34
|
writable: true,
|
|
35
35
|
value: void 0
|
|
36
36
|
});
|
|
37
|
+
Object.defineProperty(this, "filter", {
|
|
38
|
+
enumerable: true,
|
|
39
|
+
configurable: true,
|
|
40
|
+
writable: true,
|
|
41
|
+
value: void 0
|
|
42
|
+
});
|
|
37
43
|
this.numDimensions = args.numDimensions;
|
|
38
44
|
this.embeddings = embeddings;
|
|
39
45
|
this.collectionName = ensureCollectionName(args.collectionName);
|
|
@@ -43,6 +49,7 @@ export class Chroma extends VectorStore {
|
|
|
43
49
|
else if ("url" in args) {
|
|
44
50
|
this.url = args.url || "http://localhost:8000";
|
|
45
51
|
}
|
|
52
|
+
this.filter = args.filter;
|
|
46
53
|
}
|
|
47
54
|
async addDocuments(documents) {
|
|
48
55
|
const texts = documents.map(({ pageContent }) => pageContent);
|
|
@@ -52,9 +59,16 @@ export class Chroma extends VectorStore {
|
|
|
52
59
|
if (!this.collection) {
|
|
53
60
|
if (!this.index) {
|
|
54
61
|
const { ChromaClient } = await Chroma.imports();
|
|
55
|
-
this.index = new ChromaClient(this.url);
|
|
62
|
+
this.index = new ChromaClient({ path: this.url });
|
|
63
|
+
}
|
|
64
|
+
try {
|
|
65
|
+
this.collection = await this.index.getOrCreateCollection({
|
|
66
|
+
name: this.collectionName,
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
catch (err) {
|
|
70
|
+
throw new Error(`Chroma getOrCreateCollection error: ${err}`);
|
|
56
71
|
}
|
|
57
|
-
this.collection = await this.index.getOrCreateCollection(this.collectionName);
|
|
58
72
|
}
|
|
59
73
|
return this.collection;
|
|
60
74
|
}
|
|
@@ -73,13 +87,26 @@ export class Chroma extends VectorStore {
|
|
|
73
87
|
}
|
|
74
88
|
const collection = await this.ensureCollection();
|
|
75
89
|
const docstoreSize = await collection.count();
|
|
76
|
-
await collection.add(
|
|
90
|
+
await collection.add({
|
|
91
|
+
ids: Array.from({ length: vectors.length }, (_, i) => (docstoreSize + i).toString()),
|
|
92
|
+
embeddings: vectors,
|
|
93
|
+
metadatas: documents.map(({ metadata }) => metadata),
|
|
94
|
+
documents: documents.map(({ pageContent }) => pageContent),
|
|
95
|
+
});
|
|
77
96
|
}
|
|
78
|
-
async similaritySearchVectorWithScore(query, k) {
|
|
97
|
+
async similaritySearchVectorWithScore(query, k, filter) {
|
|
98
|
+
if (filter && this.filter) {
|
|
99
|
+
throw new Error("cannot provide both `filter` and `this.filter`");
|
|
100
|
+
}
|
|
101
|
+
const _filter = filter ?? this.filter;
|
|
79
102
|
const collection = await this.ensureCollection();
|
|
80
103
|
// similaritySearchVectorWithScore supports one query vector at a time
|
|
81
104
|
// chroma supports multiple query vectors at a time
|
|
82
|
-
const result = await collection.query(
|
|
105
|
+
const result = await collection.query({
|
|
106
|
+
query_embeddings: query,
|
|
107
|
+
n_results: k,
|
|
108
|
+
where: { ..._filter },
|
|
109
|
+
});
|
|
83
110
|
const { ids, distances, documents, metadatas } = result;
|
|
84
111
|
if (!ids || !distances || !documents || !metadatas) {
|
|
85
112
|
return [];
|
|
@@ -93,8 +120,8 @@ export class Chroma extends VectorStore {
|
|
|
93
120
|
for (let i = 0; i < firstIds.length; i += 1) {
|
|
94
121
|
results.push([
|
|
95
122
|
new Document({
|
|
96
|
-
pageContent: firstDocuments[i],
|
|
97
|
-
metadata: firstMetadatas[i],
|
|
123
|
+
pageContent: firstDocuments?.[i] ?? "",
|
|
124
|
+
metadata: firstMetadatas?.[i] ?? {},
|
|
98
125
|
}),
|
|
99
126
|
firstDistances[i],
|
|
100
127
|
]);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// CommonJS entrypoint: re-export the compiled Vertex AI LLM implementation.
module.exports = require('../dist/llms/googlevertexai.cjs');
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Type declarations re-exported from the compiled Vertex AI LLM module.
export * from '../dist/llms/googlevertexai.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// ESM entrypoint: re-export the compiled Vertex AI LLM implementation.
export * from '../dist/llms/googlevertexai.js'
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "langchain",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.80",
|
|
4
4
|
"description": "Typescript bindings for langchain",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -85,6 +85,9 @@
|
|
|
85
85
|
"llms/replicate.cjs",
|
|
86
86
|
"llms/replicate.js",
|
|
87
87
|
"llms/replicate.d.ts",
|
|
88
|
+
"llms/googlevertexai.cjs",
|
|
89
|
+
"llms/googlevertexai.js",
|
|
90
|
+
"llms/googlevertexai.d.ts",
|
|
88
91
|
"llms/sagemaker_endpoint.cjs",
|
|
89
92
|
"llms/sagemaker_endpoint.js",
|
|
90
93
|
"llms/sagemaker_endpoint.d.ts",
|
|
@@ -377,7 +380,7 @@
|
|
|
377
380
|
"apify-client": "^2.7.1",
|
|
378
381
|
"axios": "^0.26.0",
|
|
379
382
|
"cheerio": "^1.0.0-rc.12",
|
|
380
|
-
"chromadb": "^1.4.
|
|
383
|
+
"chromadb": "^1.4.2",
|
|
381
384
|
"cohere-ai": "^5.0.2",
|
|
382
385
|
"d3-dsv": "^2.0.0",
|
|
383
386
|
"dotenv": "^16.0.3",
|
|
@@ -390,9 +393,11 @@
|
|
|
390
393
|
"eslint-plugin-no-instanceof": "^1.0.1",
|
|
391
394
|
"eslint-plugin-prettier": "^4.2.1",
|
|
392
395
|
"faiss-node": "^0.1.1",
|
|
396
|
+
"google-auth-library": "^8.8.0",
|
|
393
397
|
"graphql": "^16.6.0",
|
|
394
398
|
"hnswlib-node": "^1.4.2",
|
|
395
399
|
"html-to-text": "^9.0.5",
|
|
400
|
+
"ignore": "^5.2.0",
|
|
396
401
|
"jest": "^29.5.0",
|
|
397
402
|
"mammoth": "^1.5.1",
|
|
398
403
|
"meriyah": "^4.3.7",
|
|
@@ -431,13 +436,15 @@
|
|
|
431
436
|
"apify-client": "^2.7.1",
|
|
432
437
|
"axios": "*",
|
|
433
438
|
"cheerio": "^1.0.0-rc.12",
|
|
434
|
-
"chromadb": "^1.4.
|
|
439
|
+
"chromadb": "^1.4.2",
|
|
435
440
|
"cohere-ai": "^5.0.2",
|
|
436
441
|
"d3-dsv": "^2.0.0",
|
|
437
442
|
"epub2": "^3.0.1",
|
|
438
443
|
"faiss-node": "^0.1.1",
|
|
444
|
+
"google-auth-library": "^8.8.0",
|
|
439
445
|
"hnswlib-node": "^1.4.2",
|
|
440
446
|
"html-to-text": "^9.0.5",
|
|
447
|
+
"ignore": "^5.2.0",
|
|
441
448
|
"mammoth": "*",
|
|
442
449
|
"meriyah": "*",
|
|
443
450
|
"mongodb": "^5.2.0",
|
|
@@ -524,6 +531,9 @@
|
|
|
524
531
|
"html-to-text": {
|
|
525
532
|
"optional": true
|
|
526
533
|
},
|
|
534
|
+
"ignore": {
|
|
535
|
+
"optional": true
|
|
536
|
+
},
|
|
527
537
|
"mammoth": {
|
|
528
538
|
"optional": true
|
|
529
539
|
},
|
|
@@ -731,6 +741,11 @@
|
|
|
731
741
|
"import": "./llms/replicate.js",
|
|
732
742
|
"require": "./llms/replicate.cjs"
|
|
733
743
|
},
|
|
744
|
+
"./llms/googlevertexai": {
|
|
745
|
+
"types": "./llms/googlevertexai.d.ts",
|
|
746
|
+
"import": "./llms/googlevertexai.js",
|
|
747
|
+
"require": "./llms/googlevertexai.cjs"
|
|
748
|
+
},
|
|
734
749
|
"./llms/sagemaker_endpoint": {
|
|
735
750
|
"types": "./llms/sagemaker_endpoint.d.ts",
|
|
736
751
|
"import": "./llms/sagemaker_endpoint.js",
|