npm - langchain - Versions diffs - 0.0.85 → 0.0.87 - Mend

langchain 0.0.85 → 0.0.87

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/callbacks/handlers/tracer_langchain.cjs +12 -67
package/dist/callbacks/handlers/tracer_langchain.d.ts +5 -23
package/dist/callbacks/handlers/tracer_langchain.js +12 -67
package/dist/chains/conversational_retrieval_chain.cjs +20 -2
package/dist/chains/conversational_retrieval_chain.d.ts +3 -2
package/dist/chains/conversational_retrieval_chain.js +20 -2
package/dist/client/langchainplus.cjs +37 -56
package/dist/client/langchainplus.d.ts +8 -5
package/dist/client/langchainplus.js +38 -57
package/dist/memory/zep.cjs +13 -1
package/dist/memory/zep.js +14 -2
package/dist/text_splitter.cjs +406 -79
package/dist/text_splitter.d.ts +8 -2
package/dist/text_splitter.js +405 -78
package/package.json +1 -1

package/dist/text_splitter.js CHANGED

@@ -14,12 +14,35 @@ export class TextSplitter {
             writable: true,
             value: 200
         });
+        Object.defineProperty(this, "keepSeparator", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: false
+        });
         this.chunkSize = fields?.chunkSize ?? this.chunkSize;
         this.chunkOverlap = fields?.chunkOverlap ?? this.chunkOverlap;
+        this.keepSeparator = fields?.keepSeparator ?? this.keepSeparator;
         if (this.chunkOverlap >= this.chunkSize) {
             throw new Error("Cannot have chunkOverlap >= chunkSize");
         }
     }
+    splitOnSeparator(text, separator) {
+        let splits;
+        if (separator) {
+            if (this.keepSeparator) {
+                const regexEscapedSeparator = separator.replace(/[/\-\\^$*+?.()|[\]{}]/g, "\\$&");
+                splits = text.split(new RegExp(`(?=${regexEscapedSeparator})`));
+            }
+            else {
+                splits = text.split(separator);
+            }
+        }
+        else {
+            splits = text.split("");
+        }
+        return splits.filter((s) => s !== "");
+    }
     async createDocuments(texts,
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
     metadatas = [], chunkHeaderOptions = {}) {
@@ -128,16 +151,27 @@ export class CharacterTextSplitter extends TextSplitter {
     }
     async splitText(text) {
         // First we naively split the large input into a bunch of smaller ones.
-        let splits;
-        if (this.separator) {
-            splits = text.split(this.separator);
-        }
-        else {
-            splits = text.split("");
-        }
-        return this.mergeSplits(splits, this.separator);
+        const splits = this.splitOnSeparator(text, this.separator);
+        return this.mergeSplits(splits, this.keepSeparator ? "" : this.separator);
     }
 }
+export const SupportedTextSplitterLanguages = [
+    "cpp",
+    "go",
+    "java",
+    "js",
+    "php",
+    "proto",
+    "python",
+    "rst",
+    "ruby",
+    "rust",
+    "scala",
+    "swift",
+    "markdown",
+    "latex",
+    "html",
+];
 export class RecursiveCharacterTextSplitter extends TextSplitter {
     constructor(fields) {
         super(fields);
@@ -148,51 +182,394 @@ export class RecursiveCharacterTextSplitter extends TextSplitter {
             value: ["\n\n", "\n", " ", ""]
         });
         this.separators = fields?.separators ?? this.separators;
+        this.keepSeparator = fields?.keepSeparator ?? true;
     }
-    async splitText(text) {
+    async _splitText(text, separators) {
         const finalChunks = [];
         // Get appropriate separator to use
-        let separator = this.separators[this.separators.length - 1];
-        for (const s of this.separators) {
+        let separator = separators[separators.length - 1];
+        let newSeparators;
+        for (let i = 0; i < separators.length; i += 1) {
+            const s = separators[i];
             if (s === "") {
                 separator = s;
                 break;
             }
             if (text.includes(s)) {
                 separator = s;
+                newSeparators = separators.slice(i + 1);
                 break;
             }
         }
         // Now that we have the separator, split the text
-        let splits;
-        if (separator) {
-            splits = text.split(separator);
-        }
-        else {
-            splits = text.split("");
-        }
+        const splits = this.splitOnSeparator(text, separator);
         // Now go merging things, recursively splitting longer texts.
         let goodSplits = [];
+        const _separator = this.keepSeparator ? "" : separator;
         for (const s of splits) {
             if (s.length < this.chunkSize) {
                 goodSplits.push(s);
             }
             else {
                 if (goodSplits.length) {
-                    const mergedText = this.mergeSplits(goodSplits, separator);
+                    const mergedText = this.mergeSplits(goodSplits, _separator);
                     finalChunks.push(...mergedText);
                     goodSplits = [];
                 }
-                const otherInfo = await this.splitText(s);
-                finalChunks.push(...otherInfo);
+                if (!newSeparators) {
+                    finalChunks.push(s);
+                }
+                else {
+                    const otherInfo = await this._splitText(s, newSeparators);
+                    finalChunks.push(...otherInfo);
+                }
             }
         }
         if (goodSplits.length) {
-            const mergedText = this.mergeSplits(goodSplits, separator);
+            const mergedText = this.mergeSplits(goodSplits, _separator);
             finalChunks.push(...mergedText);
         }
         return finalChunks;
     }
+    async splitText(text) {
+        return this._splitText(text, this.separators);
+    }
+    static fromLanguage(language, options) {
+        return new RecursiveCharacterTextSplitter({
+            ...options,
+            separators: RecursiveCharacterTextSplitter.getSeparatorsForLanguage(language),
+        });
+    }
+    static getSeparatorsForLanguage(language) {
+        if (language === "cpp") {
+            return [
+                // Split along class definitions
+                "\nclass ",
+                // Split along function definitions
+                "\nvoid ",
+                "\nint ",
+                "\nfloat ",
+                "\ndouble ",
+                // Split along control flow statements
+                "\nif ",
+                "\nfor ",
+                "\nwhile ",
+                "\nswitch ",
+                "\ncase ",
+                // Split by the normal type of lines
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ];
+        }
+        else if (language === "go") {
+            return [
+                // Split along function definitions
+                "\nfunc ",
+                "\nvar ",
+                "\nconst ",
+                "\ntype ",
+                // Split along control flow statements
+                "\nif ",
+                "\nfor ",
+                "\nswitch ",
+                "\ncase ",
+                // Split by the normal type of lines
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ];
+        }
+        else if (language === "java") {
+            return [
+                // Split along class definitions
+                "\nclass ",
+                // Split along method definitions
+                "\npublic ",
+                "\nprotected ",
+                "\nprivate ",
+                "\nstatic ",
+                // Split along control flow statements
+                "\nif ",
+                "\nfor ",
+                "\nwhile ",
+                "\nswitch ",
+                "\ncase ",
+                // Split by the normal type of lines
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ];
+        }
+        else if (language === "js") {
+            return [
+                // Split along function definitions
+                "\nfunction ",
+                "\nconst ",
+                "\nlet ",
+                "\nvar ",
+                "\nclass ",
+                // Split along control flow statements
+                "\nif ",
+                "\nfor ",
+                "\nwhile ",
+                "\nswitch ",
+                "\ncase ",
+                "\ndefault ",
+                // Split by the normal type of lines
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ];
+        }
+        else if (language === "php") {
+            return [
+                // Split along function definitions
+                "\nfunction ",
+                // Split along class definitions
+                "\nclass ",
+                // Split along control flow statements
+                "\nif ",
+                "\nforeach ",
+                "\nwhile ",
+                "\ndo ",
+                "\nswitch ",
+                "\ncase ",
+                // Split by the normal type of lines
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ];
+        }
+        else if (language === "proto") {
+            return [
+                // Split along message definitions
+                "\nmessage ",
+                // Split along service definitions
+                "\nservice ",
+                // Split along enum definitions
+                "\nenum ",
+                // Split along option definitions
+                "\noption ",
+                // Split along import statements
+                "\nimport ",
+                // Split along syntax declarations
+                "\nsyntax ",
+                // Split by the normal type of lines
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ];
+        }
+        else if (language === "python") {
+            return [
+                // First, try to split along class definitions
+                "\nclass ",
+                "\ndef ",
+                "\n\tdef ",
+                // Now split by the normal type of lines
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ];
+        }
+        else if (language === "rst") {
+            return [
+                // Split along section titles
+                "\n===\n",
+                "\n---\n",
+                "\n***\n",
+                // Split along directive markers
+                "\n.. ",
+                // Split by the normal type of lines
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ];
+        }
+        else if (language === "ruby") {
+            return [
+                // Split along method definitions
+                "\ndef ",
+                "\nclass ",
+                // Split along control flow statements
+                "\nif ",
+                "\nunless ",
+                "\nwhile ",
+                "\nfor ",
+                "\ndo ",
+                "\nbegin ",
+                "\nrescue ",
+                // Split by the normal type of lines
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ];
+        }
+        else if (language === "rust") {
+            return [
+                // Split along function definitions
+                "\nfn ",
+                "\nconst ",
+                "\nlet ",
+                // Split along control flow statements
+                "\nif ",
+                "\nwhile ",
+                "\nfor ",
+                "\nloop ",
+                "\nmatch ",
+                "\nconst ",
+                // Split by the normal type of lines
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ];
+        }
+        else if (language === "scala") {
+            return [
+                // Split along class definitions
+                "\nclass ",
+                "\nobject ",
+                // Split along method definitions
+                "\ndef ",
+                "\nval ",
+                "\nvar ",
+                // Split along control flow statements
+                "\nif ",
+                "\nfor ",
+                "\nwhile ",
+                "\nmatch ",
+                "\ncase ",
+                // Split by the normal type of lines
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ];
+        }
+        else if (language === "swift") {
+            return [
+                // Split along function definitions
+                "\nfunc ",
+                // Split along class definitions
+                "\nclass ",
+                "\nstruct ",
+                "\nenum ",
+                // Split along control flow statements
+                "\nif ",
+                "\nfor ",
+                "\nwhile ",
+                "\ndo ",
+                "\nswitch ",
+                "\ncase ",
+                // Split by the normal type of lines
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ];
+        }
+        else if (language === "markdown") {
+            return [
+                // First, try to split along Markdown headings (starting with level 2)
+                "\n## ",
+                "\n### ",
+                "\n#### ",
+                "\n##### ",
+                "\n###### ",
+                // Note the alternative syntax for headings (below) is not handled here
+                // Heading level 2
+                // ---------------
+                // End of code block
+                "```\n\n",
+                // Horizontal lines
+                "\n\n***\n\n",
+                "\n\n---\n\n",
+                "\n\n___\n\n",
+                // Note that this splitter doesn't handle horizontal lines defined
+                // by *three or more* of ***, ---, or ___, but this is not handled
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ];
+        }
+        else if (language === "latex") {
+            return [
+                // First, try to split along Latex sections
+                "\n\\chapter{",
+                "\n\\section{",
+                "\n\\subsection{",
+                "\n\\subsubsection{",
+                // Now split by environments
+                "\n\\begin{enumerate}",
+                "\n\\begin{itemize}",
+                "\n\\begin{description}",
+                "\n\\begin{list}",
+                "\n\\begin{quote}",
+                "\n\\begin{quotation}",
+                "\n\\begin{verse}",
+                "\n\\begin{verbatim}",
+                // Now split by math environments
+                "\n\\begin{align}",
+                "$$",
+                "$",
+                // Now split by the normal type of lines
+                "\n\n",
+                "\n",
+                " ",
+                "",
+            ];
+        }
+        else if (language === "html") {
+            return [
+                // First, try to split along HTML tags
+                "<body>",
+                "<div>",
+                "<p>",
+                "<br>",
+                "<li>",
+                "<h1>",
+                "<h2>",
+                "<h3>",
+                "<h4>",
+                "<h5>",
+                "<h6>",
+                "<span>",
+                "<table>",
+                "<tr>",
+                "<td>",
+                "<th>",
+                "<ul>",
+                "<ol>",
+                "<header>",
+                "<footer>",
+                "<nav>",
+                // Head
+                "<head>",
+                "<style>",
+                "<script>",
+                "<meta>",
+                "<title>",
+                // Normal type of lines
+                " ",
+                "",
+            ];
+        }
+        else {
+            throw new Error(`Language ${language} is not supported.`);
+        }
+    }
 }
 /**
  * Implementation of splitter which looks at tokens.
@@ -248,67 +625,17 @@ export class TokenTextSplitter extends TextSplitter {
 }
 export class MarkdownTextSplitter extends RecursiveCharacterTextSplitter {
     constructor(fields) {
-        super(fields);
-        Object.defineProperty(this, "separators", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: [
-                // First, try to split along Markdown headings (starting with level 2)
-                "\n## ",
-                "\n### ",
-                "\n#### ",
-                "\n##### ",
-                "\n###### ",
-                // Note the alternative syntax for headings (below) is not handled here
-                // Heading level 2
-                // ---------------
-                // End of code block
-                "```\n\n",
-                // Horizontal lines
-                "\n\n***\n\n",
-                "\n\n---\n\n",
-                "\n\n___\n\n",
-                // Note that this splitter doesn't handle horizontal lines defined
-                // by *three or more* of ***, ---, or ___, but this is not handled
-                "\n\n",
-                "\n",
-                " ",
-                "",
-            ]
+        super({
+            ...fields,
+            separators: RecursiveCharacterTextSplitter.getSeparatorsForLanguage("markdown"),
         });
     }
 }
 export class LatexTextSplitter extends RecursiveCharacterTextSplitter {
     constructor(fields) {
-        super(fields);
-        Object.defineProperty(this, "separators", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: [
-                // First, try to split along Latex sections
-                "\n\\chapter{",
-                "\n\\section{",
-                "\n\\subsection{",
-                "\n\\subsubsection{",
-                // Now split by environments
-                "\n\\begin{enumerate}",
-                "\n\\begin{itemize}",
-                "\n\\begin{description}",
-                "\n\\begin{list}",
-                "\n\\begin{quote}",
-                "\n\\begin{quotation}",
-                "\n\\begin{verse}",
-                "\n\\begin{verbatim}",
-                // Now split by math environments
-                "\n\\begin{align}",
-                "$$",
-                "$",
-                // Now split by the normal type of lines
-                " ",
-                "",
-            ]
+        super({
+            ...fields,
+            separators: RecursiveCharacterTextSplitter.getSeparatorsForLanguage("latex"),
         });
     }
 }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "langchain",
-  "version": "0.0.85",
+  "version": "0.0.87",
   "description": "Typescript bindings for langchain",
   "type": "module",
   "engines": {