@n8n/n8n-nodes-langchain 1.101.1 → 1.101.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/nodes/text_splitters/TextSplitterTokenSplitter/TokenTextSplitter.js +1 -3
- package/dist/nodes/text_splitters/TextSplitterTokenSplitter/TokenTextSplitter.js.map +1 -1
- package/dist/utils/tokenizer/tiktoken.js +27 -30
- package/dist/utils/tokenizer/tiktoken.js.map +1 -1
- package/dist/utils/tokenizer/token-estimator.js +1 -1
- package/dist/utils/tokenizer/token-estimator.js.map +1 -1
- package/package.json +2 -2
|
@@ -50,9 +50,7 @@ class TokenTextSplitter extends import_textsplitters.TextSplitter {
|
|
|
50
50
|
return splits;
|
|
51
51
|
}
|
|
52
52
|
try {
|
|
53
|
-
|
|
54
|
-
this.tokenizer = await (0, import_tiktoken.getEncoding)(this.encodingName);
|
|
55
|
-
}
|
|
53
|
+
this.tokenizer ??= (0, import_tiktoken.getEncoding)(this.encodingName);
|
|
56
54
|
const splits = [];
|
|
57
55
|
const input_ids = this.tokenizer.encode(text, this.allowedSpecial, this.disallowedSpecial);
|
|
58
56
|
let start_idx = 0;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../../nodes/text_splitters/TextSplitterTokenSplitter/TokenTextSplitter.ts"],"sourcesContent":["import type { TokenTextSplitterParams } from '@langchain/textsplitters';\nimport { TextSplitter } from '@langchain/textsplitters';\nimport
|
|
1
|
+
{"version":3,"sources":["../../../../nodes/text_splitters/TextSplitterTokenSplitter/TokenTextSplitter.ts"],"sourcesContent":["import type { TokenTextSplitterParams } from '@langchain/textsplitters';\nimport { TextSplitter } from '@langchain/textsplitters';\nimport { hasLongSequentialRepeat } from '@utils/helpers';\nimport { getEncoding } from '@utils/tokenizer/tiktoken';\nimport { estimateTextSplitsByTokens } from '@utils/tokenizer/token-estimator';\nimport type * as tiktoken from 'js-tiktoken';\n\n/**\n * Implementation of splitter which looks at tokens.\n * This is override of the LangChain TokenTextSplitter\n * to use the n8n tokenizer utility which uses local JSON encodings\n */\nexport class TokenTextSplitter extends TextSplitter implements TokenTextSplitterParams {\n\tstatic lc_name() {\n\t\treturn 'TokenTextSplitter';\n\t}\n\n\tencodingName: tiktoken.TiktokenEncoding;\n\n\tallowedSpecial: 'all' | string[];\n\n\tdisallowedSpecial: 'all' | string[];\n\n\tprivate tokenizer: tiktoken.Tiktoken | undefined;\n\n\tconstructor(fields?: Partial<TokenTextSplitterParams>) {\n\t\tsuper(fields);\n\n\t\tthis.encodingName = fields?.encodingName ?? 'cl100k_base';\n\t\tthis.allowedSpecial = fields?.allowedSpecial ?? [];\n\t\tthis.disallowedSpecial = fields?.disallowedSpecial ?? 
'all';\n\t}\n\n\tasync splitText(text: string): Promise<string[]> {\n\t\ttry {\n\t\t\t// Validate input\n\t\t\tif (!text || typeof text !== 'string') {\n\t\t\t\treturn [];\n\t\t\t}\n\n\t\t\t// Check for repetitive content\n\t\t\tif (hasLongSequentialRepeat(text)) {\n\t\t\t\tconst splits = estimateTextSplitsByTokens(\n\t\t\t\t\ttext,\n\t\t\t\t\tthis.chunkSize,\n\t\t\t\t\tthis.chunkOverlap,\n\t\t\t\t\tthis.encodingName,\n\t\t\t\t);\n\t\t\t\treturn splits;\n\t\t\t}\n\n\t\t\t// Use tiktoken for normal text\n\t\t\ttry {\n\t\t\t\tthis.tokenizer ??= getEncoding(this.encodingName);\n\n\t\t\t\tconst splits: string[] = [];\n\t\t\t\tconst input_ids = this.tokenizer.encode(text, this.allowedSpecial, this.disallowedSpecial);\n\n\t\t\t\tlet start_idx = 0;\n\t\t\t\tlet chunkCount = 0;\n\n\t\t\t\twhile (start_idx < input_ids.length) {\n\t\t\t\t\tif (start_idx > 0) {\n\t\t\t\t\t\tstart_idx = Math.max(0, start_idx - this.chunkOverlap);\n\t\t\t\t\t}\n\t\t\t\t\tconst end_idx = Math.min(start_idx + this.chunkSize, input_ids.length);\n\t\t\t\t\tconst chunk_ids = input_ids.slice(start_idx, end_idx);\n\n\t\t\t\t\tsplits.push(this.tokenizer.decode(chunk_ids));\n\n\t\t\t\t\tchunkCount++;\n\t\t\t\t\tstart_idx = end_idx;\n\t\t\t\t}\n\n\t\t\t\treturn splits;\n\t\t\t} catch (tiktokenError) {\n\t\t\t\t// Fall back to character-based splitting if tiktoken fails\n\t\t\t\treturn estimateTextSplitsByTokens(\n\t\t\t\t\ttext,\n\t\t\t\t\tthis.chunkSize,\n\t\t\t\t\tthis.chunkOverlap,\n\t\t\t\t\tthis.encodingName,\n\t\t\t\t);\n\t\t\t}\n\t\t} catch (error) {\n\t\t\t// Return empty array on complete failure\n\t\t\treturn 
[];\n\t\t}\n\t}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AACA,2BAA6B;AAC7B,qBAAwC;AACxC,sBAA4B;AAC5B,6BAA2C;AAQpC,MAAM,0BAA0B,kCAAgD;AAAA,EACtF,OAAO,UAAU;AAChB,WAAO;AAAA,EACR;AAAA,EAUA,YAAY,QAA2C;AACtD,UAAM,MAAM;AAEZ,SAAK,eAAe,QAAQ,gBAAgB;AAC5C,SAAK,iBAAiB,QAAQ,kBAAkB,CAAC;AACjD,SAAK,oBAAoB,QAAQ,qBAAqB;AAAA,EACvD;AAAA,EAEA,MAAM,UAAU,MAAiC;AAChD,QAAI;AAEH,UAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACtC,eAAO,CAAC;AAAA,MACT;AAGA,cAAI,wCAAwB,IAAI,GAAG;AAClC,cAAM,aAAS;AAAA,UACd;AAAA,UACA,KAAK;AAAA,UACL,KAAK;AAAA,UACL,KAAK;AAAA,QACN;AACA,eAAO;AAAA,MACR;AAGA,UAAI;AACH,aAAK,kBAAc,6BAAY,KAAK,YAAY;AAEhD,cAAM,SAAmB,CAAC;AAC1B,cAAM,YAAY,KAAK,UAAU,OAAO,MAAM,KAAK,gBAAgB,KAAK,iBAAiB;AAEzF,YAAI,YAAY;AAChB,YAAI,aAAa;AAEjB,eAAO,YAAY,UAAU,QAAQ;AACpC,cAAI,YAAY,GAAG;AAClB,wBAAY,KAAK,IAAI,GAAG,YAAY,KAAK,YAAY;AAAA,UACtD;AACA,gBAAM,UAAU,KAAK,IAAI,YAAY,KAAK,WAAW,UAAU,MAAM;AACrE,gBAAM,YAAY,UAAU,MAAM,WAAW,OAAO;AAEpD,iBAAO,KAAK,KAAK,UAAU,OAAO,SAAS,CAAC;AAE5C;AACA,sBAAY;AAAA,QACb;AAEA,eAAO;AAAA,MACR,SAAS,eAAe;AAEvB,mBAAO;AAAA,UACN;AAAA,UACA,KAAK;AAAA,UACL,KAAK;AAAA,UACL,KAAK;AAAA,QACN;AAAA,MACD;AAAA,IACD,SAAS,OAAO;AAEf,aAAO,CAAC;AAAA,IACT;AAAA,EACD;AACD;","names":[]}
|
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __create = Object.create;
|
|
3
2
|
var __defProp = Object.defineProperty;
|
|
4
3
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
4
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
7
5
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
6
|
var __export = (target, all) => {
|
|
9
7
|
for (var name in all)
|
|
@@ -17,14 +15,6 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
17
15
|
}
|
|
18
16
|
return to;
|
|
19
17
|
};
|
|
20
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
-
mod
|
|
27
|
-
));
|
|
28
18
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
19
|
var tiktoken_exports = {};
|
|
30
20
|
__export(tiktoken_exports, {
|
|
@@ -32,29 +22,36 @@ __export(tiktoken_exports, {
|
|
|
32
22
|
getEncoding: () => getEncoding
|
|
33
23
|
});
|
|
34
24
|
module.exports = __toCommonJS(tiktoken_exports);
|
|
25
|
+
var import_fs = require("fs");
|
|
35
26
|
var import_lite = require("js-tiktoken/lite");
|
|
36
|
-
var
|
|
37
|
-
var
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
27
|
+
var import_n8n_workflow = require("n8n-workflow");
|
|
28
|
+
var import_path = require("path");
|
|
29
|
+
const cache = {};
|
|
30
|
+
const loadJSONFile = (filename) => {
|
|
31
|
+
const filePath = (0, import_path.join)(__dirname, filename);
|
|
32
|
+
const content = (0, import_fs.readFileSync)(filePath, "utf-8");
|
|
33
|
+
return (0, import_n8n_workflow.jsonParse)(content);
|
|
34
|
+
};
|
|
35
|
+
function getEncoding(encoding) {
|
|
36
|
+
if (cache[encoding]) {
|
|
37
|
+
return cache[encoding];
|
|
38
|
+
}
|
|
39
|
+
let jsonData;
|
|
40
|
+
switch (encoding) {
|
|
41
|
+
case "o200k_base":
|
|
42
|
+
jsonData = loadJSONFile("./o200k_base.json");
|
|
43
|
+
break;
|
|
44
|
+
case "cl100k_base":
|
|
45
|
+
jsonData = loadJSONFile("./cl100k_base.json");
|
|
46
|
+
break;
|
|
47
|
+
default:
|
|
48
|
+
jsonData = loadJSONFile("./cl100k_base.json");
|
|
53
49
|
}
|
|
54
|
-
|
|
50
|
+
cache[encoding] = new import_lite.Tiktoken(jsonData);
|
|
51
|
+
return cache[encoding];
|
|
55
52
|
}
|
|
56
|
-
|
|
57
|
-
return
|
|
53
|
+
function encodingForModel(model) {
|
|
54
|
+
return getEncoding((0, import_lite.getEncodingNameForModel)(model));
|
|
58
55
|
}
|
|
59
56
|
// Annotate the CommonJS export names for ESM import in node:
|
|
60
57
|
0 && (module.exports = {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../utils/tokenizer/tiktoken.ts"],"sourcesContent":["import type { TiktokenBPE, TiktokenEncoding, TiktokenModel } from 'js-tiktoken/lite';\nimport { Tiktoken, getEncodingNameForModel } from 'js-tiktoken/lite';\
|
|
1
|
+
{"version":3,"sources":["../../../utils/tokenizer/tiktoken.ts"],"sourcesContent":["import { readFileSync } from 'fs';\nimport type { TiktokenBPE, TiktokenEncoding, TiktokenModel } from 'js-tiktoken/lite';\nimport { Tiktoken, getEncodingNameForModel } from 'js-tiktoken/lite';\nimport { jsonParse } from 'n8n-workflow';\nimport { join } from 'path';\n\nconst cache: Record<string, Tiktoken> = {};\n\nconst loadJSONFile = (filename: string): TiktokenBPE => {\n\tconst filePath = join(__dirname, filename);\n\tconst content = readFileSync(filePath, 'utf-8');\n\treturn jsonParse(content);\n};\n\nexport function getEncoding(encoding: TiktokenEncoding): Tiktoken {\n\tif (cache[encoding]) {\n\t\treturn cache[encoding];\n\t}\n\n\tlet jsonData: TiktokenBPE;\n\n\tswitch (encoding) {\n\t\tcase 'o200k_base':\n\t\t\tjsonData = loadJSONFile('./o200k_base.json');\n\t\t\tbreak;\n\t\tcase 'cl100k_base':\n\t\t\tjsonData = loadJSONFile('./cl100k_base.json');\n\t\t\tbreak;\n\t\tdefault:\n\t\t\t// Fall back to cl100k_base for unsupported encodings\n\t\t\tjsonData = loadJSONFile('./cl100k_base.json');\n\t}\n\n\tcache[encoding] = new Tiktoken(jsonData);\n\treturn cache[encoding];\n}\n\nexport function encodingForModel(model: TiktokenModel): Tiktoken {\n\treturn getEncoding(getEncodingNameForModel(model));\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,gBAA6B;AAE7B,kBAAkD;AAClD,0BAA0B;AAC1B,kBAAqB;AAErB,MAAM,QAAkC,CAAC;AAEzC,MAAM,eAAe,CAAC,aAAkC;AACvD,QAAM,eAAW,kBAAK,WAAW,QAAQ;AACzC,QAAM,cAAU,wBAAa,UAAU,OAAO;AAC9C,aAAO,+BAAU,OAAO;AACzB;AAEO,SAAS,YAAY,UAAsC;AACjE,MAAI,MAAM,QAAQ,GAAG;AACpB,WAAO,MAAM,QAAQ;AAAA,EACtB;AAEA,MAAI;AAEJ,UAAQ,UAAU;AAAA,IACjB,KAAK;AACJ,iBAAW,aAAa,mBAAmB;AAC3C;AAAA,IACD,KAAK;AACJ,iBAAW,aAAa,oBAAoB;AAC5C;AAAA,IACD;AAEC,iBAAW,aAAa,oBAAoB;AAAA,EAC9C;AAEA,QAAM,QAAQ,IAAI,IAAI,qBAAS,QAAQ;AACvC,SAAO,MAAM,QAAQ;AACtB;AAEO,SAAS,iBAAiB,OAAgC;AAChE,SAAO,gBAAY,qCAAwB,KAAK,CAAC;AAClD;","names":[]}
|
|
@@ -82,7 +82,7 @@ async function estimateTokensFromStringList(list, model) {
|
|
|
82
82
|
if (!Array.isArray(list)) {
|
|
83
83
|
return 0;
|
|
84
84
|
}
|
|
85
|
-
const encoder =
|
|
85
|
+
const encoder = (0, import_tiktoken.encodingForModel)(model);
|
|
86
86
|
const encodedListLength = await Promise.all(
|
|
87
87
|
list.map(async (text) => {
|
|
88
88
|
try {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../utils/tokenizer/token-estimator.ts"],"sourcesContent":["/**\n * Token estimation utilities for handling text without using tiktoken.\n * This is used as a fallback when tiktoken would be too slow (e.g., with repetitive content).\n */\n\nimport type { TiktokenModel } from 'js-tiktoken';\n\nimport { encodingForModel } from './tiktoken';\nimport { hasLongSequentialRepeat } from '../helpers';\n\n/**\n * Model-specific average characters per token ratios.\n * These are approximate values based on typical English text.\n */\nconst MODEL_CHAR_PER_TOKEN_RATIOS: Record<string, number> = {\n\t'gpt-4o': 3.8,\n\t'gpt-4': 4.0,\n\t'gpt-3.5-turbo': 4.0,\n\tcl100k_base: 4.0,\n\to200k_base: 3.5,\n\tp50k_base: 4.2,\n\tr50k_base: 4.2,\n};\n\n/**\n * Estimates the number of tokens in a text based on character count.\n * This is much faster than tiktoken but less accurate.\n *\n * @param text The text to estimate tokens for\n * @param model The model or encoding name (optional)\n * @returns Estimated number of tokens\n */\nexport function estimateTokensByCharCount(text: string, model: string = 'cl100k_base'): number {\n\ttry {\n\t\t// Validate input\n\t\tif (!text || typeof text !== 'string' || text.length === 0) {\n\t\t\treturn 0;\n\t\t}\n\n\t\t// Get the ratio for the specific model, or use default\n\t\tconst charsPerToken = MODEL_CHAR_PER_TOKEN_RATIOS[model] || 4.0;\n\n\t\t// Validate ratio\n\t\tif (!Number.isFinite(charsPerToken) || charsPerToken <= 0) {\n\t\t\t// Fallback to default ratio\n\t\t\tconst estimatedTokens = Math.ceil(text.length / 4.0);\n\t\t\treturn estimatedTokens;\n\t\t}\n\n\t\t// Calculate estimated tokens\n\t\tconst estimatedTokens = Math.ceil(text.length / charsPerToken);\n\n\t\treturn estimatedTokens;\n\t} catch (error) {\n\t\t// Return conservative estimate on error\n\t\treturn Math.ceil((text?.length || 0) / 4.0);\n\t}\n}\n\n/**\n * Estimates tokens for text splitting purposes.\n * Returns chunk boundaries based on character 
positions rather than token positions.\n *\n * @param text The text to split\n * @param chunkSize Target chunk size in tokens\n * @param chunkOverlap Overlap between chunks in tokens\n * @param model The model or encoding name (optional)\n * @returns Array of text chunks\n */\nexport function estimateTextSplitsByTokens(\n\ttext: string,\n\tchunkSize: number,\n\tchunkOverlap: number,\n\tmodel: string = 'cl100k_base',\n): string[] {\n\ttry {\n\t\t// Validate inputs\n\t\tif (!text || typeof text !== 'string' || text.length === 0) {\n\t\t\treturn [];\n\t\t}\n\n\t\t// Validate numeric parameters\n\t\tif (!Number.isFinite(chunkSize) || chunkSize <= 0) {\n\t\t\t// Return whole text as single chunk if invalid chunk size\n\t\t\treturn [text];\n\t\t}\n\n\t\t// Ensure overlap is valid and less than chunk size\n\t\tconst validOverlap =\n\t\t\tNumber.isFinite(chunkOverlap) && chunkOverlap >= 0\n\t\t\t\t? Math.min(chunkOverlap, chunkSize - 1)\n\t\t\t\t: 0;\n\n\t\tconst charsPerToken = MODEL_CHAR_PER_TOKEN_RATIOS[model] || 4.0;\n\t\tconst chunkSizeInChars = Math.floor(chunkSize * charsPerToken);\n\t\tconst overlapInChars = Math.floor(validOverlap * charsPerToken);\n\n\t\tconst chunks: string[] = [];\n\t\tlet start = 0;\n\n\t\twhile (start < text.length) {\n\t\t\tconst end = Math.min(start + chunkSizeInChars, text.length);\n\t\t\tchunks.push(text.slice(start, end));\n\n\t\t\tif (end >= text.length) {\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\t\t// Move to next chunk with overlap\n\t\t\tstart = Math.max(end - overlapInChars, start + 1);\n\t\t}\n\n\t\treturn chunks;\n\t} catch (error) {\n\t\t// Return text as single chunk on error\n\t\treturn text ? 
[text] : [];\n\t}\n}\n\n/**\n * Estimates the total number of tokens for a list of strings.\n * Uses tiktoken for normal text but falls back to character-based estimation\n * for repetitive content or on errors.\n *\n * @param list Array of strings to estimate tokens for\n * @param model The model or encoding name to use for estimation\n * @returns Total estimated number of tokens across all strings\n */\nexport async function estimateTokensFromStringList(\n\tlist: string[],\n\tmodel: TiktokenModel,\n): Promise<number> {\n\ttry {\n\t\t// Validate input\n\t\tif (!Array.isArray(list)) {\n\t\t\treturn 0;\n\t\t}\n\n\t\tconst encoder =
|
|
1
|
+
{"version":3,"sources":["../../../utils/tokenizer/token-estimator.ts"],"sourcesContent":["/**\n * Token estimation utilities for handling text without using tiktoken.\n * This is used as a fallback when tiktoken would be too slow (e.g., with repetitive content).\n */\n\nimport type { TiktokenModel } from 'js-tiktoken';\n\nimport { encodingForModel } from './tiktoken';\nimport { hasLongSequentialRepeat } from '../helpers';\n\n/**\n * Model-specific average characters per token ratios.\n * These are approximate values based on typical English text.\n */\nconst MODEL_CHAR_PER_TOKEN_RATIOS: Record<string, number> = {\n\t'gpt-4o': 3.8,\n\t'gpt-4': 4.0,\n\t'gpt-3.5-turbo': 4.0,\n\tcl100k_base: 4.0,\n\to200k_base: 3.5,\n\tp50k_base: 4.2,\n\tr50k_base: 4.2,\n};\n\n/**\n * Estimates the number of tokens in a text based on character count.\n * This is much faster than tiktoken but less accurate.\n *\n * @param text The text to estimate tokens for\n * @param model The model or encoding name (optional)\n * @returns Estimated number of tokens\n */\nexport function estimateTokensByCharCount(text: string, model: string = 'cl100k_base'): number {\n\ttry {\n\t\t// Validate input\n\t\tif (!text || typeof text !== 'string' || text.length === 0) {\n\t\t\treturn 0;\n\t\t}\n\n\t\t// Get the ratio for the specific model, or use default\n\t\tconst charsPerToken = MODEL_CHAR_PER_TOKEN_RATIOS[model] || 4.0;\n\n\t\t// Validate ratio\n\t\tif (!Number.isFinite(charsPerToken) || charsPerToken <= 0) {\n\t\t\t// Fallback to default ratio\n\t\t\tconst estimatedTokens = Math.ceil(text.length / 4.0);\n\t\t\treturn estimatedTokens;\n\t\t}\n\n\t\t// Calculate estimated tokens\n\t\tconst estimatedTokens = Math.ceil(text.length / charsPerToken);\n\n\t\treturn estimatedTokens;\n\t} catch (error) {\n\t\t// Return conservative estimate on error\n\t\treturn Math.ceil((text?.length || 0) / 4.0);\n\t}\n}\n\n/**\n * Estimates tokens for text splitting purposes.\n * Returns chunk boundaries based on character 
positions rather than token positions.\n *\n * @param text The text to split\n * @param chunkSize Target chunk size in tokens\n * @param chunkOverlap Overlap between chunks in tokens\n * @param model The model or encoding name (optional)\n * @returns Array of text chunks\n */\nexport function estimateTextSplitsByTokens(\n\ttext: string,\n\tchunkSize: number,\n\tchunkOverlap: number,\n\tmodel: string = 'cl100k_base',\n): string[] {\n\ttry {\n\t\t// Validate inputs\n\t\tif (!text || typeof text !== 'string' || text.length === 0) {\n\t\t\treturn [];\n\t\t}\n\n\t\t// Validate numeric parameters\n\t\tif (!Number.isFinite(chunkSize) || chunkSize <= 0) {\n\t\t\t// Return whole text as single chunk if invalid chunk size\n\t\t\treturn [text];\n\t\t}\n\n\t\t// Ensure overlap is valid and less than chunk size\n\t\tconst validOverlap =\n\t\t\tNumber.isFinite(chunkOverlap) && chunkOverlap >= 0\n\t\t\t\t? Math.min(chunkOverlap, chunkSize - 1)\n\t\t\t\t: 0;\n\n\t\tconst charsPerToken = MODEL_CHAR_PER_TOKEN_RATIOS[model] || 4.0;\n\t\tconst chunkSizeInChars = Math.floor(chunkSize * charsPerToken);\n\t\tconst overlapInChars = Math.floor(validOverlap * charsPerToken);\n\n\t\tconst chunks: string[] = [];\n\t\tlet start = 0;\n\n\t\twhile (start < text.length) {\n\t\t\tconst end = Math.min(start + chunkSizeInChars, text.length);\n\t\t\tchunks.push(text.slice(start, end));\n\n\t\t\tif (end >= text.length) {\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\t\t// Move to next chunk with overlap\n\t\t\tstart = Math.max(end - overlapInChars, start + 1);\n\t\t}\n\n\t\treturn chunks;\n\t} catch (error) {\n\t\t// Return text as single chunk on error\n\t\treturn text ? 
[text] : [];\n\t}\n}\n\n/**\n * Estimates the total number of tokens for a list of strings.\n * Uses tiktoken for normal text but falls back to character-based estimation\n * for repetitive content or on errors.\n *\n * @param list Array of strings to estimate tokens for\n * @param model The model or encoding name to use for estimation\n * @returns Total estimated number of tokens across all strings\n */\nexport async function estimateTokensFromStringList(\n\tlist: string[],\n\tmodel: TiktokenModel,\n): Promise<number> {\n\ttry {\n\t\t// Validate input\n\t\tif (!Array.isArray(list)) {\n\t\t\treturn 0;\n\t\t}\n\n\t\tconst encoder = encodingForModel(model);\n\t\tconst encodedListLength = await Promise.all(\n\t\t\tlist.map(async (text) => {\n\t\t\t\ttry {\n\t\t\t\t\t// Handle null/undefined text\n\t\t\t\t\tif (!text || typeof text !== 'string') {\n\t\t\t\t\t\treturn 0;\n\t\t\t\t\t}\n\n\t\t\t\t\t// Check for repetitive content\n\t\t\t\t\tif (hasLongSequentialRepeat(text)) {\n\t\t\t\t\t\tconst estimatedTokens = estimateTokensByCharCount(text, model);\n\t\t\t\t\t\treturn estimatedTokens;\n\t\t\t\t\t}\n\n\t\t\t\t\t// Use tiktoken for normal text\n\t\t\t\t\ttry {\n\t\t\t\t\t\tconst tokens = encoder.encode(text);\n\t\t\t\t\t\treturn tokens.length;\n\t\t\t\t\t} catch (encodingError) {\n\t\t\t\t\t\t// Fall back to estimation if tiktoken fails\n\t\t\t\t\t\treturn estimateTokensByCharCount(text, model);\n\t\t\t\t\t}\n\t\t\t\t} catch (itemError) {\n\t\t\t\t\t// Return 0 for individual item errors\n\t\t\t\t\treturn 0;\n\t\t\t\t}\n\t\t\t}),\n\t\t);\n\n\t\tconst totalTokens = encodedListLength.reduce((acc, curr) => acc + curr, 0);\n\n\t\treturn totalTokens;\n\t} catch (error) {\n\t\t// Return 0 on complete failure\n\t\treturn 
0;\n\t}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAOA,sBAAiC;AACjC,qBAAwC;AAMxC,MAAM,8BAAsD;AAAA,EAC3D,UAAU;AAAA,EACV,SAAS;AAAA,EACT,iBAAiB;AAAA,EACjB,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,WAAW;AAAA,EACX,WAAW;AACZ;AAUO,SAAS,0BAA0B,MAAc,QAAgB,eAAuB;AAC9F,MAAI;AAEH,QAAI,CAAC,QAAQ,OAAO,SAAS,YAAY,KAAK,WAAW,GAAG;AAC3D,aAAO;AAAA,IACR;AAGA,UAAM,gBAAgB,4BAA4B,KAAK,KAAK;AAG5D,QAAI,CAAC,OAAO,SAAS,aAAa,KAAK,iBAAiB,GAAG;AAE1D,YAAMA,mBAAkB,KAAK,KAAK,KAAK,SAAS,CAAG;AACnD,aAAOA;AAAA,IACR;AAGA,UAAM,kBAAkB,KAAK,KAAK,KAAK,SAAS,aAAa;AAE7D,WAAO;AAAA,EACR,SAAS,OAAO;AAEf,WAAO,KAAK,MAAM,MAAM,UAAU,KAAK,CAAG;AAAA,EAC3C;AACD;AAYO,SAAS,2BACf,MACA,WACA,cACA,QAAgB,eACL;AACX,MAAI;AAEH,QAAI,CAAC,QAAQ,OAAO,SAAS,YAAY,KAAK,WAAW,GAAG;AAC3D,aAAO,CAAC;AAAA,IACT;AAGA,QAAI,CAAC,OAAO,SAAS,SAAS,KAAK,aAAa,GAAG;AAElD,aAAO,CAAC,IAAI;AAAA,IACb;AAGA,UAAM,eACL,OAAO,SAAS,YAAY,KAAK,gBAAgB,IAC9C,KAAK,IAAI,cAAc,YAAY,CAAC,IACpC;AAEJ,UAAM,gBAAgB,4BAA4B,KAAK,KAAK;AAC5D,UAAM,mBAAmB,KAAK,MAAM,YAAY,aAAa;AAC7D,UAAM,iBAAiB,KAAK,MAAM,eAAe,aAAa;AAE9D,UAAM,SAAmB,CAAC;AAC1B,QAAI,QAAQ;AAEZ,WAAO,QAAQ,KAAK,QAAQ;AAC3B,YAAM,MAAM,KAAK,IAAI,QAAQ,kBAAkB,KAAK,MAAM;AAC1D,aAAO,KAAK,KAAK,MAAM,OAAO,GAAG,CAAC;AAElC,UAAI,OAAO,KAAK,QAAQ;AACvB;AAAA,MACD;AAGA,cAAQ,KAAK,IAAI,MAAM,gBAAgB,QAAQ,CAAC;AAAA,IACjD;AAEA,WAAO;AAAA,EACR,SAAS,OAAO;AAEf,WAAO,OAAO,CAAC,IAAI,IAAI,CAAC;AAAA,EACzB;AACD;AAWA,eAAsB,6BACrB,MACA,OACkB;AAClB,MAAI;AAEH,QAAI,CAAC,MAAM,QAAQ,IAAI,GAAG;AACzB,aAAO;AAAA,IACR;AAEA,UAAM,cAAU,kCAAiB,KAAK;AACtC,UAAM,oBAAoB,MAAM,QAAQ;AAAA,MACvC,KAAK,IAAI,OAAO,SAAS;AACxB,YAAI;AAEH,cAAI,CAAC,QAAQ,OAAO,SAAS,UAAU;AACtC,mBAAO;AAAA,UACR;AAGA,kBAAI,wCAAwB,IAAI,GAAG;AAClC,kBAAM,kBAAkB,0BAA0B,MAAM,KAAK;AAC7D,mBAAO;AAAA,UACR;AAGA,cAAI;AACH,kBAAM,SAAS,QAAQ,OAAO,IAAI;AAClC,mBAAO,OAAO;AAAA,UACf,SAAS,eAAe;AAEvB,mBAAO,0BAA0B,MAAM,KAAK;AAAA,UAC7C;AAAA,QACD,SAAS,WAAW;AAEnB,iBAAO;AAAA,QACR;AAAA,MACD,CAAC;AAAA,IACF;AAEA,UAAM,cAAc,kBAAkB,OAAO,CAAC,KAAK,SAAS,MAAM,MAAM,CAAC;AAEzE,WAAO;AAAA,EACR,SAAS,OAAO;AAEf,WAAO;AAAA,EACR;AACD;","names":["est
imatedTokens"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@n8n/n8n-nodes-langchain",
|
|
3
|
-
"version": "1.101.1",
|
|
3
|
+
"version": "1.101.2",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
|
@@ -207,8 +207,8 @@
|
|
|
207
207
|
"zod": "3.25.67",
|
|
208
208
|
"zod-to-json-schema": "3.23.3",
|
|
209
209
|
"@n8n/client-oauth2": "0.27.0",
|
|
210
|
-
"@n8n/json-schema-to-zod": "1.4.0",
|
|
211
210
|
"@n8n/typescript-config": "1.3.0",
|
|
211
|
+
"@n8n/json-schema-to-zod": "1.4.0",
|
|
212
212
|
"n8n-workflow": "1.99.1",
|
|
213
213
|
"n8n-nodes-base": "1.100.1"
|
|
214
214
|
},
|