@truto/truto-jsonata 1.0.50 → 1.0.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/browser/index.js +59 -88
- package/dist/browser/index.js.map +3 -3
- package/dist/cjs/index.cjs +59 -88
- package/dist/cjs/index.cjs.map +3 -3
- package/package.json +6 -5
package/dist/cjs/index.cjs
CHANGED
|
@@ -75968,62 +75968,38 @@ async function encodingForModel(model) {
|
|
|
75968
75968
|
}
|
|
75969
75969
|
|
|
75970
75970
|
// node_modules/@langchain/textsplitters/dist/text_splitter.js
|
|
75971
|
-
|
|
75971
|
+
var TextSplitter = class extends BaseDocumentTransformer {
|
|
75972
|
+
lc_namespace = [
|
|
75973
|
+
"langchain",
|
|
75974
|
+
"document_transformers",
|
|
75975
|
+
"text_splitters"
|
|
75976
|
+
];
|
|
75977
|
+
chunkSize = 1000;
|
|
75978
|
+
chunkOverlap = 200;
|
|
75979
|
+
keepSeparator = false;
|
|
75980
|
+
lengthFunction;
|
|
75972
75981
|
constructor(fields) {
|
|
75973
75982
|
super(fields);
|
|
75974
|
-
Object.defineProperty(this, "lc_namespace", {
|
|
75975
|
-
enumerable: true,
|
|
75976
|
-
configurable: true,
|
|
75977
|
-
writable: true,
|
|
75978
|
-
value: ["langchain", "document_transformers", "text_splitters"]
|
|
75979
|
-
});
|
|
75980
|
-
Object.defineProperty(this, "chunkSize", {
|
|
75981
|
-
enumerable: true,
|
|
75982
|
-
configurable: true,
|
|
75983
|
-
writable: true,
|
|
75984
|
-
value: 1000
|
|
75985
|
-
});
|
|
75986
|
-
Object.defineProperty(this, "chunkOverlap", {
|
|
75987
|
-
enumerable: true,
|
|
75988
|
-
configurable: true,
|
|
75989
|
-
writable: true,
|
|
75990
|
-
value: 200
|
|
75991
|
-
});
|
|
75992
|
-
Object.defineProperty(this, "keepSeparator", {
|
|
75993
|
-
enumerable: true,
|
|
75994
|
-
configurable: true,
|
|
75995
|
-
writable: true,
|
|
75996
|
-
value: false
|
|
75997
|
-
});
|
|
75998
|
-
Object.defineProperty(this, "lengthFunction", {
|
|
75999
|
-
enumerable: true,
|
|
76000
|
-
configurable: true,
|
|
76001
|
-
writable: true,
|
|
76002
|
-
value: undefined
|
|
76003
|
-
});
|
|
76004
75983
|
this.chunkSize = fields?.chunkSize ?? this.chunkSize;
|
|
76005
75984
|
this.chunkOverlap = fields?.chunkOverlap ?? this.chunkOverlap;
|
|
76006
75985
|
this.keepSeparator = fields?.keepSeparator ?? this.keepSeparator;
|
|
76007
75986
|
this.lengthFunction = fields?.lengthFunction ?? ((text) => text.length);
|
|
76008
|
-
if (this.chunkOverlap >= this.chunkSize)
|
|
75987
|
+
if (this.chunkOverlap >= this.chunkSize)
|
|
76009
75988
|
throw new Error("Cannot have chunkOverlap >= chunkSize");
|
|
76010
|
-
}
|
|
76011
75989
|
}
|
|
76012
75990
|
async transformDocuments(documents, chunkHeaderOptions = {}) {
|
|
76013
75991
|
return this.splitDocuments(documents, chunkHeaderOptions);
|
|
76014
75992
|
}
|
|
76015
75993
|
splitOnSeparator(text, separator) {
|
|
76016
75994
|
let splits;
|
|
76017
|
-
if (separator)
|
|
75995
|
+
if (separator)
|
|
76018
75996
|
if (this.keepSeparator) {
|
|
76019
75997
|
const regexEscapedSeparator = separator.replace(/[/\-\\^$*+?.()|[\]{}]/g, "\\$&");
|
|
76020
|
-
splits = text.split(new RegExp(`(?=${regexEscapedSeparator})`));
|
|
76021
|
-
} else
|
|
75998
|
+
splits = text.split(/* @__PURE__ */ new RegExp(`(?=${regexEscapedSeparator})`));
|
|
75999
|
+
} else
|
|
76022
76000
|
splits = text.split(separator);
|
|
76023
|
-
|
|
76024
|
-
} else {
|
|
76001
|
+
else
|
|
76025
76002
|
splits = text.split("");
|
|
76026
|
-
}
|
|
76027
76003
|
return splits.filter((s2) => s2 !== "");
|
|
76028
76004
|
}
|
|
76029
76005
|
async createDocuments(texts, metadatas = [], chunkHeaderOptions = {}) {
|
|
@@ -76050,9 +76026,8 @@ class TextSplitter extends BaseDocumentTransformer {
|
|
|
76050
76026
|
const numberOfIntermediateNewLines = this.numberOfNewLines(text, indexChunk, indexEndPrevChunk);
|
|
76051
76027
|
lineCounterIndex -= numberOfIntermediateNewLines;
|
|
76052
76028
|
}
|
|
76053
|
-
if (appendChunkOverlapHeader)
|
|
76029
|
+
if (appendChunkOverlapHeader)
|
|
76054
76030
|
pageContent += chunkOverlapHeader;
|
|
76055
|
-
}
|
|
76056
76031
|
}
|
|
76057
76032
|
const newLinesCount = this.numberOfNewLines(chunk2);
|
|
76058
76033
|
const loc = _metadatas[i3].loc && typeof _metadatas[i3].loc === "object" ? { ..._metadatas[i3].loc } : {};
|
|
@@ -76097,15 +76072,13 @@ class TextSplitter extends BaseDocumentTransformer {
|
|
|
76097
76072
|
for (const d of splits) {
|
|
76098
76073
|
const _len = await this.lengthFunction(d);
|
|
76099
76074
|
if (total + _len + currentDoc.length * separator.length > this.chunkSize) {
|
|
76100
|
-
if (total > this.chunkSize)
|
|
76075
|
+
if (total > this.chunkSize)
|
|
76101
76076
|
console.warn(`Created a chunk of size ${total}, +
|
|
76102
76077
|
which is longer than the specified ${this.chunkSize}`);
|
|
76103
|
-
}
|
|
76104
76078
|
if (currentDoc.length > 0) {
|
|
76105
|
-
const
|
|
76106
|
-
if (
|
|
76107
|
-
docs.push(
|
|
76108
|
-
}
|
|
76079
|
+
const doc$1 = this.joinDocs(currentDoc, separator);
|
|
76080
|
+
if (doc$1 !== null)
|
|
76081
|
+
docs.push(doc$1);
|
|
76109
76082
|
while (total > this.chunkOverlap || total + _len + currentDoc.length * separator.length > this.chunkSize && total > 0) {
|
|
76110
76083
|
total -= await this.lengthFunction(currentDoc[0]);
|
|
76111
76084
|
currentDoc.shift();
|
|
@@ -76116,27 +76089,26 @@ which is longer than the specified ${this.chunkSize}`);
|
|
|
76116
76089
|
total += _len;
|
|
76117
76090
|
}
|
|
76118
76091
|
const doc2 = this.joinDocs(currentDoc, separator);
|
|
76119
|
-
if (doc2 !== null)
|
|
76092
|
+
if (doc2 !== null)
|
|
76120
76093
|
docs.push(doc2);
|
|
76121
|
-
}
|
|
76122
76094
|
return docs;
|
|
76123
76095
|
}
|
|
76124
|
-
}
|
|
76125
|
-
|
|
76096
|
+
};
|
|
76097
|
+
var RecursiveCharacterTextSplitter = class RecursiveCharacterTextSplitter2 extends TextSplitter {
|
|
76126
76098
|
static lc_name() {
|
|
76127
76099
|
return "RecursiveCharacterTextSplitter";
|
|
76128
76100
|
}
|
|
76101
|
+
separators = [
|
|
76102
|
+
`
|
|
76103
|
+
|
|
76104
|
+
`,
|
|
76105
|
+
`
|
|
76106
|
+
`,
|
|
76107
|
+
" ",
|
|
76108
|
+
""
|
|
76109
|
+
];
|
|
76129
76110
|
constructor(fields) {
|
|
76130
76111
|
super(fields);
|
|
76131
|
-
Object.defineProperty(this, "separators", {
|
|
76132
|
-
enumerable: true,
|
|
76133
|
-
configurable: true,
|
|
76134
|
-
writable: true,
|
|
76135
|
-
value: [`
|
|
76136
|
-
|
|
76137
|
-
`, `
|
|
76138
|
-
`, " ", ""]
|
|
76139
|
-
});
|
|
76140
76112
|
this.separators = fields?.separators ?? this.separators;
|
|
76141
76113
|
this.keepSeparator = fields?.keepSeparator ?? true;
|
|
76142
76114
|
}
|
|
@@ -76159,23 +76131,22 @@ class RecursiveCharacterTextSplitter extends TextSplitter {
|
|
|
76159
76131
|
const splits = this.splitOnSeparator(text, separator);
|
|
76160
76132
|
let goodSplits = [];
|
|
76161
76133
|
const _separator = this.keepSeparator ? "" : separator;
|
|
76162
|
-
for (const s2 of splits)
|
|
76163
|
-
if (await this.lengthFunction(s2) < this.chunkSize)
|
|
76134
|
+
for (const s2 of splits)
|
|
76135
|
+
if (await this.lengthFunction(s2) < this.chunkSize)
|
|
76164
76136
|
goodSplits.push(s2);
|
|
76165
|
-
|
|
76137
|
+
else {
|
|
76166
76138
|
if (goodSplits.length) {
|
|
76167
76139
|
const mergedText = await this.mergeSplits(goodSplits, _separator);
|
|
76168
76140
|
finalChunks.push(...mergedText);
|
|
76169
76141
|
goodSplits = [];
|
|
76170
76142
|
}
|
|
76171
|
-
if (!newSeparators)
|
|
76143
|
+
if (!newSeparators)
|
|
76172
76144
|
finalChunks.push(s2);
|
|
76173
|
-
|
|
76145
|
+
else {
|
|
76174
76146
|
const otherInfo = await this._splitText(s2, newSeparators);
|
|
76175
76147
|
finalChunks.push(...otherInfo);
|
|
76176
76148
|
}
|
|
76177
76149
|
}
|
|
76178
|
-
}
|
|
76179
76150
|
if (goodSplits.length) {
|
|
76180
76151
|
const mergedText = await this.mergeSplits(goodSplits, _separator);
|
|
76181
76152
|
finalChunks.push(...mergedText);
|
|
@@ -76186,13 +76157,13 @@ class RecursiveCharacterTextSplitter extends TextSplitter {
|
|
|
76186
76157
|
return this._splitText(text, this.separators);
|
|
76187
76158
|
}
|
|
76188
76159
|
static fromLanguage(language, options3) {
|
|
76189
|
-
return new
|
|
76160
|
+
return new RecursiveCharacterTextSplitter2({
|
|
76190
76161
|
...options3,
|
|
76191
|
-
separators:
|
|
76162
|
+
separators: RecursiveCharacterTextSplitter2.getSeparatorsForLanguage(language)
|
|
76192
76163
|
});
|
|
76193
76164
|
}
|
|
76194
76165
|
static getSeparatorsForLanguage(language) {
|
|
76195
|
-
if (language === "cpp")
|
|
76166
|
+
if (language === "cpp")
|
|
76196
76167
|
return [
|
|
76197
76168
|
`
|
|
76198
76169
|
class `,
|
|
@@ -76222,7 +76193,7 @@ case `,
|
|
|
76222
76193
|
" ",
|
|
76223
76194
|
""
|
|
76224
76195
|
];
|
|
76225
|
-
|
|
76196
|
+
else if (language === "go")
|
|
76226
76197
|
return [
|
|
76227
76198
|
`
|
|
76228
76199
|
func `,
|
|
@@ -76248,7 +76219,7 @@ case `,
|
|
|
76248
76219
|
" ",
|
|
76249
76220
|
""
|
|
76250
76221
|
];
|
|
76251
|
-
|
|
76222
|
+
else if (language === "java")
|
|
76252
76223
|
return [
|
|
76253
76224
|
`
|
|
76254
76225
|
class `,
|
|
@@ -76278,7 +76249,7 @@ case `,
|
|
|
76278
76249
|
" ",
|
|
76279
76250
|
""
|
|
76280
76251
|
];
|
|
76281
|
-
|
|
76252
|
+
else if (language === "js")
|
|
76282
76253
|
return [
|
|
76283
76254
|
`
|
|
76284
76255
|
function `,
|
|
@@ -76310,7 +76281,7 @@ default `,
|
|
|
76310
76281
|
" ",
|
|
76311
76282
|
""
|
|
76312
76283
|
];
|
|
76313
|
-
|
|
76284
|
+
else if (language === "php")
|
|
76314
76285
|
return [
|
|
76315
76286
|
`
|
|
76316
76287
|
function `,
|
|
@@ -76336,7 +76307,7 @@ case `,
|
|
|
76336
76307
|
" ",
|
|
76337
76308
|
""
|
|
76338
76309
|
];
|
|
76339
|
-
|
|
76310
|
+
else if (language === "proto")
|
|
76340
76311
|
return [
|
|
76341
76312
|
`
|
|
76342
76313
|
message `,
|
|
@@ -76358,7 +76329,7 @@ syntax `,
|
|
|
76358
76329
|
" ",
|
|
76359
76330
|
""
|
|
76360
76331
|
];
|
|
76361
|
-
|
|
76332
|
+
else if (language === "python")
|
|
76362
76333
|
return [
|
|
76363
76334
|
`
|
|
76364
76335
|
class `,
|
|
@@ -76374,7 +76345,7 @@ def `,
|
|
|
76374
76345
|
" ",
|
|
76375
76346
|
""
|
|
76376
76347
|
];
|
|
76377
|
-
|
|
76348
|
+
else if (language === "rst")
|
|
76378
76349
|
return [
|
|
76379
76350
|
`
|
|
76380
76351
|
===
|
|
@@ -76395,7 +76366,7 @@ def `,
|
|
|
76395
76366
|
" ",
|
|
76396
76367
|
""
|
|
76397
76368
|
];
|
|
76398
|
-
|
|
76369
|
+
else if (language === "ruby")
|
|
76399
76370
|
return [
|
|
76400
76371
|
`
|
|
76401
76372
|
def `,
|
|
@@ -76423,7 +76394,7 @@ rescue `,
|
|
|
76423
76394
|
" ",
|
|
76424
76395
|
""
|
|
76425
76396
|
];
|
|
76426
|
-
|
|
76397
|
+
else if (language === "rust")
|
|
76427
76398
|
return [
|
|
76428
76399
|
`
|
|
76429
76400
|
fn `,
|
|
@@ -76451,7 +76422,7 @@ const `,
|
|
|
76451
76422
|
" ",
|
|
76452
76423
|
""
|
|
76453
76424
|
];
|
|
76454
|
-
|
|
76425
|
+
else if (language === "scala")
|
|
76455
76426
|
return [
|
|
76456
76427
|
`
|
|
76457
76428
|
class `,
|
|
@@ -76481,7 +76452,7 @@ case `,
|
|
|
76481
76452
|
" ",
|
|
76482
76453
|
""
|
|
76483
76454
|
];
|
|
76484
|
-
|
|
76455
|
+
else if (language === "swift")
|
|
76485
76456
|
return [
|
|
76486
76457
|
`
|
|
76487
76458
|
func `,
|
|
@@ -76511,7 +76482,7 @@ case `,
|
|
|
76511
76482
|
" ",
|
|
76512
76483
|
""
|
|
76513
76484
|
];
|
|
76514
|
-
|
|
76485
|
+
else if (language === "markdown")
|
|
76515
76486
|
return [
|
|
76516
76487
|
`
|
|
76517
76488
|
## `,
|
|
@@ -76547,7 +76518,7 @@ ___
|
|
|
76547
76518
|
" ",
|
|
76548
76519
|
""
|
|
76549
76520
|
];
|
|
76550
|
-
|
|
76521
|
+
else if (language === "latex")
|
|
76551
76522
|
return [
|
|
76552
76523
|
`
|
|
76553
76524
|
\\chapter{`,
|
|
@@ -76585,7 +76556,7 @@ ___
|
|
|
76585
76556
|
" ",
|
|
76586
76557
|
""
|
|
76587
76558
|
];
|
|
76588
|
-
|
|
76559
|
+
else if (language === "html")
|
|
76589
76560
|
return [
|
|
76590
76561
|
"<body>",
|
|
76591
76562
|
"<div>",
|
|
@@ -76616,7 +76587,7 @@ ___
|
|
|
76616
76587
|
" ",
|
|
76617
76588
|
""
|
|
76618
76589
|
];
|
|
76619
|
-
|
|
76590
|
+
else if (language === "sol")
|
|
76620
76591
|
return [
|
|
76621
76592
|
`
|
|
76622
76593
|
pragma `,
|
|
@@ -76662,11 +76633,11 @@ assembly `,
|
|
|
76662
76633
|
" ",
|
|
76663
76634
|
""
|
|
76664
76635
|
];
|
|
76665
|
-
|
|
76636
|
+
else
|
|
76666
76637
|
throw new Error(`Language ${language} is not supported.`);
|
|
76667
|
-
}
|
|
76668
76638
|
}
|
|
76669
|
-
}
|
|
76639
|
+
};
|
|
76640
|
+
|
|
76670
76641
|
// src/functions/recursiveCharacterTextSplitter.ts
|
|
76671
76642
|
async function recursiveCharacterTextSplitter(text, options3 = { chunkSize: 200, chunkOverlap: 60 }) {
|
|
76672
76643
|
let textToSplit;
|
|
@@ -76931,4 +76902,4 @@ function trutoJsonata(expression) {
|
|
|
76931
76902
|
return registerJsonataExtensions(import_jsonata.default(expression));
|
|
76932
76903
|
}
|
|
76933
76904
|
|
|
76934
|
-
//# debugId=
|
|
76905
|
+
//# debugId=90F868AC30527B0264756E2164756E21
|