@mastra/rag 2.1.1 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/dist/docs/SKILL.md +38 -0
- package/dist/docs/assets/SOURCE_MAP.json +6 -0
- package/dist/docs/references/docs-rag-chunking-and-embedding.md +183 -0
- package/dist/docs/references/docs-rag-graph-rag.md +215 -0
- package/dist/docs/references/docs-rag-overview.md +72 -0
- package/dist/docs/references/docs-rag-retrieval.md +515 -0
- package/dist/docs/references/reference-rag-chunk.md +221 -0
- package/dist/docs/references/reference-rag-database-config.md +261 -0
- package/dist/docs/references/reference-rag-document.md +114 -0
- package/dist/docs/references/reference-rag-extract-params.md +168 -0
- package/dist/docs/references/reference-rag-graph-rag.md +111 -0
- package/dist/docs/references/reference-rag-rerank.md +75 -0
- package/dist/docs/references/reference-rag-rerankWithScorer.md +80 -0
- package/dist/docs/references/reference-tools-document-chunker-tool.md +89 -0
- package/dist/docs/references/reference-tools-graph-rag-tool.md +182 -0
- package/dist/docs/references/reference-tools-vector-query-tool.md +459 -0
- package/dist/document/transformers/semantic-markdown.d.ts +6 -4
- package/dist/document/transformers/semantic-markdown.d.ts.map +1 -1
- package/dist/document/transformers/token.d.ts +5 -4
- package/dist/document/transformers/token.d.ts.map +1 -1
- package/dist/index.cjs +41 -26
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +41 -26
- package/dist/index.js.map +1 -1
- package/package.json +5 -5
package/dist/document/transformers/token.d.ts.map
CHANGED

```diff
@@ -1 +1 @@
-{"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/token.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,gBAAgB,
+{"version":3,"file":"token.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/token.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,gBAAgB,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAE7E,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAC;AAElD,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAC;AAEzC,UAAU,SAAS;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,MAAM,CAAC;IACrC,MAAM,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;CACpC;AAED,wBAAgB,iBAAiB,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,SAAS,CAAA;CAAE,GAAG,MAAM,EAAE,CAkBvG;AAED,qBAAa,gBAAiB,SAAQ,eAAe;IACnD,OAAO,CAAC,SAAS,CAAW;IAC5B,OAAO,CAAC,YAAY,CAAmB;IACvC,OAAO,CAAC,eAAe,CAAmB;gBAE9B,EACV,YAA4B,EAC5B,SAAS,EACT,SAAS,EAAE,iBAAiB,EAC5B,cAA0B,EAC1B,iBAAyB,EACzB,OAAY,GACb,EAAE;QACD,YAAY,CAAC,EAAE,gBAAgB,CAAC;QAChC,SAAS,CAAC,EAAE,aAAa,CAAC;QAC1B,SAAS,CAAC,EAAE,QAAQ,CAAC;QACrB,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;QACrC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC;QACxC,OAAO,EAAE,iBAAiB,CAAC;KAC5B;IAiBD,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,MAAM,EAAE;IAqB/C,MAAM,CAAC,YAAY,CAAC,EAClB,YAA4B,EAC5B,SAAS,EACT,OAAY,GACb,EAAE;QACD,YAAY,CAAC,EAAE,gBAAgB,CAAC;QAChC,SAAS,CAAC,EAAE,aAAa,CAAC;QAC1B,OAAO,CAAC,EAAE,iBAAiB,CAAC;KAC7B,GAAG,gBAAgB;CAwCrB"}
```
package/dist/index.cjs
CHANGED

```diff
@@ -6072,31 +6072,34 @@ var MarkdownHeaderTransformer = class {
 var SemanticMarkdownTransformer = class _SemanticMarkdownTransformer extends TextTransformer {
   tokenizer;
   joinThreshold;
-  allowedSpecial;
-  disallowedSpecial;
+  allowedArray;
+  disallowedArray;
   constructor({
     joinThreshold = 500,
     encodingName = "cl100k_base",
     modelName,
+    tokenizer: existingTokenizer,
     allowedSpecial = /* @__PURE__ */ new Set(),
     disallowedSpecial = "all",
     ...baseOptions
   } = {}) {
     super(baseOptions);
     this.joinThreshold = joinThreshold;
-    this.allowedSpecial = allowedSpecial;
-    this.disallowedSpecial = disallowedSpecial;
-    try {
-      this.tokenizer = modelName ? jsTiktoken.encodingForModel(modelName) : jsTiktoken.getEncoding(encodingName);
-    } catch {
-      throw new Error("Could not load tiktoken encoding. Please install it with `npm install js-tiktoken`.");
+    this.allowedArray = allowedSpecial === "all" ? "all" : Array.from(allowedSpecial);
+    this.disallowedArray = disallowedSpecial === "all" ? "all" : Array.from(disallowedSpecial);
+    if (existingTokenizer) {
+      this.tokenizer = existingTokenizer;
+    } else {
+      try {
+        this.tokenizer = modelName ? jsTiktoken.encodingForModel(modelName) : jsTiktoken.getEncoding(encodingName);
+      } catch {
+        throw new Error("Could not load tiktoken encoding. Please install it with `npm install js-tiktoken`.");
+      }
     }
   }
   countTokens(text) {
-    const allowed = this.allowedSpecial === "all" ? "all" : Array.from(this.allowedSpecial);
-    const disallowed = this.disallowedSpecial === "all" ? "all" : Array.from(this.disallowedSpecial);
     const processedText = this.stripWhitespace ? text.trim() : text;
-    return this.tokenizer.encode(processedText, allowed, disallowed).length;
+    return this.tokenizer.encode(processedText, this.allowedArray, this.disallowedArray).length;
   }
   splitMarkdownByHeaders(markdown) {
     const sections = [];
```
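Two things change in this constructor: the allowed/disallowed special-token sets are converted to arrays once at construction time (previously `countTokens` rebuilt them on every call), and a pre-built js-tiktoken encoder can now be injected through the new `tokenizer` option. A minimal sketch of that resolution order, using only public js-tiktoken APIs; `resolveTokenizer` is an illustrative name, not an export of `@mastra/rag`:

```ts
import {
  encodingForModel,
  getEncoding,
  type Tiktoken,
  type TiktokenEncoding,
  type TiktokenModel,
} from "js-tiktoken";

// Sketch of the resolution order the new constructors follow: an injected
// tokenizer wins; otherwise one is built from modelName or encodingName.
function resolveTokenizer(opts: {
  tokenizer?: Tiktoken;
  modelName?: TiktokenModel;
  encodingName?: TiktokenEncoding;
}): Tiktoken {
  if (opts.tokenizer) return opts.tokenizer;
  try {
    return opts.modelName
      ? encodingForModel(opts.modelName)
      : getEncoding(opts.encodingName ?? "cl100k_base");
  } catch {
    throw new Error("Could not load tiktoken encoding. Please install it with `npm install js-tiktoken`.");
  }
}
```

The next hunk applies the precomputed arrays to the section-merge path and fixes its length accounting: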
```diff
@@ -6150,14 +6153,21 @@ var SemanticMarkdownTransformer = class _SemanticMarkdownTransformer extends TextTransformer {
       const current = workingSections[j];
       if (current.depth === depth) {
         const prev = workingSections[j - 1];
-        const title = `${"#".repeat(current.depth)} ${current.title}`;
-        if (prev.length + current.length < this.joinThreshold && prev.depth <= current.depth) {
-          const formattedTitle = `
+        const title = `${"#".repeat(current.depth)} ${current.title}`;
+        const formattedTitle = `
 
 ${title}`;
+        const headerLength = this.tokenizer.encode(
+          `${formattedTitle}
+`,
+          this.allowedArray,
+          this.disallowedArray
+        ).length;
+        const mergedLength = prev.length + current.length + headerLength;
+        if (mergedLength < this.joinThreshold && prev.depth <= current.depth) {
           prev.content += `${formattedTitle}
 ${current.content}`;
-          prev.length = prev.length + current.length;
+          prev.length = mergedLength;
           workingSections.splice(j, 1);
           j--;
         }
```
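Previously the merge condition compared `prev.length + current.length` against `joinThreshold`, so the header line injected between merged sections was never counted and merged chunks could quietly exceed the budget. The hunk above adds the header's own token cost to the comparison and stores the header-inclusive length back on the merged section. A standalone sketch of the arithmetic; the section objects and their token counts are invented for illustration:

```ts
import { getEncoding } from "js-tiktoken";

// Illustration of the new accounting: the header text joined between two
// sections ("\n\n## Title" plus a trailing newline) now counts toward
// joinThreshold. All section values here are made up for the example.
const tokenizer = getEncoding("cl100k_base");
const joinThreshold = 500;

const prev = { depth: 2, length: 220, content: "First section body" };
const current = { depth: 2, title: "Details", length: 240, content: "Second section body" };

const title = `${"#".repeat(current.depth)} ${current.title}`;
const formattedTitle = `\n\n${title}`;
const headerLength = tokenizer.encode(`${formattedTitle}\n`).length;
const mergedLength = prev.length + current.length + headerLength;

if (mergedLength < joinThreshold && prev.depth <= current.depth) {
  prev.content += `${formattedTitle}\n${current.content}`;
  prev.length = mergedLength; // header-inclusive, so later merges stay within budget
}
```

The next hunk threads the new `tokenizer` option into the chunk setup for this strategy: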
```diff
@@ -6227,6 +6237,7 @@ ${section.content}`;
       ...options,
       encodingName,
       modelName,
+      tokenizer,
       lengthFunction: tikTokenCounter
     });
   }
```
```diff
@@ -6481,30 +6492,33 @@ function splitTextOnTokens({ text, tokenizer }) {
 }
 var TokenTransformer = class _TokenTransformer extends TextTransformer {
   tokenizer;
-  allowedSpecial;
-  disallowedSpecial;
+  allowedArray;
+  disallowedArray;
   constructor({
     encodingName = "cl100k_base",
     modelName,
+    tokenizer: existingTokenizer,
     allowedSpecial = /* @__PURE__ */ new Set(),
     disallowedSpecial = "all",
     options = {}
   }) {
     super(options);
-    try {
-      this.tokenizer = modelName ? jsTiktoken.encodingForModel(modelName) : jsTiktoken.getEncoding(encodingName);
-    } catch {
-      throw new Error("Could not load tiktoken encoding. Please install it with `npm install js-tiktoken`.");
+    if (existingTokenizer) {
+      this.tokenizer = existingTokenizer;
+    } else {
+      try {
+        this.tokenizer = modelName ? jsTiktoken.encodingForModel(modelName) : jsTiktoken.getEncoding(encodingName);
+      } catch {
+        throw new Error("Could not load tiktoken encoding. Please install it with `npm install js-tiktoken`.");
+      }
     }
-    this.allowedSpecial = allowedSpecial;
-    this.disallowedSpecial = disallowedSpecial;
+    this.allowedArray = allowedSpecial === "all" ? "all" : Array.from(allowedSpecial);
+    this.disallowedArray = disallowedSpecial === "all" ? "all" : Array.from(disallowedSpecial);
   }
   splitText({ text }) {
     const encode = (text2) => {
-      const allowed = this.allowedSpecial === "all" ? "all" : Array.from(this.allowedSpecial);
-      const disallowed = this.disallowedSpecial === "all" ? "all" : Array.from(this.disallowedSpecial);
       const processedText = this.stripWhitespace ? text2.trim() : text2;
-      return Array.from(this.tokenizer.encode(processedText, allowed, disallowed));
+      return Array.from(this.tokenizer.encode(processedText, this.allowedArray, this.disallowedArray));
     };
     const decode = (tokens) => {
       const text2 = this.tokenizer.decode(tokens);
```
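`TokenTransformer` gets the same treatment, and for the same reason: `Tiktoken.encode` accepts `string[] | "all"` for its special-token parameters rather than a `Set`, so the old `splitText` paid two `Array.from` conversions on every `encode` call. A short sketch of the hoisting; the sample text and special token are illustrative:

```ts
import { getEncoding } from "js-tiktoken";

const tokenizer = getEncoding("cl100k_base");
const allowedSpecial = new Set(["<|endoftext|>"]);

// Once, at construction time (mirrors `this.allowedArray` in the diff):
const allowedArray = Array.from(allowedSpecial);

// Per call: no Set-to-array conversion anymore.
const tokens = tokenizer.encode("hello <|endoftext|>", allowedArray, "all");
console.log(tokens.length);
```

The final hunk forwards the injected tokenizer through the static factory as well: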
```diff
@@ -6541,6 +6555,7 @@ var TokenTransformer = class _TokenTransformer extends TextTransformer {
     return new _TokenTransformer({
       encodingName,
       modelName,
+      tokenizer,
       allowedSpecial: options.allowedSpecial,
       disallowedSpecial: options.disallowedSpecial,
       options: {
```