@mastra/rag 2.1.1 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/dist/docs/SKILL.md +38 -0
- package/dist/docs/assets/SOURCE_MAP.json +6 -0
- package/dist/docs/references/docs-rag-chunking-and-embedding.md +183 -0
- package/dist/docs/references/docs-rag-graph-rag.md +215 -0
- package/dist/docs/references/docs-rag-overview.md +72 -0
- package/dist/docs/references/docs-rag-retrieval.md +515 -0
- package/dist/docs/references/reference-rag-chunk.md +221 -0
- package/dist/docs/references/reference-rag-database-config.md +261 -0
- package/dist/docs/references/reference-rag-document.md +114 -0
- package/dist/docs/references/reference-rag-extract-params.md +168 -0
- package/dist/docs/references/reference-rag-graph-rag.md +111 -0
- package/dist/docs/references/reference-rag-rerank.md +75 -0
- package/dist/docs/references/reference-rag-rerankWithScorer.md +80 -0
- package/dist/docs/references/reference-tools-document-chunker-tool.md +89 -0
- package/dist/docs/references/reference-tools-graph-rag-tool.md +182 -0
- package/dist/docs/references/reference-tools-vector-query-tool.md +459 -0
- package/dist/document/transformers/semantic-markdown.d.ts +6 -4
- package/dist/document/transformers/semantic-markdown.d.ts.map +1 -1
- package/dist/document/transformers/token.d.ts +5 -4
- package/dist/document/transformers/token.d.ts.map +1 -1
- package/dist/index.cjs +41 -26
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +41 -26
- package/dist/index.js.map +1 -1
- package/package.json +5 -5
package/dist/index.js
CHANGED
|
@@ -6066,31 +6066,34 @@ var MarkdownHeaderTransformer = class {
|
|
|
6066
6066
|
var SemanticMarkdownTransformer = class _SemanticMarkdownTransformer extends TextTransformer {
|
|
6067
6067
|
tokenizer;
|
|
6068
6068
|
joinThreshold;
|
|
6069
|
-
|
|
6070
|
-
|
|
6069
|
+
allowedArray;
|
|
6070
|
+
disallowedArray;
|
|
6071
6071
|
constructor({
|
|
6072
6072
|
joinThreshold = 500,
|
|
6073
6073
|
encodingName = "cl100k_base",
|
|
6074
6074
|
modelName,
|
|
6075
|
+
tokenizer: existingTokenizer,
|
|
6075
6076
|
allowedSpecial = /* @__PURE__ */ new Set(),
|
|
6076
6077
|
disallowedSpecial = "all",
|
|
6077
6078
|
...baseOptions
|
|
6078
6079
|
} = {}) {
|
|
6079
6080
|
super(baseOptions);
|
|
6080
6081
|
this.joinThreshold = joinThreshold;
|
|
6081
|
-
this.
|
|
6082
|
-
this.
|
|
6083
|
-
|
|
6084
|
-
this.tokenizer =
|
|
6085
|
-
}
|
|
6086
|
-
|
|
6082
|
+
this.allowedArray = allowedSpecial === "all" ? "all" : Array.from(allowedSpecial);
|
|
6083
|
+
this.disallowedArray = disallowedSpecial === "all" ? "all" : Array.from(disallowedSpecial);
|
|
6084
|
+
if (existingTokenizer) {
|
|
6085
|
+
this.tokenizer = existingTokenizer;
|
|
6086
|
+
} else {
|
|
6087
|
+
try {
|
|
6088
|
+
this.tokenizer = modelName ? encodingForModel(modelName) : getEncoding(encodingName);
|
|
6089
|
+
} catch {
|
|
6090
|
+
throw new Error("Could not load tiktoken encoding. Please install it with `npm install js-tiktoken`.");
|
|
6091
|
+
}
|
|
6087
6092
|
}
|
|
6088
6093
|
}
|
|
6089
6094
|
countTokens(text) {
|
|
6090
|
-
const allowed = this.allowedSpecial === "all" ? "all" : Array.from(this.allowedSpecial);
|
|
6091
|
-
const disallowed = this.disallowedSpecial === "all" ? "all" : Array.from(this.disallowedSpecial);
|
|
6092
6095
|
const processedText = this.stripWhitespace ? text.trim() : text;
|
|
6093
|
-
return this.tokenizer.encode(processedText,
|
|
6096
|
+
return this.tokenizer.encode(processedText, this.allowedArray, this.disallowedArray).length;
|
|
6094
6097
|
}
|
|
6095
6098
|
splitMarkdownByHeaders(markdown) {
|
|
6096
6099
|
const sections = [];
|
|
@@ -6144,14 +6147,21 @@ var SemanticMarkdownTransformer = class _SemanticMarkdownTransformer extends Tex
|
|
|
6144
6147
|
const current = workingSections[j];
|
|
6145
6148
|
if (current.depth === depth) {
|
|
6146
6149
|
const prev = workingSections[j - 1];
|
|
6147
|
-
|
|
6148
|
-
|
|
6149
|
-
const formattedTitle = `
|
|
6150
|
+
const title = `${"#".repeat(current.depth)} ${current.title}`;
|
|
6151
|
+
const formattedTitle = `
|
|
6150
6152
|
|
|
6151
6153
|
${title}`;
|
|
6154
|
+
const headerLength = this.tokenizer.encode(
|
|
6155
|
+
`${formattedTitle}
|
|
6156
|
+
`,
|
|
6157
|
+
this.allowedArray,
|
|
6158
|
+
this.disallowedArray
|
|
6159
|
+
).length;
|
|
6160
|
+
const mergedLength = prev.length + current.length + headerLength;
|
|
6161
|
+
if (mergedLength < this.joinThreshold && prev.depth <= current.depth) {
|
|
6152
6162
|
prev.content += `${formattedTitle}
|
|
6153
6163
|
${current.content}`;
|
|
6154
|
-
prev.length =
|
|
6164
|
+
prev.length = mergedLength;
|
|
6155
6165
|
workingSections.splice(j, 1);
|
|
6156
6166
|
j--;
|
|
6157
6167
|
}
|
|
@@ -6221,6 +6231,7 @@ ${section.content}`;
|
|
|
6221
6231
|
...options,
|
|
6222
6232
|
encodingName,
|
|
6223
6233
|
modelName,
|
|
6234
|
+
tokenizer,
|
|
6224
6235
|
lengthFunction: tikTokenCounter
|
|
6225
6236
|
});
|
|
6226
6237
|
}
|
|
@@ -6475,30 +6486,33 @@ function splitTextOnTokens({ text, tokenizer }) {
|
|
|
6475
6486
|
}
|
|
6476
6487
|
var TokenTransformer = class _TokenTransformer extends TextTransformer {
|
|
6477
6488
|
tokenizer;
|
|
6478
|
-
|
|
6479
|
-
|
|
6489
|
+
allowedArray;
|
|
6490
|
+
disallowedArray;
|
|
6480
6491
|
constructor({
|
|
6481
6492
|
encodingName = "cl100k_base",
|
|
6482
6493
|
modelName,
|
|
6494
|
+
tokenizer: existingTokenizer,
|
|
6483
6495
|
allowedSpecial = /* @__PURE__ */ new Set(),
|
|
6484
6496
|
disallowedSpecial = "all",
|
|
6485
6497
|
options = {}
|
|
6486
6498
|
}) {
|
|
6487
6499
|
super(options);
|
|
6488
|
-
|
|
6489
|
-
this.tokenizer =
|
|
6490
|
-
}
|
|
6491
|
-
|
|
6500
|
+
if (existingTokenizer) {
|
|
6501
|
+
this.tokenizer = existingTokenizer;
|
|
6502
|
+
} else {
|
|
6503
|
+
try {
|
|
6504
|
+
this.tokenizer = modelName ? encodingForModel(modelName) : getEncoding(encodingName);
|
|
6505
|
+
} catch {
|
|
6506
|
+
throw new Error("Could not load tiktoken encoding. Please install it with `npm install js-tiktoken`.");
|
|
6507
|
+
}
|
|
6492
6508
|
}
|
|
6493
|
-
this.
|
|
6494
|
-
this.
|
|
6509
|
+
this.allowedArray = allowedSpecial === "all" ? "all" : Array.from(allowedSpecial);
|
|
6510
|
+
this.disallowedArray = disallowedSpecial === "all" ? "all" : Array.from(disallowedSpecial);
|
|
6495
6511
|
}
|
|
6496
6512
|
splitText({ text }) {
|
|
6497
6513
|
const encode = (text2) => {
|
|
6498
|
-
const allowed = this.allowedSpecial === "all" ? "all" : Array.from(this.allowedSpecial);
|
|
6499
|
-
const disallowed = this.disallowedSpecial === "all" ? "all" : Array.from(this.disallowedSpecial);
|
|
6500
6514
|
const processedText = this.stripWhitespace ? text2.trim() : text2;
|
|
6501
|
-
return Array.from(this.tokenizer.encode(processedText,
|
|
6515
|
+
return Array.from(this.tokenizer.encode(processedText, this.allowedArray, this.disallowedArray));
|
|
6502
6516
|
};
|
|
6503
6517
|
const decode = (tokens) => {
|
|
6504
6518
|
const text2 = this.tokenizer.decode(tokens);
|
|
@@ -6535,6 +6549,7 @@ var TokenTransformer = class _TokenTransformer extends TextTransformer {
|
|
|
6535
6549
|
return new _TokenTransformer({
|
|
6536
6550
|
encodingName,
|
|
6537
6551
|
modelName,
|
|
6552
|
+
tokenizer,
|
|
6538
6553
|
allowedSpecial: options.allowedSpecial,
|
|
6539
6554
|
disallowedSpecial: options.disallowedSpecial,
|
|
6540
6555
|
options: {
|