@mastra/rag 2.0.0-beta.1 → 2.0.0-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +94 -0
- package/dist/document/document.d.ts.map +1 -1
- package/dist/document/transformers/html.d.ts +1 -0
- package/dist/document/transformers/html.d.ts.map +1 -1
- package/dist/document/transformers/markdown.d.ts.map +1 -1
- package/dist/graph-rag/index.d.ts +13 -2
- package/dist/graph-rag/index.d.ts.map +1 -1
- package/dist/index.cjs +86 -19
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +86 -19
- package/dist/index.js.map +1 -1
- package/dist/tools/graph-rag.d.ts.map +1 -1
- package/dist/tools/vector-query.d.ts.map +1 -1
- package/dist/utils/convert-sources.d.ts +3 -1
- package/dist/utils/convert-sources.d.ts.map +1 -1
- package/package.json +6 -6
package/dist/index.js
CHANGED
|
@@ -266,7 +266,7 @@ Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'
|
|
|
266
266
|
var defaultQuestionExtractPrompt = new PromptTemplate({
|
|
267
267
|
templateVars: ["numQuestions", "context"],
|
|
268
268
|
template: `(
|
|
269
|
-
"Given the contextual
|
|
269
|
+
"Given the contextual information below, generate {numQuestions} questions this context can provide specific answers to which are unlikely to be found elsewhere. Higher-level summaries of surrounding context may be provided as well. "
|
|
270
270
|
"Try using these summaries to generate better questions that this context can answer."
|
|
271
271
|
"---------------------"
|
|
272
272
|
"{context}"
|
|
@@ -4993,23 +4993,47 @@ var HTMLSectionTransformer = class {
|
|
|
4993
4993
|
}
|
|
4994
4994
|
return "/" + parts.join("/");
|
|
4995
4995
|
}
|
|
4996
|
+
getTextContent(element) {
|
|
4997
|
+
if (!element) return "";
|
|
4998
|
+
if (!element.tagName) {
|
|
4999
|
+
return element.text || "";
|
|
5000
|
+
}
|
|
5001
|
+
let content = element.text || "";
|
|
5002
|
+
if (element.childNodes) {
|
|
5003
|
+
for (const child of element.childNodes) {
|
|
5004
|
+
const childText = this.getTextContent(child);
|
|
5005
|
+
if (childText) {
|
|
5006
|
+
content += " " + childText;
|
|
5007
|
+
}
|
|
5008
|
+
}
|
|
5009
|
+
}
|
|
5010
|
+
return content.trim();
|
|
5011
|
+
}
|
|
4996
5012
|
splitHtmlByHeaders(htmlDoc) {
|
|
4997
5013
|
const sections = [];
|
|
4998
5014
|
const root = parse(htmlDoc);
|
|
4999
5015
|
const headers = Object.keys(this.headersToSplitOn);
|
|
5000
5016
|
const headerElements = root.querySelectorAll(headers.join(","));
|
|
5001
|
-
headerElements.forEach((headerElement
|
|
5017
|
+
headerElements.forEach((headerElement) => {
|
|
5002
5018
|
const header = headerElement.text?.trim() || "";
|
|
5003
5019
|
const tagName = headerElement.tagName;
|
|
5004
5020
|
const xpath = this.getXPath(headerElement);
|
|
5005
5021
|
let content = "";
|
|
5006
|
-
|
|
5007
|
-
|
|
5008
|
-
|
|
5009
|
-
|
|
5010
|
-
|
|
5022
|
+
const parentNode = headerElement.parentNode;
|
|
5023
|
+
if (parentNode && parentNode.childNodes) {
|
|
5024
|
+
let foundHeader = false;
|
|
5025
|
+
for (const node of parentNode.childNodes) {
|
|
5026
|
+
if (node === headerElement) {
|
|
5027
|
+
foundHeader = true;
|
|
5028
|
+
continue;
|
|
5029
|
+
}
|
|
5030
|
+
if (foundHeader && node.tagName && headers.includes(node.tagName.toLowerCase())) {
|
|
5031
|
+
break;
|
|
5032
|
+
}
|
|
5033
|
+
if (foundHeader) {
|
|
5034
|
+
content += this.getTextContent(node) + " ";
|
|
5035
|
+
}
|
|
5011
5036
|
}
|
|
5012
|
-
currentElement = currentElement.nextElementSibling;
|
|
5013
5037
|
}
|
|
5014
5038
|
content = content.trim();
|
|
5015
5039
|
sections.push({
|
|
@@ -5561,6 +5585,11 @@ var MarkdownHeaderTransformer = class {
|
|
|
5561
5585
|
currentContent.push(line);
|
|
5562
5586
|
continue;
|
|
5563
5587
|
}
|
|
5588
|
+
const isTableLine = strippedLine.includes("|") && strippedLine.length > 0;
|
|
5589
|
+
if (isTableLine) {
|
|
5590
|
+
currentContent.push(line);
|
|
5591
|
+
continue;
|
|
5592
|
+
}
|
|
5564
5593
|
let headerMatched = false;
|
|
5565
5594
|
for (const [sep, name14] of this.headersToSplitOn) {
|
|
5566
5595
|
if (strippedLine.startsWith(sep) && (strippedLine.length === sep.length || strippedLine[sep.length] === " ")) {
|
|
@@ -6381,13 +6410,33 @@ var MDocument = class _MDocument {
|
|
|
6381
6410
|
async chunkHTML(options) {
|
|
6382
6411
|
if (options?.headers?.length) {
|
|
6383
6412
|
const rt = new HTMLHeaderTransformer(options);
|
|
6384
|
-
|
|
6413
|
+
let textSplit = rt.transformDocuments(this.chunks);
|
|
6414
|
+
if (options?.maxSize) {
|
|
6415
|
+
const textSplitter = new RecursiveCharacterTransformer({
|
|
6416
|
+
maxSize: options.maxSize,
|
|
6417
|
+
overlap: options.overlap,
|
|
6418
|
+
keepSeparator: options.keepSeparator,
|
|
6419
|
+
addStartIndex: options.addStartIndex,
|
|
6420
|
+
stripWhitespace: options.stripWhitespace
|
|
6421
|
+
});
|
|
6422
|
+
textSplit = textSplitter.splitDocuments(textSplit);
|
|
6423
|
+
}
|
|
6385
6424
|
this.chunks = textSplit;
|
|
6386
6425
|
return;
|
|
6387
6426
|
}
|
|
6388
6427
|
if (options?.sections?.length) {
|
|
6389
6428
|
const rt = new HTMLSectionTransformer(options);
|
|
6390
|
-
|
|
6429
|
+
let textSplit = rt.transformDocuments(this.chunks);
|
|
6430
|
+
if (options?.maxSize) {
|
|
6431
|
+
const textSplitter = new RecursiveCharacterTransformer({
|
|
6432
|
+
maxSize: options.maxSize,
|
|
6433
|
+
overlap: options.overlap,
|
|
6434
|
+
keepSeparator: options.keepSeparator,
|
|
6435
|
+
addStartIndex: options.addStartIndex,
|
|
6436
|
+
stripWhitespace: options.stripWhitespace
|
|
6437
|
+
});
|
|
6438
|
+
textSplit = textSplitter.splitDocuments(textSplit);
|
|
6439
|
+
}
|
|
6391
6440
|
this.chunks = textSplit;
|
|
6392
6441
|
return;
|
|
6393
6442
|
}
|
|
@@ -6797,7 +6846,7 @@ var GraphRAG = class {
|
|
|
6797
6846
|
return neighbors[neighbors.length - 1]?.id;
|
|
6798
6847
|
}
|
|
6799
6848
|
// Perform random walk with restart
|
|
6800
|
-
randomWalkWithRestart(startNodeId, steps, restartProb) {
|
|
6849
|
+
randomWalkWithRestart(startNodeId, steps, restartProb, allowedNodeIds) {
|
|
6801
6850
|
const visits = /* @__PURE__ */ new Map();
|
|
6802
6851
|
let currentNodeId = startNodeId;
|
|
6803
6852
|
for (let step = 0; step < steps; step++) {
|
|
@@ -6806,7 +6855,10 @@ var GraphRAG = class {
|
|
|
6806
6855
|
currentNodeId = startNodeId;
|
|
6807
6856
|
continue;
|
|
6808
6857
|
}
|
|
6809
|
-
|
|
6858
|
+
let neighbors = this.getNeighbors(currentNodeId);
|
|
6859
|
+
if (allowedNodeIds) {
|
|
6860
|
+
neighbors = neighbors.filter((n) => allowedNodeIds.has(n.id));
|
|
6861
|
+
}
|
|
6810
6862
|
if (neighbors.length === 0) {
|
|
6811
6863
|
currentNodeId = startNodeId;
|
|
6812
6864
|
continue;
|
|
@@ -6820,12 +6872,22 @@ var GraphRAG = class {
|
|
|
6820
6872
|
}
|
|
6821
6873
|
return normalizedVisits;
|
|
6822
6874
|
}
|
|
6875
|
+
/**
|
|
6876
|
+
* Query the graph with a dense embedding and optional metadata filter.
|
|
6877
|
+
*
|
|
6878
|
+
* @param query - The embedding vector to query.
|
|
6879
|
+
* @param topK - Number of top results to return.
|
|
6880
|
+
* @param randomWalkSteps - Steps for random walk reranking.
|
|
6881
|
+
* @param restartProb - Restart probability for random walk.
|
|
6882
|
+
* @param filter - Optional strict metadata filter. All key-value pairs must match exactly.
|
|
6883
|
+
*/
|
|
6823
6884
|
// Retrieve relevant nodes using hybrid approach
|
|
6824
6885
|
query({
|
|
6825
6886
|
query,
|
|
6826
6887
|
topK = 10,
|
|
6827
6888
|
randomWalkSteps = 100,
|
|
6828
|
-
restartProb = 0.15
|
|
6889
|
+
restartProb = 0.15,
|
|
6890
|
+
filter
|
|
6829
6891
|
}) {
|
|
6830
6892
|
if (!query || query.length !== this.dimension) {
|
|
6831
6893
|
throw new Error(`Query embedding must have dimension ${this.dimension}`);
|
|
@@ -6839,15 +6901,20 @@ var GraphRAG = class {
|
|
|
6839
6901
|
if (restartProb <= 0 || restartProb >= 1) {
|
|
6840
6902
|
throw new Error("Restart probability must be between 0 and 1");
|
|
6841
6903
|
}
|
|
6842
|
-
const
|
|
6904
|
+
const filterEntries = Object.entries(filter ?? {});
|
|
6905
|
+
const matchesFilter = (node) => filterEntries.length === 0 ? true : filterEntries.every(([key, value]) => node.metadata?.[key] === value);
|
|
6906
|
+
const nodesToSearch = Array.from(this.nodes.values()).filter(matchesFilter);
|
|
6907
|
+
const similarities = nodesToSearch.map((node) => ({
|
|
6843
6908
|
node,
|
|
6844
6909
|
similarity: this.cosineSimilarity(query, node.embedding)
|
|
6845
6910
|
}));
|
|
6846
6911
|
similarities.sort((a, b) => b.similarity - a.similarity);
|
|
6847
6912
|
const topNodes = similarities.slice(0, topK);
|
|
6913
|
+
const useFilter = filterEntries.length > 0;
|
|
6914
|
+
const allowedNodeIds = useFilter ? new Set(nodesToSearch.map((n) => n.id)) : void 0;
|
|
6848
6915
|
const rerankedNodes = /* @__PURE__ */ new Map();
|
|
6849
6916
|
for (const { node, similarity } of topNodes) {
|
|
6850
|
-
const walkScores = this.randomWalkWithRestart(node.id, randomWalkSteps, restartProb);
|
|
6917
|
+
const walkScores = this.randomWalkWithRestart(node.id, randomWalkSteps, restartProb, allowedNodeIds);
|
|
6851
6918
|
for (const [nodeId, walkScore] of walkScores) {
|
|
6852
6919
|
const node2 = this.nodes.get(nodeId);
|
|
6853
6920
|
const existingScore = rerankedNodes.get(nodeId)?.score || 0;
|
|
@@ -7130,9 +7197,9 @@ var createGraphRAGTool = (options) => {
|
|
|
7130
7197
|
return typeof filter === "string" ? JSON.parse(filter) : filter;
|
|
7131
7198
|
} catch (error) {
|
|
7132
7199
|
if (logger) {
|
|
7133
|
-
logger.
|
|
7200
|
+
logger.error("Invalid filter", { filter, error });
|
|
7134
7201
|
}
|
|
7135
|
-
|
|
7202
|
+
throw new Error(`Invalid filter format: ${error instanceof Error ? error.message : String(error)}`);
|
|
7136
7203
|
}
|
|
7137
7204
|
})();
|
|
7138
7205
|
}
|
|
@@ -7257,9 +7324,9 @@ var createVectorQueryTool = (options) => {
|
|
|
7257
7324
|
return typeof filter === "string" ? JSON.parse(filter) : filter;
|
|
7258
7325
|
} catch (error) {
|
|
7259
7326
|
if (logger) {
|
|
7260
|
-
logger.
|
|
7327
|
+
logger.error("Invalid filter", { filter, error });
|
|
7261
7328
|
}
|
|
7262
|
-
|
|
7329
|
+
throw new Error(`Invalid filter format: ${error instanceof Error ? error.message : String(error)}`);
|
|
7263
7330
|
}
|
|
7264
7331
|
})();
|
|
7265
7332
|
}
|