@mastra/rag 2.0.0-beta.1 → 2.0.0-beta.2
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +52 -0
- package/dist/document/transformers/markdown.d.ts.map +1 -1
- package/dist/graph-rag/index.d.ts +13 -2
- package/dist/graph-rag/index.d.ts.map +1 -1
- package/dist/index.cjs +32 -9
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +32 -9
- package/dist/index.js.map +1 -1
- package/dist/tools/graph-rag.d.ts.map +1 -1
- package/dist/tools/vector-query.d.ts.map +1 -1
- package/dist/utils/convert-sources.d.ts +3 -1
- package/dist/utils/convert-sources.d.ts.map +1 -1
- package/package.json +6 -6
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,57 @@
|
|
|
1
1
|
# @mastra/rag
|
|
2
2
|
|
|
3
|
+
## 2.0.0-beta.2
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- Add table support to markdown transformer ([#10487](https://github.com/mastra-ai/mastra/pull/10487))
|
|
8
|
+
|
|
9
|
+
Added support for markdown tables in the `MarkdownHeaderTransformer` to prevent tables from being split in the middle during document chunking. Tables are now treated as semantic units similar to code blocks.
|
|
10
|
+
|
|
11
|
+
**Changes:**
|
|
12
|
+
- Updated `MarkdownHeaderTransformer` to detect and preserve markdown tables during chunking
|
|
13
|
+
- Tables are identified by lines containing pipe characters (`|`): any non-empty line that contains a pipe is treated as part of a table
|
|
14
|
+
- Tables are kept together as a single block, preventing splits that would break table structure
|
|
15
|
+
- Added comprehensive test coverage for table handling in various scenarios
|
|
16
|
+
- Works with both simple and complex tables, including multi-row tables and tables with different formatting
|
|
17
|
+
|
|
18
|
+
**Usage:**
|
|
19
|
+
|
|
20
|
+
```typescript
|
|
21
|
+
import { MDocument } from '@mastra/rag';
|
|
22
|
+
|
|
23
|
+
const doc = MDocument.fromMarkdown(`
|
|
24
|
+
# Data Report
|
|
25
|
+
|
|
26
|
+
## Results
|
|
27
|
+
|
|
28
|
+
| Name | Score | Status |
|
|
29
|
+
|------|-------|--------|
|
|
30
|
+
| Alice | 95 | Pass |
|
|
31
|
+
| Bob | 87 | Pass |
|
|
32
|
+
| Carol | 78 | Pass |
|
|
33
|
+
|
|
34
|
+
## Summary
|
|
35
|
+
|
|
36
|
+
The results show...
|
|
37
|
+
`);
|
|
38
|
+
|
|
39
|
+
const chunks = await doc.chunk({
|
|
40
|
+
strategy: 'markdown',
|
|
41
|
+
headers: [
|
|
42
|
+
['#', 'title'],
|
|
43
|
+
['##', 'section'],
|
|
44
|
+
],
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
// Tables will now be preserved intact within chunks
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
- Fix invalid filter handling in vector queries and graph-rag searches. Invalid filter inputs now throw explicit errors instead of silently falling back to empty filters, preventing unintended unfiltered results. ([#10375](https://github.com/mastra-ai/mastra/pull/10375))
|
|
51
|
+
|
|
52
|
+
- Updated dependencies [[`21a15de`](https://github.com/mastra-ai/mastra/commit/21a15de369fe82aac26bb642ed7be73505475e8b), [`feb7ee4`](https://github.com/mastra-ai/mastra/commit/feb7ee4d09a75edb46c6669a3beaceec78811747), [`b0e2ea5`](https://github.com/mastra-ai/mastra/commit/b0e2ea5b52c40fae438b9e2f7baee6f0f89c5442), [`c456e01`](https://github.com/mastra-ai/mastra/commit/c456e0149e3c176afcefdbd9bb1d2c5917723725), [`ab035c2`](https://github.com/mastra-ai/mastra/commit/ab035c2ef6d8cc7bb25f06f1a38508bd9e6f126b), [`1a46a56`](https://github.com/mastra-ai/mastra/commit/1a46a566f45a3fcbadc1cf36bf86d351f264bfa3), [`3cf540b`](https://github.com/mastra-ai/mastra/commit/3cf540b9fbfea8f4fc8d3a2319a4e6c0b0cbfd52), [`1c6ce51`](https://github.com/mastra-ai/mastra/commit/1c6ce51f875915ab57fd36873623013699a2a65d), [`898a972`](https://github.com/mastra-ai/mastra/commit/898a9727d286c2510d6b702dfd367e6aaf5c6b0f), [`a97003a`](https://github.com/mastra-ai/mastra/commit/a97003aa1cf2f4022a41912324a1e77263b326b8), [`ccc141e`](https://github.com/mastra-ai/mastra/commit/ccc141ed27da0abc3a3fc28e9e5128152e8e37f4), [`fe3b897`](https://github.com/mastra-ai/mastra/commit/fe3b897c2ccbcd2b10e81b099438c7337feddf89), [`00123ba`](https://github.com/mastra-ai/mastra/commit/00123ba96dc9e5cd0b110420ebdba56d8f237b25), [`29c4309`](https://github.com/mastra-ai/mastra/commit/29c4309f818b24304c041bcb4a8f19b5f13f6b62), [`16785ce`](https://github.com/mastra-ai/mastra/commit/16785ced928f6f22638f4488cf8a125d99211799), [`de8239b`](https://github.com/mastra-ai/mastra/commit/de8239bdcb1d8c0cfa06da21f1569912a66bbc8a), [`b5e6cd7`](https://github.com/mastra-ai/mastra/commit/b5e6cd77fc8c8e64e0494c1d06cee3d84e795d1e), [`3759cb0`](https://github.com/mastra-ai/mastra/commit/3759cb064935b5f74c65ac2f52a1145f7352899d), [`651e772`](https://github.com/mastra-ai/mastra/commit/651e772eb1475fb13e126d3fcc01751297a88214), [`b61b93f`](https://github.com/mastra-ai/mastra/commit/b61b93f9e058b11dd2eec169853175d31dbdd567), 
[`bae33d9`](https://github.com/mastra-ai/mastra/commit/bae33d91a63fbb64d1e80519e1fc1acaed1e9013), [`c0b731f`](https://github.com/mastra-ai/mastra/commit/c0b731fb27d712dc8582e846df5c0332a6a0c5ba), [`43ca8f2`](https://github.com/mastra-ai/mastra/commit/43ca8f2c7334851cc7b4d3d2f037d8784bfbdd5f), [`2ca67cc`](https://github.com/mastra-ai/mastra/commit/2ca67cc3bb1f6a617353fdcab197d9efebe60d6f), [`9e67002`](https://github.com/mastra-ai/mastra/commit/9e67002b52c9be19936c420a489dbee9c5fd6a78), [`35edc49`](https://github.com/mastra-ai/mastra/commit/35edc49ac0556db609189641d6341e76771b81fc)]:
|
|
53
|
+
- @mastra/core@1.0.0-beta.5
|
|
54
|
+
|
|
3
55
|
## 2.0.0-beta.1
|
|
4
56
|
|
|
5
57
|
### Patch Changes
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAGrC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,EAAE,6BAA6B,EAAE,MAAM,aAAa,CAAC;AAa5D,qBAAa,mBAAoB,SAAQ,6BAA6B;gBACxD,OAAO,GAAE,gBAAqB;CAI3C;AAED,qBAAa,yBAAyB;IACpC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,cAAc,CAAU;IAChC,OAAO,CAAC,YAAY,CAAU;gBAElB,gBAAgB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,cAAc,GAAE,OAAe,EAAE,YAAY,GAAE,OAAc;IAM/G,OAAO,CAAC,sBAAsB;IAuD9B,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;
|
|
1
|
+
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAGrC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,EAAE,6BAA6B,EAAE,MAAM,aAAa,CAAC;AAa5D,qBAAa,mBAAoB,SAAQ,6BAA6B;gBACxD,OAAO,GAAE,gBAAqB;CAI3C;AAED,qBAAa,yBAAyB;IACpC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,cAAc,CAAU;IAChC,OAAO,CAAC,YAAY,CAAU;gBAElB,gBAAgB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,cAAc,GAAE,OAAe,EAAE,YAAY,GAAE,OAAc;IAM/G,OAAO,CAAC,sBAAsB;IAuD9B,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAsIjD,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IAmB/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD"}
|
|
@@ -6,11 +6,12 @@
|
|
|
6
6
|
* - Improve graph traversal and querying using types
|
|
7
7
|
*/
|
|
8
8
|
type SupportedEdgeType = 'semantic';
|
|
9
|
+
type GraphMetadata = Record<string, any>;
|
|
9
10
|
export interface GraphNode {
|
|
10
11
|
id: string;
|
|
11
12
|
content: string;
|
|
12
13
|
embedding?: number[];
|
|
13
|
-
metadata?:
|
|
14
|
+
metadata?: GraphMetadata;
|
|
14
15
|
}
|
|
15
16
|
export interface RankedNode extends GraphNode {
|
|
16
17
|
score: number;
|
|
@@ -46,11 +47,21 @@ export declare class GraphRAG {
|
|
|
46
47
|
createGraph(chunks: GraphChunk[], embeddings: GraphEmbedding[]): void;
|
|
47
48
|
private selectWeightedNeighbor;
|
|
48
49
|
private randomWalkWithRestart;
|
|
49
|
-
|
|
50
|
+
/**
|
|
51
|
+
* Query the graph with a dense embedding and optional metadata filter.
|
|
52
|
+
*
|
|
53
|
+
* @param query - The embedding vector to query.
|
|
54
|
+
* @param topK - Number of top results to return.
|
|
55
|
+
* @param randomWalkSteps - Steps for random walk reranking.
|
|
56
|
+
* @param restartProb - Restart probability for random walk.
|
|
57
|
+
* @param filter - Optional strict metadata filter. All key-value pairs must match exactly.
|
|
58
|
+
*/
|
|
59
|
+
query({ query, topK, randomWalkSteps, restartProb, filter, }: {
|
|
50
60
|
query: number[];
|
|
51
61
|
topK?: number;
|
|
52
62
|
randomWalkSteps?: number;
|
|
53
63
|
restartProb?: number;
|
|
64
|
+
filter?: Partial<GraphMetadata>;
|
|
54
65
|
}): RankedNode[];
|
|
55
66
|
}
|
|
56
67
|
export {};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/graph-rag/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/graph-rag/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,KAAK,iBAAiB,GAAG,UAAU,CAAC;AACpC,KAAK,aAAa,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AAGzC,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B;AAED,MAAM,WAAW,UAAW,SAAQ,SAAS;IAC3C,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,SAAS;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,iBAAiB,CAAC;CACzB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAC/B;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,qBAAa,QAAQ;IACnB,OAAO,CAAC,KAAK,CAAyB;IACtC,OAAO,CAAC,KAAK,CAAc;IAC3B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;gBAEd,SAAS,GAAE,MAAa,EAAE,SAAS,GAAE,MAAY;IAQ7D,OAAO,CAAC,IAAI,EAAE,SAAS,GAAG,IAAI;IAW9B,OAAO,CAAC,IAAI,EAAE,SAAS,GAAG,IAAI;IAe9B,QAAQ,IAAI,SAAS,EAAE;IAKvB,QAAQ,IAAI,SAAS,EAAE;IAIvB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE;IAIzC,KAAK,IAAI,IAAI;IAKb,iBAAiB,CAAC,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI;IASvD,OAAO,CAAC,YAAY;IAWpB,OAAO,CAAC,gBAAgB;IAgCxB,WAAW,CAAC,MAAM,EAAE,UAAU,EAAE,EAAE,UAAU,EAAE,cAAc,EAAE;IAuC9D,OAAO,CAAC,sBAAsB;IAoB9B,OAAO,CAAC,qBAAqB;IA2C7B;;;;;;;;OAQG;IAEH,KAAK,CAAC,EACJ,KAAK,EACL,IAAS,EACT,eAAqB,EACrB,WAAkB,EAClB,MAAM,GACP,EAAE;QACD,KAAK,EAAE,MAAM,EAAE,CAAC;QAChB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,MAAM,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;KACjC,GAAG,UAAU,EAAE;CA+DjB"}
|
package/dist/index.cjs
CHANGED
|
@@ -5567,6 +5567,11 @@ var MarkdownHeaderTransformer = class {
|
|
|
5567
5567
|
currentContent.push(line);
|
|
5568
5568
|
continue;
|
|
5569
5569
|
}
|
|
5570
|
+
const isTableLine = strippedLine.includes("|") && strippedLine.length > 0;
|
|
5571
|
+
if (isTableLine) {
|
|
5572
|
+
currentContent.push(line);
|
|
5573
|
+
continue;
|
|
5574
|
+
}
|
|
5570
5575
|
let headerMatched = false;
|
|
5571
5576
|
for (const [sep, name14] of this.headersToSplitOn) {
|
|
5572
5577
|
if (strippedLine.startsWith(sep) && (strippedLine.length === sep.length || strippedLine[sep.length] === " ")) {
|
|
@@ -6803,7 +6808,7 @@ var GraphRAG = class {
|
|
|
6803
6808
|
return neighbors[neighbors.length - 1]?.id;
|
|
6804
6809
|
}
|
|
6805
6810
|
// Perform random walk with restart
|
|
6806
|
-
randomWalkWithRestart(startNodeId, steps, restartProb) {
|
|
6811
|
+
randomWalkWithRestart(startNodeId, steps, restartProb, allowedNodeIds) {
|
|
6807
6812
|
const visits = /* @__PURE__ */ new Map();
|
|
6808
6813
|
let currentNodeId = startNodeId;
|
|
6809
6814
|
for (let step = 0; step < steps; step++) {
|
|
@@ -6812,7 +6817,10 @@ var GraphRAG = class {
|
|
|
6812
6817
|
currentNodeId = startNodeId;
|
|
6813
6818
|
continue;
|
|
6814
6819
|
}
|
|
6815
|
-
|
|
6820
|
+
let neighbors = this.getNeighbors(currentNodeId);
|
|
6821
|
+
if (allowedNodeIds) {
|
|
6822
|
+
neighbors = neighbors.filter((n) => allowedNodeIds.has(n.id));
|
|
6823
|
+
}
|
|
6816
6824
|
if (neighbors.length === 0) {
|
|
6817
6825
|
currentNodeId = startNodeId;
|
|
6818
6826
|
continue;
|
|
@@ -6826,12 +6834,22 @@ var GraphRAG = class {
|
|
|
6826
6834
|
}
|
|
6827
6835
|
return normalizedVisits;
|
|
6828
6836
|
}
|
|
6837
|
+
/**
|
|
6838
|
+
* Query the graph with a dense embedding and optional metadata filter.
|
|
6839
|
+
*
|
|
6840
|
+
* @param query - The embedding vector to query.
|
|
6841
|
+
* @param topK - Number of top results to return.
|
|
6842
|
+
* @param randomWalkSteps - Steps for random walk reranking.
|
|
6843
|
+
* @param restartProb - Restart probability for random walk.
|
|
6844
|
+
* @param filter - Optional strict metadata filter. All key-value pairs must match exactly.
|
|
6845
|
+
*/
|
|
6829
6846
|
// Retrieve relevant nodes using hybrid approach
|
|
6830
6847
|
query({
|
|
6831
6848
|
query,
|
|
6832
6849
|
topK = 10,
|
|
6833
6850
|
randomWalkSteps = 100,
|
|
6834
|
-
restartProb = 0.15
|
|
6851
|
+
restartProb = 0.15,
|
|
6852
|
+
filter
|
|
6835
6853
|
}) {
|
|
6836
6854
|
if (!query || query.length !== this.dimension) {
|
|
6837
6855
|
throw new Error(`Query embedding must have dimension ${this.dimension}`);
|
|
@@ -6845,15 +6863,20 @@ var GraphRAG = class {
|
|
|
6845
6863
|
if (restartProb <= 0 || restartProb >= 1) {
|
|
6846
6864
|
throw new Error("Restart probability must be between 0 and 1");
|
|
6847
6865
|
}
|
|
6848
|
-
const
|
|
6866
|
+
const filterEntries = Object.entries(filter ?? {});
|
|
6867
|
+
const matchesFilter = (node) => filterEntries.length === 0 ? true : filterEntries.every(([key, value]) => node.metadata?.[key] === value);
|
|
6868
|
+
const nodesToSearch = Array.from(this.nodes.values()).filter(matchesFilter);
|
|
6869
|
+
const similarities = nodesToSearch.map((node) => ({
|
|
6849
6870
|
node,
|
|
6850
6871
|
similarity: this.cosineSimilarity(query, node.embedding)
|
|
6851
6872
|
}));
|
|
6852
6873
|
similarities.sort((a, b) => b.similarity - a.similarity);
|
|
6853
6874
|
const topNodes = similarities.slice(0, topK);
|
|
6875
|
+
const useFilter = filterEntries.length > 0;
|
|
6876
|
+
const allowedNodeIds = useFilter ? new Set(nodesToSearch.map((n) => n.id)) : void 0;
|
|
6854
6877
|
const rerankedNodes = /* @__PURE__ */ new Map();
|
|
6855
6878
|
for (const { node, similarity } of topNodes) {
|
|
6856
|
-
const walkScores = this.randomWalkWithRestart(node.id, randomWalkSteps, restartProb);
|
|
6879
|
+
const walkScores = this.randomWalkWithRestart(node.id, randomWalkSteps, restartProb, allowedNodeIds);
|
|
6857
6880
|
for (const [nodeId, walkScore] of walkScores) {
|
|
6858
6881
|
const node2 = this.nodes.get(nodeId);
|
|
6859
6882
|
const existingScore = rerankedNodes.get(nodeId)?.score || 0;
|
|
@@ -7136,9 +7159,9 @@ var createGraphRAGTool = (options) => {
|
|
|
7136
7159
|
return typeof filter === "string" ? JSON.parse(filter) : filter;
|
|
7137
7160
|
} catch (error) {
|
|
7138
7161
|
if (logger) {
|
|
7139
|
-
logger.
|
|
7162
|
+
logger.error("Invalid filter", { filter, error });
|
|
7140
7163
|
}
|
|
7141
|
-
|
|
7164
|
+
throw new Error(`Invalid filter format: ${error instanceof Error ? error.message : String(error)}`);
|
|
7142
7165
|
}
|
|
7143
7166
|
})();
|
|
7144
7167
|
}
|
|
@@ -7263,9 +7286,9 @@ var createVectorQueryTool = (options) => {
|
|
|
7263
7286
|
return typeof filter === "string" ? JSON.parse(filter) : filter;
|
|
7264
7287
|
} catch (error) {
|
|
7265
7288
|
if (logger) {
|
|
7266
|
-
logger.
|
|
7289
|
+
logger.error("Invalid filter", { filter, error });
|
|
7267
7290
|
}
|
|
7268
|
-
|
|
7291
|
+
throw new Error(`Invalid filter format: ${error instanceof Error ? error.message : String(error)}`);
|
|
7269
7292
|
}
|
|
7270
7293
|
})();
|
|
7271
7294
|
}
|