@mastra/rag 2.0.0-beta.1 → 2.0.0-beta.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,57 @@
1
1
  # @mastra/rag
2
2
 
3
+ ## 2.0.0-beta.2
4
+
5
+ ### Patch Changes
6
+
7
+ - Add table support to markdown transformer ([#10487](https://github.com/mastra-ai/mastra/pull/10487))
8
+
9
+ Added support for markdown tables in the `MarkdownHeaderTransformer` to prevent tables from being split in the middle during document chunking. Tables are now treated as semantic units similar to code blocks.
10
+
11
+ **Changes:**
12
+ - Updated `MarkdownHeaderTransformer` to detect and preserve markdown tables during chunking
13
+ - Table rows are identified as non-empty lines containing a pipe character (`|`)
14
+ - Tables are kept together as a single block, preventing splits that would break table structure
15
+ - Added comprehensive test coverage for table handling in various scenarios
16
+ - Works with both simple and complex tables, including multi-row tables and tables with different formatting
17
+
18
+ **Usage:**
19
+
20
+ ```typescript
21
+ import { MDocument } from '@mastra/rag';
22
+
23
+ const doc = MDocument.fromMarkdown(`
24
+ # Data Report
25
+
26
+ ## Results
27
+
28
+ | Name | Score | Status |
29
+ |------|-------|--------|
30
+ | Alice | 95 | Pass |
31
+ | Bob | 87 | Pass |
32
+ | Carol | 78 | Pass |
33
+
34
+ ## Summary
35
+
36
+ The results show...
37
+ `);
38
+
39
+ const chunks = await doc.chunk({
40
+ strategy: 'markdown',
41
+ headers: [
42
+ ['#', 'title'],
43
+ ['##', 'section'],
44
+ ],
45
+ });
46
+
47
+ // Tables will now be preserved intact within chunks
48
+ ```
49
+
50
+ - Fix invalid filter handling in vector queries and graph-rag searches. Invalid filter inputs now throw explicit errors instead of silently falling back to empty filters, preventing unintended unfiltered results. ([#10375](https://github.com/mastra-ai/mastra/pull/10375))
51
+
52
+ - Updated dependencies [[`21a15de`](https://github.com/mastra-ai/mastra/commit/21a15de369fe82aac26bb642ed7be73505475e8b), [`feb7ee4`](https://github.com/mastra-ai/mastra/commit/feb7ee4d09a75edb46c6669a3beaceec78811747), [`b0e2ea5`](https://github.com/mastra-ai/mastra/commit/b0e2ea5b52c40fae438b9e2f7baee6f0f89c5442), [`c456e01`](https://github.com/mastra-ai/mastra/commit/c456e0149e3c176afcefdbd9bb1d2c5917723725), [`ab035c2`](https://github.com/mastra-ai/mastra/commit/ab035c2ef6d8cc7bb25f06f1a38508bd9e6f126b), [`1a46a56`](https://github.com/mastra-ai/mastra/commit/1a46a566f45a3fcbadc1cf36bf86d351f264bfa3), [`3cf540b`](https://github.com/mastra-ai/mastra/commit/3cf540b9fbfea8f4fc8d3a2319a4e6c0b0cbfd52), [`1c6ce51`](https://github.com/mastra-ai/mastra/commit/1c6ce51f875915ab57fd36873623013699a2a65d), [`898a972`](https://github.com/mastra-ai/mastra/commit/898a9727d286c2510d6b702dfd367e6aaf5c6b0f), [`a97003a`](https://github.com/mastra-ai/mastra/commit/a97003aa1cf2f4022a41912324a1e77263b326b8), [`ccc141e`](https://github.com/mastra-ai/mastra/commit/ccc141ed27da0abc3a3fc28e9e5128152e8e37f4), [`fe3b897`](https://github.com/mastra-ai/mastra/commit/fe3b897c2ccbcd2b10e81b099438c7337feddf89), [`00123ba`](https://github.com/mastra-ai/mastra/commit/00123ba96dc9e5cd0b110420ebdba56d8f237b25), [`29c4309`](https://github.com/mastra-ai/mastra/commit/29c4309f818b24304c041bcb4a8f19b5f13f6b62), [`16785ce`](https://github.com/mastra-ai/mastra/commit/16785ced928f6f22638f4488cf8a125d99211799), [`de8239b`](https://github.com/mastra-ai/mastra/commit/de8239bdcb1d8c0cfa06da21f1569912a66bbc8a), [`b5e6cd7`](https://github.com/mastra-ai/mastra/commit/b5e6cd77fc8c8e64e0494c1d06cee3d84e795d1e), [`3759cb0`](https://github.com/mastra-ai/mastra/commit/3759cb064935b5f74c65ac2f52a1145f7352899d), [`651e772`](https://github.com/mastra-ai/mastra/commit/651e772eb1475fb13e126d3fcc01751297a88214), [`b61b93f`](https://github.com/mastra-ai/mastra/commit/b61b93f9e058b11dd2eec169853175d31dbdd567), 
[`bae33d9`](https://github.com/mastra-ai/mastra/commit/bae33d91a63fbb64d1e80519e1fc1acaed1e9013), [`c0b731f`](https://github.com/mastra-ai/mastra/commit/c0b731fb27d712dc8582e846df5c0332a6a0c5ba), [`43ca8f2`](https://github.com/mastra-ai/mastra/commit/43ca8f2c7334851cc7b4d3d2f037d8784bfbdd5f), [`2ca67cc`](https://github.com/mastra-ai/mastra/commit/2ca67cc3bb1f6a617353fdcab197d9efebe60d6f), [`9e67002`](https://github.com/mastra-ai/mastra/commit/9e67002b52c9be19936c420a489dbee9c5fd6a78), [`35edc49`](https://github.com/mastra-ai/mastra/commit/35edc49ac0556db609189641d6341e76771b81fc)]:
53
+ - @mastra/core@1.0.0-beta.5
54
+
3
55
  ## 2.0.0-beta.1
4
56
 
5
57
  ### Patch Changes
@@ -1 +1 @@
1
- {"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAGrC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,EAAE,6BAA6B,EAAE,MAAM,aAAa,CAAC;AAa5D,qBAAa,mBAAoB,SAAQ,6BAA6B;gBACxD,OAAO,GAAE,gBAAqB;CAI3C;AAED,qBAAa,yBAAyB;IACpC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,cAAc,CAAU;IAChC,OAAO,CAAC,YAAY,CAAU;gBAElB,gBAAgB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,cAAc,GAAE,OAAe,EAAE,YAAY,GAAE,OAAc;IAM/G,OAAO,CAAC,sBAAsB;IAuD9B,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAqHjD,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IAmB/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD"}
1
+ {"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../../src/document/transformers/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAGrC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAEjD,OAAO,EAAE,6BAA6B,EAAE,MAAM,aAAa,CAAC;AAa5D,qBAAa,mBAAoB,SAAQ,6BAA6B;gBACxD,OAAO,GAAE,gBAAqB;CAI3C;AAED,qBAAa,yBAAyB;IACpC,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,cAAc,CAAU;IAChC,OAAO,CAAC,YAAY,CAAU;gBAElB,gBAAgB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,cAAc,GAAE,OAAe,EAAE,YAAY,GAAE,OAAc;IAM/G,OAAO,CAAC,sBAAsB;IAuD9B,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE;QAAE,IAAI,EAAE,MAAM,CAAA;KAAE,GAAG,QAAQ,EAAE;IAsIjD,eAAe,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,GAAG,QAAQ,EAAE;IAmB/E,kBAAkB,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,EAAE;CAWtD"}
@@ -6,11 +6,12 @@
6
6
  * - Improve graph traversal and querying using types
7
7
  */
8
8
  type SupportedEdgeType = 'semantic';
9
+ type GraphMetadata = Record<string, any>;
9
10
  export interface GraphNode {
10
11
  id: string;
11
12
  content: string;
12
13
  embedding?: number[];
13
- metadata?: Record<string, any>;
14
+ metadata?: GraphMetadata;
14
15
  }
15
16
  export interface RankedNode extends GraphNode {
16
17
  score: number;
@@ -46,11 +47,21 @@ export declare class GraphRAG {
46
47
  createGraph(chunks: GraphChunk[], embeddings: GraphEmbedding[]): void;
47
48
  private selectWeightedNeighbor;
48
49
  private randomWalkWithRestart;
49
- query({ query, topK, randomWalkSteps, restartProb, }: {
50
+ /**
51
+ * Query the graph with a dense embedding and optional metadata filter.
52
+ *
53
+ * @param query - The embedding vector to query.
54
+ * @param topK - Number of top results to return.
55
+ * @param randomWalkSteps - Steps for random walk reranking.
56
+ * @param restartProb - Restart probability for random walk.
57
+ * @param filter - Optional strict metadata filter. All key-value pairs must match exactly.
58
+ */
59
+ query({ query, topK, randomWalkSteps, restartProb, filter, }: {
50
60
  query: number[];
51
61
  topK?: number;
52
62
  randomWalkSteps?: number;
53
63
  restartProb?: number;
64
+ filter?: Partial<GraphMetadata>;
54
65
  }): RankedNode[];
55
66
  }
56
67
  export {};
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/graph-rag/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,KAAK,iBAAiB,GAAG,UAAU,CAAC;AAGpC,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,UAAW,SAAQ,SAAS;IAC3C,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,SAAS;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,iBAAiB,CAAC;CACzB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAC/B;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,qBAAa,QAAQ;IACnB,OAAO,CAAC,KAAK,CAAyB;IACtC,OAAO,CAAC,KAAK,CAAc;IAC3B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;gBAEd,SAAS,GAAE,MAAa,EAAE,SAAS,GAAE,MAAY;IAQ7D,OAAO,CAAC,IAAI,EAAE,SAAS,GAAG,IAAI;IAW9B,OAAO,CAAC,IAAI,EAAE,SAAS,GAAG,IAAI;IAe9B,QAAQ,IAAI,SAAS,EAAE;IAKvB,QAAQ,IAAI,SAAS,EAAE;IAIvB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE;IAIzC,KAAK,IAAI,IAAI;IAKb,iBAAiB,CAAC,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI;IASvD,OAAO,CAAC,YAAY;IAWpB,OAAO,CAAC,gBAAgB;IAgCxB,WAAW,CAAC,MAAM,EAAE,UAAU,EAAE,EAAE,UAAU,EAAE,cAAc,EAAE;IAuC9D,OAAO,CAAC,sBAAsB;IAoB9B,OAAO,CAAC,qBAAqB;IAoC7B,KAAK,CAAC,EACJ,KAAK,EACL,IAAS,EACT,eAAqB,EACrB,WAAkB,GACnB,EAAE;QACD,KAAK,EAAE,MAAM,EAAE,CAAC;QAChB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,GAAG,UAAU,EAAE;CAoDjB"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/graph-rag/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,KAAK,iBAAiB,GAAG,UAAU,CAAC;AACpC,KAAK,aAAa,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AAGzC,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,QAAQ,CAAC,EAAE,aAAa,CAAC;CAC1B;AAED,MAAM,WAAW,UAAW,SAAQ,SAAS;IAC3C,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,SAAS;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,iBAAiB,CAAC;CACzB;AAED,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAC/B;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,qBAAa,QAAQ;IACnB,OAAO,CAAC,KAAK,CAAyB;IACtC,OAAO,CAAC,KAAK,CAAc;IAC3B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;gBAEd,SAAS,GAAE,MAAa,EAAE,SAAS,GAAE,MAAY;IAQ7D,OAAO,CAAC,IAAI,EAAE,SAAS,GAAG,IAAI;IAW9B,OAAO,CAAC,IAAI,EAAE,SAAS,GAAG,IAAI;IAe9B,QAAQ,IAAI,SAAS,EAAE;IAKvB,QAAQ,IAAI,SAAS,EAAE;IAIvB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE;IAIzC,KAAK,IAAI,IAAI;IAKb,iBAAiB,CAAC,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI;IASvD,OAAO,CAAC,YAAY;IAWpB,OAAO,CAAC,gBAAgB;IAgCxB,WAAW,CAAC,MAAM,EAAE,UAAU,EAAE,EAAE,UAAU,EAAE,cAAc,EAAE;IAuC9D,OAAO,CAAC,sBAAsB;IAoB9B,OAAO,CAAC,qBAAqB;IA2C7B;;;;;;;;OAQG;IAEH,KAAK,CAAC,EACJ,KAAK,EACL,IAAS,EACT,eAAqB,EACrB,WAAkB,EAClB,MAAM,GACP,EAAE;QACD,KAAK,EAAE,MAAM,EAAE,CAAC;QAChB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,MAAM,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;KACjC,GAAG,UAAU,EAAE;CA+DjB"}
package/dist/index.cjs CHANGED
@@ -5567,6 +5567,11 @@ var MarkdownHeaderTransformer = class {
5567
5567
  currentContent.push(line);
5568
5568
  continue;
5569
5569
  }
5570
+ const isTableLine = strippedLine.includes("|") && strippedLine.length > 0;
5571
+ if (isTableLine) {
5572
+ currentContent.push(line);
5573
+ continue;
5574
+ }
5570
5575
  let headerMatched = false;
5571
5576
  for (const [sep, name14] of this.headersToSplitOn) {
5572
5577
  if (strippedLine.startsWith(sep) && (strippedLine.length === sep.length || strippedLine[sep.length] === " ")) {
@@ -6803,7 +6808,7 @@ var GraphRAG = class {
6803
6808
  return neighbors[neighbors.length - 1]?.id;
6804
6809
  }
6805
6810
  // Perform random walk with restart
6806
- randomWalkWithRestart(startNodeId, steps, restartProb) {
6811
+ randomWalkWithRestart(startNodeId, steps, restartProb, allowedNodeIds) {
6807
6812
  const visits = /* @__PURE__ */ new Map();
6808
6813
  let currentNodeId = startNodeId;
6809
6814
  for (let step = 0; step < steps; step++) {
@@ -6812,7 +6817,10 @@ var GraphRAG = class {
6812
6817
  currentNodeId = startNodeId;
6813
6818
  continue;
6814
6819
  }
6815
- const neighbors = this.getNeighbors(currentNodeId);
6820
+ let neighbors = this.getNeighbors(currentNodeId);
6821
+ if (allowedNodeIds) {
6822
+ neighbors = neighbors.filter((n) => allowedNodeIds.has(n.id));
6823
+ }
6816
6824
  if (neighbors.length === 0) {
6817
6825
  currentNodeId = startNodeId;
6818
6826
  continue;
@@ -6826,12 +6834,22 @@ var GraphRAG = class {
6826
6834
  }
6827
6835
  return normalizedVisits;
6828
6836
  }
6837
+ /**
6838
+ * Query the graph with a dense embedding and optional metadata filter.
6839
+ *
6840
+ * @param query - The embedding vector to query.
6841
+ * @param topK - Number of top results to return.
6842
+ * @param randomWalkSteps - Steps for random walk reranking.
6843
+ * @param restartProb - Restart probability for random walk.
6844
+ * @param filter - Optional strict metadata filter. All key-value pairs must match exactly.
6845
+ */
6829
6846
  // Retrieve relevant nodes using hybrid approach
6830
6847
  query({
6831
6848
  query,
6832
6849
  topK = 10,
6833
6850
  randomWalkSteps = 100,
6834
- restartProb = 0.15
6851
+ restartProb = 0.15,
6852
+ filter
6835
6853
  }) {
6836
6854
  if (!query || query.length !== this.dimension) {
6837
6855
  throw new Error(`Query embedding must have dimension ${this.dimension}`);
@@ -6845,15 +6863,20 @@ var GraphRAG = class {
6845
6863
  if (restartProb <= 0 || restartProb >= 1) {
6846
6864
  throw new Error("Restart probability must be between 0 and 1");
6847
6865
  }
6848
- const similarities = Array.from(this.nodes.values()).map((node) => ({
6866
+ const filterEntries = Object.entries(filter ?? {});
6867
+ const matchesFilter = (node) => filterEntries.length === 0 ? true : filterEntries.every(([key, value]) => node.metadata?.[key] === value);
6868
+ const nodesToSearch = Array.from(this.nodes.values()).filter(matchesFilter);
6869
+ const similarities = nodesToSearch.map((node) => ({
6849
6870
  node,
6850
6871
  similarity: this.cosineSimilarity(query, node.embedding)
6851
6872
  }));
6852
6873
  similarities.sort((a, b) => b.similarity - a.similarity);
6853
6874
  const topNodes = similarities.slice(0, topK);
6875
+ const useFilter = filterEntries.length > 0;
6876
+ const allowedNodeIds = useFilter ? new Set(nodesToSearch.map((n) => n.id)) : void 0;
6854
6877
  const rerankedNodes = /* @__PURE__ */ new Map();
6855
6878
  for (const { node, similarity } of topNodes) {
6856
- const walkScores = this.randomWalkWithRestart(node.id, randomWalkSteps, restartProb);
6879
+ const walkScores = this.randomWalkWithRestart(node.id, randomWalkSteps, restartProb, allowedNodeIds);
6857
6880
  for (const [nodeId, walkScore] of walkScores) {
6858
6881
  const node2 = this.nodes.get(nodeId);
6859
6882
  const existingScore = rerankedNodes.get(nodeId)?.score || 0;
@@ -7136,9 +7159,9 @@ var createGraphRAGTool = (options) => {
7136
7159
  return typeof filter === "string" ? JSON.parse(filter) : filter;
7137
7160
  } catch (error) {
7138
7161
  if (logger) {
7139
- logger.warn("Failed to parse filter as JSON, using empty filter", { filter, error });
7162
+ logger.error("Invalid filter", { filter, error });
7140
7163
  }
7141
- return {};
7164
+ throw new Error(`Invalid filter format: ${error instanceof Error ? error.message : String(error)}`);
7142
7165
  }
7143
7166
  })();
7144
7167
  }
@@ -7263,9 +7286,9 @@ var createVectorQueryTool = (options) => {
7263
7286
  return typeof filter === "string" ? JSON.parse(filter) : filter;
7264
7287
  } catch (error) {
7265
7288
  if (logger) {
7266
- logger.warn("Failed to parse filter as JSON, using empty filter", { filter, error });
7289
+ logger.error("Invalid filter", { filter, error });
7267
7290
  }
7268
- return {};
7291
+ throw new Error(`Invalid filter format: ${error instanceof Error ? error.message : String(error)}`);
7269
7292
  }
7270
7293
  })();
7271
7294
  }