@mastra/rag 0.10.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,23 @@
1
1
 
2
- > @mastra/rag@0.10.3-alpha.1 build /home/runner/work/mastra/mastra/packages/rag
2
+ > @mastra/rag@1.0.0-alpha.0 build /home/runner/work/mastra/mastra/packages/rag
3
3
  > tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake=smallest --splitting
4
4
 
5
5
  CLI Building entry: src/index.ts
6
6
  CLI Using tsconfig: tsconfig.json
7
7
  CLI tsup v8.5.0
8
8
  TSC Build start
9
- TSC ⚡️ Build success in 15594ms
9
+ TSC ⚡️ Build success in 13246ms
10
10
  DTS Build start
11
11
  CLI Target: es2022
12
12
  Analysis will use the bundled TypeScript version 5.8.3
13
13
  Writing package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.ts
14
14
  Analysis will use the bundled TypeScript version 5.8.3
15
15
  Writing package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.cts
16
- DTS ⚡️ Build success in 12613ms
16
+ DTS ⚡️ Build success in 12527ms
17
17
  CLI Cleaning output folder
18
18
  ESM Build start
19
19
  CJS Build start
20
- ESM dist/index.js 241.64 KB
21
- ESM ⚡️ Build success in 4247ms
22
- CJS dist/index.cjs 243.35 KB
23
- CJS ⚡️ Build success in 4248ms
20
+ CJS dist/index.cjs 243.73 KB
21
+ CJS ⚡️ Build success in 4842ms
22
+ ESM dist/index.js 242.02 KB
23
+ ESM ⚡️ Build success in 4846ms
package/CHANGELOG.md CHANGED
@@ -1,5 +1,48 @@
1
1
  # @mastra/rag
2
2
 
3
+ ## 1.0.0
4
+
5
+ ### Major Changes
6
+
7
+ - 75136cd: Add LaTeX chunking support by adding a case for Language.LATEX in getSeparatorsForLanguage.
8
+
9
+ ### Patch Changes
10
+
11
+ - 63f6b7d: dependency updates:
12
+ - Updated dependency [`zod@^3.25.57` ↗︎](https://www.npmjs.com/package/zod/v/3.25.57) (from `^3.25.56`, in `dependencies`)
13
+ - Updated dependencies [63f6b7d]
14
+ - Updated dependencies [12a95fc]
15
+ - Updated dependencies [4b0f8a6]
16
+ - Updated dependencies [51264a5]
17
+ - Updated dependencies [8e6f677]
18
+ - Updated dependencies [d70c420]
19
+ - Updated dependencies [ee9af57]
20
+ - Updated dependencies [36f1c36]
21
+ - Updated dependencies [2a16996]
22
+ - Updated dependencies [10d352e]
23
+ - Updated dependencies [9589624]
24
+ - Updated dependencies [53d3c37]
25
+ - Updated dependencies [751c894]
26
+ - Updated dependencies [577ce3a]
27
+ - Updated dependencies [9260b3a]
28
+ - @mastra/core@0.10.6
29
+
30
+ ## 1.0.0-alpha.0
31
+
32
+ ### Major Changes
33
+
34
+ - 75136cd: Add LaTeX chunking support by adding a case for Language.LATEX in getSeparatorsForLanguage.
35
+
36
+ ### Patch Changes
37
+
38
+ - 63f6b7d: dependency updates:
39
+ - Updated dependency [`zod@^3.25.57` ↗︎](https://www.npmjs.com/package/zod/v/3.25.57) (from `^3.25.56`, in `dependencies`)
40
+ - Updated dependencies [63f6b7d]
41
+ - Updated dependencies [36f1c36]
42
+ - Updated dependencies [10d352e]
43
+ - Updated dependencies [53d3c37]
44
+ - @mastra/core@0.10.6-alpha.0
45
+
3
46
  ## 0.10.3
4
47
 
5
48
  ### Patch Changes
package/dist/index.cjs CHANGED
@@ -729,7 +729,7 @@ var customAlphabet = (alphabet, defaultSize = 21) => {
729
729
  };
730
730
  };
731
731
 
732
- // ../../node_modules/.pnpm/@ai-sdk+provider-utils@2.2.8_zod@3.25.56/node_modules/@ai-sdk/provider-utils/dist/index.mjs
732
+ // ../../node_modules/.pnpm/@ai-sdk+provider-utils@2.2.8_zod@3.25.57/node_modules/@ai-sdk/provider-utils/dist/index.mjs
733
733
  var import_secure_json_parse = __toESM(require_secure_json_parse());
734
734
  function combineHeaders(...headers) {
735
735
  return headers.reduce(
@@ -4767,6 +4767,21 @@ var RecursiveCharacterTransformer = class _RecursiveCharacterTransformer extends
4767
4767
  " ",
4768
4768
  ""
4769
4769
  ];
4770
+ case "latex" /* LATEX */:
4771
+ return [
4772
+ "\\\\part\\*?\\{",
4773
+ "\\\\chapter\\*?\\{",
4774
+ "\\\\section\\*?\\{",
4775
+ "\\\\subsection\\*?\\{",
4776
+ "\\\\subsubsection\\*?\\{",
4777
+ "\\\\begin\\{.*?\\}",
4778
+ "\\\\end\\{.*?\\}",
4779
+ "\\\\[a-zA-Z]+\\{.*?\\}",
4780
+ "\n\n",
4781
+ "\n",
4782
+ " ",
4783
+ ""
4784
+ ];
4770
4785
  // ... (add other language cases following the same pattern)
4771
4786
  default:
4772
4787
  throw new Error(`Language ${language} is not supported! Please choose from ${Object.values(Language)}`);
package/dist/index.js CHANGED
@@ -727,7 +727,7 @@ var customAlphabet = (alphabet, defaultSize = 21) => {
727
727
  };
728
728
  };
729
729
 
730
- // ../../node_modules/.pnpm/@ai-sdk+provider-utils@2.2.8_zod@3.25.56/node_modules/@ai-sdk/provider-utils/dist/index.mjs
730
+ // ../../node_modules/.pnpm/@ai-sdk+provider-utils@2.2.8_zod@3.25.57/node_modules/@ai-sdk/provider-utils/dist/index.mjs
731
731
  var import_secure_json_parse = __toESM(require_secure_json_parse());
732
732
  function combineHeaders(...headers) {
733
733
  return headers.reduce(
@@ -4765,6 +4765,21 @@ var RecursiveCharacterTransformer = class _RecursiveCharacterTransformer extends
4765
4765
  " ",
4766
4766
  ""
4767
4767
  ];
4768
+ case "latex" /* LATEX */:
4769
+ return [
4770
+ "\\\\part\\*?\\{",
4771
+ "\\\\chapter\\*?\\{",
4772
+ "\\\\section\\*?\\{",
4773
+ "\\\\subsection\\*?\\{",
4774
+ "\\\\subsubsection\\*?\\{",
4775
+ "\\\\begin\\{.*?\\}",
4776
+ "\\\\end\\{.*?\\}",
4777
+ "\\\\[a-zA-Z]+\\{.*?\\}",
4778
+ "\n\n",
4779
+ "\n",
4780
+ " ",
4781
+ ""
4782
+ ];
4768
4783
  // ... (add other language cases following the same pattern)
4769
4784
  default:
4770
4785
  throw new Error(`Language ${language} is not supported! Please choose from ${Object.values(Language)}`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/rag",
3
- "version": "0.10.3",
3
+ "version": "1.0.0",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -26,26 +26,26 @@
26
26
  "js-tiktoken": "^1.0.20",
27
27
  "node-html-better-parser": "^1.4.11",
28
28
  "pathe": "^2.0.3",
29
- "zod": "^3.25.56"
29
+ "zod": "^3.25.57"
30
30
  },
31
31
  "peerDependencies": {
32
- "ai": "^4.0.0",
33
- "@mastra/core": "^0.10.0-alpha.0"
32
+ "@mastra/core": "^0.10.0-alpha.0",
33
+ "ai": "^4.0.0"
34
34
  },
35
35
  "devDependencies": {
36
36
  "@ai-sdk/cohere": "latest",
37
37
  "@ai-sdk/openai": "latest",
38
38
  "@microsoft/api-extractor": "^7.52.8",
39
39
  "@types/big.js": "^6.2.2",
40
- "@types/node": "^20.17.57",
40
+ "@types/node": "^20.19.0",
41
41
  "ai": "^4.3.16",
42
42
  "dotenv": "^16.5.0",
43
43
  "eslint": "^9.28.0",
44
44
  "tsup": "^8.5.0",
45
- "typescript": "^5.8.2",
46
- "vitest": "^3.2.2",
47
- "@internal/lint": "0.0.11",
48
- "@mastra/core": "0.10.4"
45
+ "typescript": "^5.8.3",
46
+ "vitest": "^3.2.3",
47
+ "@internal/lint": "0.0.13",
48
+ "@mastra/core": "0.10.6"
49
49
  },
50
50
  "keywords": [
51
51
  "rag",
@@ -1505,6 +1505,111 @@ describe('MDocument', () => {
1505
1505
  });
1506
1506
  });
1507
1507
 
1508
+ describe('chunkLaTeX', () => {
1509
+ it('should split LaTeX text correctly based on sections', async () => {
1510
+ const text = `\\section{Introduction}
1511
+
1512
+ This is the introduction section.
1513
+
1514
+ \\subsection{Background}
1515
+
1516
+ Some background information.
1517
+
1518
+ \\subsubsection{Details}
1519
+
1520
+ Even more detailed explanation.
1521
+
1522
+ \\section{Conclusion}
1523
+
1524
+ Final thoughts here.`;
1525
+
1526
+ const doc = MDocument.fromText(text, { meta: 'data' });
1527
+
1528
+ await doc.chunk({
1529
+ strategy: 'latex',
1530
+ size: 100,
1531
+ overlap: 10,
1532
+ keepSeparator: 'start',
1533
+ });
1534
+
1535
+ const chunks = doc.getText();
1536
+ expect(chunks.length).toBeGreaterThan(1);
1537
+ expect(chunks[0]).toContain('\\section{Introduction}');
1538
+ });
1539
+
1540
+ it('should handle environments like equations or itemize', async () => {
1541
+ const text = `\\section{Math Section}
1542
+
1543
+ Here is an equation:
1544
+
1545
+ \\[
1546
+ E = mc^2
1547
+ \\]
1548
+
1549
+ \\begin{itemize}
1550
+ \\item First item
1551
+ \\item Second item
1552
+ \\end{itemize}
1553
+
1554
+ End of the section.`;
1555
+
1556
+ const doc = MDocument.fromText(text, { meta: 'data' });
1557
+
1558
+ await doc.chunk({
1559
+ strategy: 'latex',
1560
+ size: 100,
1561
+ overlap: 10,
1562
+ keepSeparator: 'start',
1563
+ });
1564
+
1565
+ const chunks = doc.getText();
1566
+ expect(chunks.some(chunk => chunk.includes('\\begin{itemize}'))).toBe(true);
1567
+ expect(chunks.some(chunk => chunk.includes('E = mc^2'))).toBe(true);
1568
+ });
1569
+
1570
+ it('should split with keepSeparator at end', async () => {
1571
+ const text = `Intro text here.
1572
+ \\section{First}
1573
+ Content A.
1574
+
1575
+ \\section{Second}
1576
+ Content B.`;
1577
+
1578
+ const doc = MDocument.fromText(text, { meta: 'data' });
1579
+
1580
+ await doc.chunk({
1581
+ strategy: 'latex',
1582
+ size: 50,
1583
+ overlap: 0,
1584
+ keepSeparator: 'end',
1585
+ });
1586
+
1587
+ const chunks = doc.getText();
1588
+ expect(chunks.length).toBe(3);
1589
+ expect(chunks[0].trimEnd().includes('\\section{')).toBe(true);
1590
+ expect(chunks[1].trimEnd().includes('\\section{')).toBe(true);
1591
+ });
1592
+
1593
+ it('should strip whitespace correctly', async () => {
1594
+ const text = `\\section{Whitespace}
1595
+
1596
+ Content with leading and trailing whitespace.
1597
+ `;
1598
+
1599
+ const doc = MDocument.fromText(text, { meta: 'data' });
1600
+
1601
+ await doc.chunk({
1602
+ strategy: 'latex',
1603
+ size: 100,
1604
+ overlap: 0,
1605
+ stripWhitespace: true,
1606
+ });
1607
+
1608
+ const chunks = doc.getText();
1609
+ expect(chunks.every(chunk => chunk === chunk.trim())).toBe(true);
1610
+ });
1611
+ });
1612
+
1508
1613
  describe('MarkdownHeader', () => {
1509
1614
  it('should split on headers and preserve metadata', async () => {
1510
1615
  const text = `# Main Title
@@ -272,6 +272,21 @@ export class RecursiveCharacterTransformer extends TextTransformer {
272
272
  ' ',
273
273
  '',
274
274
  ];
275
+ case Language.LATEX:
276
+ return [
277
+ '\\\\part\\*?\\{',
278
+ '\\\\chapter\\*?\\{',
279
+ '\\\\section\\*?\\{',
280
+ '\\\\subsection\\*?\\{',
281
+ '\\\\subsubsection\\*?\\{',
282
+ '\\\\begin\\{.*?\\}',
283
+ '\\\\end\\{.*?\\}',
284
+ '\\\\[a-zA-Z]+\\{.*?\\}',
285
+ '\n\n',
286
+ '\n',
287
+ ' ',
288
+ '',
289
+ ];
275
290
  // ... (add other language cases following the same pattern)
276
291
  default:
277
292
  throw new Error(`Language ${language} is not supported! Please choose from ${Object.values(Language)}`);