@mastra/rag 0.1.14-alpha.3 → 0.1.15-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,23 @@
1
1
 
2
- > @mastra/rag@0.1.14-alpha.3 build /home/runner/work/mastra/mastra/packages/rag
2
+ > @mastra/rag@0.1.15-alpha.1 build /home/runner/work/mastra/mastra/packages/rag
3
3
  > tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake=smallest --splitting
4
4
 
5
5
  CLI Building entry: src/index.ts
6
6
  CLI Using tsconfig: tsconfig.json
7
7
  CLI tsup v8.4.0
8
8
  TSC Build start
9
- TSC ⚡️ Build success in 27617ms
9
+ TSC ⚡️ Build success in 27112ms
10
10
  DTS Build start
11
11
  CLI Target: es2022
12
12
  Analysis will use the bundled TypeScript version 5.8.2
13
13
  Writing package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.ts
14
14
  Analysis will use the bundled TypeScript version 5.8.2
15
15
  Writing package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.cts
16
- DTS ⚡️ Build success in 40031ms
16
+ DTS ⚡️ Build success in 35973ms
17
17
  CLI Cleaning output folder
18
18
  ESM Build start
19
19
  CJS Build start
20
- ESM dist/index.js 92.31 KB
21
- ESM ⚡️ Build success in 2414ms
22
- CJS dist/index.cjs 93.03 KB
23
- CJS ⚡️ Build success in 2414ms
20
+ CJS dist/index.cjs 93.37 KB
21
+ CJS ⚡️ Build success in 1563ms
22
+ ESM dist/index.js 92.65 KB
23
+ ESM ⚡️ Build success in 1563ms
package/CHANGELOG.md CHANGED
@@ -1,5 +1,47 @@
1
1
  # @mastra/rag
2
2
 
3
+ ## 0.1.15-alpha.1
4
+
5
+ ### Patch Changes
6
+
7
+ - e47f529: Updated keyword extraction in `chunk`: the `extract.keywords` option is now typed as `KeywordExtractArgs | boolean`
8
+ - Updated dependencies [619c39d]
9
+ - Updated dependencies [fe56be0]
10
+ - Updated dependencies [a0967a0]
11
+ - Updated dependencies [fca3b21]
12
+ - Updated dependencies [0118361]
13
+ - Updated dependencies [619c39d]
14
+ - @mastra/core@0.8.0-alpha.1
15
+
16
+ ## 0.1.15-alpha.0
17
+
18
+ ### Patch Changes
19
+
20
+ - 7599d77: fix(deps): update ai sdk to ^4.2.2
21
+ - Updated dependencies [107bcfe]
22
+ - Updated dependencies [5b4e19f]
23
+ - Updated dependencies [7599d77]
24
+ - Updated dependencies [cafae83]
25
+ - Updated dependencies [8076ecf]
26
+ - Updated dependencies [304397c]
27
+ - @mastra/core@0.7.1-alpha.0
28
+
29
+ ## 0.1.14
30
+
31
+ ### Patch Changes
32
+
33
+ - Updated dependencies [b4fbc59]
34
+ - Updated dependencies [a838fde]
35
+ - Updated dependencies [a8bd4cf]
36
+ - Updated dependencies [7a3eeb0]
37
+ - Updated dependencies [0b54522]
38
+ - Updated dependencies [b3b34f5]
39
+ - Updated dependencies [1af25d5]
40
+ - Updated dependencies [a4686e8]
41
+ - Updated dependencies [6530ad1]
42
+ - Updated dependencies [27439ad]
43
+ - @mastra/core@0.7.0
44
+
3
45
  ## 0.1.14-alpha.3
4
46
 
5
47
  ### Patch Changes
@@ -138,7 +138,7 @@ declare type ExtractParams = {
138
138
  title?: TitleExtractorsArgs | boolean;
139
139
  summary?: SummaryExtractArgs | boolean;
140
140
  questions?: QuestionAnswerExtractArgs | boolean;
141
- keywords?: boolean | Record<string, any>;
141
+ keywords?: KeywordExtractArgs | boolean;
142
142
  };
143
143
  export { ExtractParams }
144
144
  export { ExtractParams as ExtractParams_alias_1 }
@@ -138,7 +138,7 @@ declare type ExtractParams = {
138
138
  title?: TitleExtractorsArgs | boolean;
139
139
  summary?: SummaryExtractArgs | boolean;
140
140
  questions?: QuestionAnswerExtractArgs | boolean;
141
- keywords?: boolean | Record<string, any>;
141
+ keywords?: KeywordExtractArgs | boolean;
142
142
  };
143
143
  export { ExtractParams }
144
144
  export { ExtractParams as ExtractParams_alias_1 }
package/dist/index.cjs CHANGED
@@ -1305,6 +1305,18 @@ var MDocument = class _MDocument {
1305
1305
  }
1306
1306
  if (typeof title !== "undefined") {
1307
1307
  transformations.push(new llamaindex.TitleExtractor(typeof title === "boolean" ? {} : title));
1308
+ this.chunks = this.chunks.map(
1309
+ (doc, i) => new llamaindex.Document({
1310
+ ...doc,
1311
+ relationships: {
1312
+ [llamaindex.NodeRelationship.SOURCE]: {
1313
+ nodeId: `doc-${i}`,
1314
+ nodeType: llamaindex.ObjectType.DOCUMENT,
1315
+ metadata: doc.metadata
1316
+ }
1317
+ }
1318
+ })
1319
+ );
1308
1320
  }
1309
1321
  const pipeline = new llamaindex.IngestionPipeline({
1310
1322
  transformations
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
- import { Document, SummaryExtractor, QuestionsAnsweredExtractor, KeywordExtractor, TitleExtractor, IngestionPipeline } from 'llamaindex';
1
+ import { Document, SummaryExtractor, QuestionsAnsweredExtractor, KeywordExtractor, TitleExtractor, ObjectType, NodeRelationship, IngestionPipeline } from 'llamaindex';
2
2
  import { parse } from 'node-html-better-parser';
3
3
  import { encodingForModel, getEncoding } from 'js-tiktoken';
4
4
  import { CohereRelevanceScorer, MastraAgentRelevanceScorer } from '@mastra/core/relevance';
@@ -1303,6 +1303,18 @@ var MDocument = class _MDocument {
1303
1303
  }
1304
1304
  if (typeof title !== "undefined") {
1305
1305
  transformations.push(new TitleExtractor(typeof title === "boolean" ? {} : title));
1306
+ this.chunks = this.chunks.map(
1307
+ (doc, i) => new Document({
1308
+ ...doc,
1309
+ relationships: {
1310
+ [NodeRelationship.SOURCE]: {
1311
+ nodeId: `doc-${i}`,
1312
+ nodeType: ObjectType.DOCUMENT,
1313
+ metadata: doc.metadata
1314
+ }
1315
+ }
1316
+ })
1317
+ );
1306
1318
  }
1307
1319
  const pipeline = new IngestionPipeline({
1308
1320
  transformations
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/rag",
3
- "version": "0.1.14-alpha.3",
3
+ "version": "0.1.15-alpha.1",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -29,7 +29,7 @@
29
29
  "node-html-better-parser": "^1.4.7",
30
30
  "pathe": "^2.0.3",
31
31
  "zod": "^3.24.2",
32
- "@mastra/core": "^0.7.0-alpha.3"
32
+ "@mastra/core": "^0.8.0-alpha.1"
33
33
  },
34
34
  "peerDependencies": {
35
35
  "ai": "^4.0.0"
@@ -39,7 +39,7 @@
39
39
  "@ai-sdk/openai": "latest",
40
40
  "@microsoft/api-extractor": "^7.52.1",
41
41
  "@types/node": "^20.17.27",
42
- "ai": "^4.1.54",
42
+ "ai": "^4.2.2",
43
43
  "dotenv": "^16.4.7",
44
44
  "eslint": "^9.23.0",
45
45
  "tsup": "^8.4.0",
@@ -1638,6 +1638,79 @@ describe('MDocument', () => {
1638
1638
  expect(docs?.[0]?.text).toContain('# Title');
1639
1639
  });
1640
1640
  });
1641
+
1642
+ describe('metadata extraction', () => {
1643
+ it('should extract metadata with default settings', async () => {
1644
+ const doc = MDocument.fromMarkdown(
1645
+ '# AI and Machine Learning\n\nThis is a test document about artificial intelligence and machine learning.',
1646
+ );
1647
+
1648
+ const chunks = await doc.chunk({
1649
+ strategy: 'markdown',
1650
+ extract: {
1651
+ title: true,
1652
+ summary: true,
1653
+ keywords: true,
1654
+ },
1655
+ });
1656
+
1657
+ const metadata = chunks[0].metadata;
1658
+ expect(metadata).toBeDefined();
1659
+ expect(metadata.documentTitle).toBeDefined();
1660
+ expect(metadata.sectionSummary).toBeDefined();
1661
+ expect(metadata.excerptKeywords).toMatch(/^KEYWORDS: .*/);
1662
+ }, 15000);
1663
+
1664
+ it('should extract metadata with custom settings', async () => {
1665
+ const doc = MDocument.fromMarkdown(
1666
+ '# AI and Machine Learning\n\nThis is a test document about artificial intelligence and machine learning.',
1667
+ );
1668
+
1669
+ const chunks = await doc.chunk({
1670
+ strategy: 'markdown',
1671
+ extract: {
1672
+ title: {
1673
+ nodes: 2,
1674
+ nodeTemplate: 'Generate a title for this: {context}',
1675
+ combineTemplate: 'Combine these titles: {context}',
1676
+ },
1677
+ summary: {
1678
+ summaries: ['self'],
1679
+ promptTemplate: 'Summarize this: {context}',
1680
+ },
1681
+ questions: {
1682
+ questions: 2,
1683
+ promptTemplate: 'Generate {numQuestions} questions about: {context}',
1684
+ },
1685
+ keywords: {
1686
+ keywords: 3,
1687
+ promptTemplate: 'Extract {maxKeywords} key terms from: {context}',
1688
+ },
1689
+ },
1690
+ });
1691
+
1692
+ const metadata = chunks[0].metadata;
1693
+ expect(metadata).toBeDefined();
1694
+ expect(metadata.documentTitle).toBeDefined();
1695
+ expect(metadata.sectionSummary).toBeDefined();
1696
+ expect(metadata.questionsThisExcerptCanAnswer).toMatch(/^1\. .*\?2\. .*\?$/);
1697
+ expect(metadata.excerptKeywords).toMatch(/^1\. .*\n2\. .*\n3\. .*$/);
1698
+ }, 15000);
1699
+
1700
+ it('should handle invalid summary types', async () => {
1701
+ const doc = MDocument.fromText('Test document');
1702
+
1703
+ await expect(
1704
+ doc.chunk({
1705
+ extract: {
1706
+ summary: {
1707
+ summaries: ['invalid'],
1708
+ },
1709
+ },
1710
+ }),
1711
+ ).rejects.toThrow("Summaries must be one of 'self', 'prev', 'next'");
1712
+ }, 15000);
1713
+ });
1641
1714
  });
1642
1715
 
1643
1716
  // Helper function to find the longest common substring between two strings
@@ -5,6 +5,8 @@ import {
5
5
  QuestionsAnsweredExtractor,
6
6
  SummaryExtractor,
7
7
  TitleExtractor,
8
+ ObjectType,
9
+ NodeRelationship,
8
10
  } from 'llamaindex';
9
11
 
10
12
  import { CharacterTransformer, RecursiveCharacterTransformer } from './transformers/character';
@@ -43,6 +45,19 @@ export class MDocument {
43
45
 
44
46
  if (typeof title !== 'undefined') {
45
47
  transformations.push(new TitleExtractor(typeof title === 'boolean' ? {} : title));
48
+ this.chunks = this.chunks.map(
49
+ (doc, i) =>
50
+ new Chunk({
51
+ ...doc,
52
+ relationships: {
53
+ [NodeRelationship.SOURCE]: {
54
+ nodeId: `doc-${i}`,
55
+ nodeType: ObjectType.DOCUMENT,
56
+ metadata: doc.metadata,
57
+ },
58
+ },
59
+ }),
60
+ );
46
61
  }
47
62
 
48
63
  const pipeline = new IngestionPipeline({
@@ -41,7 +41,7 @@ export type ExtractParams = {
41
41
  title?: TitleExtractorsArgs | boolean;
42
42
  summary?: SummaryExtractArgs | boolean;
43
43
  questions?: QuestionAnswerExtractArgs | boolean;
44
- keywords?: boolean | Record<string, any>;
44
+ keywords?: KeywordExtractArgs | boolean;
45
45
  };
46
46
 
47
47
  export type ChunkOptions = {