@mastra/rag 0.1.14 → 0.1.15-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +7 -7
- package/CHANGELOG.md +26 -0
- package/dist/_tsup-dts-rollup.d.cts +1 -1
- package/dist/_tsup-dts-rollup.d.ts +1 -1
- package/dist/index.cjs +12 -0
- package/dist/index.js +13 -1
- package/package.json +3 -3
- package/src/document/document.test.ts +73 -0
- package/src/document/document.ts +15 -0
- package/src/document/types.ts +1 -1
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,23 +1,23 @@
|
|
|
1
1
|
|
|
2
|
-
> @mastra/rag@0.1.14 build /home/runner/work/mastra/mastra/packages/rag
|
|
2
|
+
> @mastra/rag@0.1.15-alpha.1 build /home/runner/work/mastra/mastra/packages/rag
|
|
3
3
|
> tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake=smallest --splitting
|
|
4
4
|
|
|
5
5
|
[34mCLI[39m Building entry: src/index.ts
|
|
6
6
|
[34mCLI[39m Using tsconfig: tsconfig.json
|
|
7
7
|
[34mCLI[39m tsup v8.4.0
|
|
8
8
|
[34mTSC[39m Build start
|
|
9
|
-
[32mTSC[39m ⚡️ Build success in
|
|
9
|
+
[32mTSC[39m ⚡️ Build success in 27112ms
|
|
10
10
|
[34mDTS[39m Build start
|
|
11
11
|
[34mCLI[39m Target: es2022
|
|
12
12
|
Analysis will use the bundled TypeScript version 5.8.2
|
|
13
13
|
[36mWriting package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.ts[39m
|
|
14
14
|
Analysis will use the bundled TypeScript version 5.8.2
|
|
15
15
|
[36mWriting package typings: /home/runner/work/mastra/mastra/packages/rag/dist/_tsup-dts-rollup.d.cts[39m
|
|
16
|
-
[32mDTS[39m ⚡️ Build success in
|
|
16
|
+
[32mDTS[39m ⚡️ Build success in 35973ms
|
|
17
17
|
[34mCLI[39m Cleaning output folder
|
|
18
18
|
[34mESM[39m Build start
|
|
19
19
|
[34mCJS[39m Build start
|
|
20
|
-
[
|
|
21
|
-
[
|
|
22
|
-
[
|
|
23
|
-
[
|
|
20
|
+
[32mCJS[39m [1mdist/index.cjs [22m[32m93.37 KB[39m
|
|
21
|
+
[32mCJS[39m ⚡️ Build success in 1563ms
|
|
22
|
+
[32mESM[39m [1mdist/index.js [22m[32m92.65 KB[39m
|
|
23
|
+
[32mESM[39m ⚡️ Build success in 1563ms
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,31 @@
|
|
|
1
1
|
# @mastra/rag
|
|
2
2
|
|
|
3
|
+
## 0.1.15-alpha.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- e47f529: Updated KeywordExtraction in chunk
|
|
8
|
+
- Updated dependencies [619c39d]
|
|
9
|
+
- Updated dependencies [fe56be0]
|
|
10
|
+
- Updated dependencies [a0967a0]
|
|
11
|
+
- Updated dependencies [fca3b21]
|
|
12
|
+
- Updated dependencies [0118361]
|
|
13
|
+
- Updated dependencies [619c39d]
|
|
14
|
+
- @mastra/core@0.8.0-alpha.1
|
|
15
|
+
|
|
16
|
+
## 0.1.15-alpha.0
|
|
17
|
+
|
|
18
|
+
### Patch Changes
|
|
19
|
+
|
|
20
|
+
- 7599d77: fix(deps): update ai sdk to ^4.2.2
|
|
21
|
+
- Updated dependencies [107bcfe]
|
|
22
|
+
- Updated dependencies [5b4e19f]
|
|
23
|
+
- Updated dependencies [7599d77]
|
|
24
|
+
- Updated dependencies [cafae83]
|
|
25
|
+
- Updated dependencies [8076ecf]
|
|
26
|
+
- Updated dependencies [304397c]
|
|
27
|
+
- @mastra/core@0.7.1-alpha.0
|
|
28
|
+
|
|
3
29
|
## 0.1.14
|
|
4
30
|
|
|
5
31
|
### Patch Changes
|
package/dist/_tsup-dts-rollup.d.cts
CHANGED
|
@@ -138,7 +138,7 @@ declare type ExtractParams = {
|
|
|
138
138
|
title?: TitleExtractorsArgs | boolean;
|
|
139
139
|
summary?: SummaryExtractArgs | boolean;
|
|
140
140
|
questions?: QuestionAnswerExtractArgs | boolean;
|
|
141
|
-
keywords?:
|
|
141
|
+
keywords?: KeywordExtractArgs | boolean;
|
|
142
142
|
};
|
|
143
143
|
export { ExtractParams }
|
|
144
144
|
export { ExtractParams as ExtractParams_alias_1 }
|
package/dist/_tsup-dts-rollup.d.ts
CHANGED
|
@@ -138,7 +138,7 @@ declare type ExtractParams = {
|
|
|
138
138
|
title?: TitleExtractorsArgs | boolean;
|
|
139
139
|
summary?: SummaryExtractArgs | boolean;
|
|
140
140
|
questions?: QuestionAnswerExtractArgs | boolean;
|
|
141
|
-
keywords?:
|
|
141
|
+
keywords?: KeywordExtractArgs | boolean;
|
|
142
142
|
};
|
|
143
143
|
export { ExtractParams }
|
|
144
144
|
export { ExtractParams as ExtractParams_alias_1 }
|
package/dist/index.cjs
CHANGED
|
@@ -1305,6 +1305,18 @@ var MDocument = class _MDocument {
|
|
|
1305
1305
|
}
|
|
1306
1306
|
if (typeof title !== "undefined") {
|
|
1307
1307
|
transformations.push(new llamaindex.TitleExtractor(typeof title === "boolean" ? {} : title));
|
|
1308
|
+
this.chunks = this.chunks.map(
|
|
1309
|
+
(doc, i) => new llamaindex.Document({
|
|
1310
|
+
...doc,
|
|
1311
|
+
relationships: {
|
|
1312
|
+
[llamaindex.NodeRelationship.SOURCE]: {
|
|
1313
|
+
nodeId: `doc-${i}`,
|
|
1314
|
+
nodeType: llamaindex.ObjectType.DOCUMENT,
|
|
1315
|
+
metadata: doc.metadata
|
|
1316
|
+
}
|
|
1317
|
+
}
|
|
1318
|
+
})
|
|
1319
|
+
);
|
|
1308
1320
|
}
|
|
1309
1321
|
const pipeline = new llamaindex.IngestionPipeline({
|
|
1310
1322
|
transformations
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Document, SummaryExtractor, QuestionsAnsweredExtractor, KeywordExtractor, TitleExtractor, IngestionPipeline } from 'llamaindex';
|
|
1
|
+
import { Document, SummaryExtractor, QuestionsAnsweredExtractor, KeywordExtractor, TitleExtractor, ObjectType, NodeRelationship, IngestionPipeline } from 'llamaindex';
|
|
2
2
|
import { parse } from 'node-html-better-parser';
|
|
3
3
|
import { encodingForModel, getEncoding } from 'js-tiktoken';
|
|
4
4
|
import { CohereRelevanceScorer, MastraAgentRelevanceScorer } from '@mastra/core/relevance';
|
|
@@ -1303,6 +1303,18 @@ var MDocument = class _MDocument {
|
|
|
1303
1303
|
}
|
|
1304
1304
|
if (typeof title !== "undefined") {
|
|
1305
1305
|
transformations.push(new TitleExtractor(typeof title === "boolean" ? {} : title));
|
|
1306
|
+
this.chunks = this.chunks.map(
|
|
1307
|
+
(doc, i) => new Document({
|
|
1308
|
+
...doc,
|
|
1309
|
+
relationships: {
|
|
1310
|
+
[NodeRelationship.SOURCE]: {
|
|
1311
|
+
nodeId: `doc-${i}`,
|
|
1312
|
+
nodeType: ObjectType.DOCUMENT,
|
|
1313
|
+
metadata: doc.metadata
|
|
1314
|
+
}
|
|
1315
|
+
}
|
|
1316
|
+
})
|
|
1317
|
+
);
|
|
1306
1318
|
}
|
|
1307
1319
|
const pipeline = new IngestionPipeline({
|
|
1308
1320
|
transformations
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mastra/rag",
|
|
3
|
-
"version": "0.1.14",
|
|
3
|
+
"version": "0.1.15-alpha.1",
|
|
4
4
|
"description": "",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
"node-html-better-parser": "^1.4.7",
|
|
30
30
|
"pathe": "^2.0.3",
|
|
31
31
|
"zod": "^3.24.2",
|
|
32
|
-
"@mastra/core": "^0.
|
|
32
|
+
"@mastra/core": "^0.8.0-alpha.1"
|
|
33
33
|
},
|
|
34
34
|
"peerDependencies": {
|
|
35
35
|
"ai": "^4.0.0"
|
|
@@ -39,7 +39,7 @@
|
|
|
39
39
|
"@ai-sdk/openai": "latest",
|
|
40
40
|
"@microsoft/api-extractor": "^7.52.1",
|
|
41
41
|
"@types/node": "^20.17.27",
|
|
42
|
-
"ai": "^4.
|
|
42
|
+
"ai": "^4.2.2",
|
|
43
43
|
"dotenv": "^16.4.7",
|
|
44
44
|
"eslint": "^9.23.0",
|
|
45
45
|
"tsup": "^8.4.0",
|
package/src/document/document.test.ts
CHANGED
|
@@ -1638,6 +1638,79 @@ describe('MDocument', () => {
|
|
|
1638
1638
|
expect(docs?.[0]?.text).toContain('# Title');
|
|
1639
1639
|
});
|
|
1640
1640
|
});
|
|
1641
|
+
|
|
1642
|
+
describe('metadata extraction', () => {
|
|
1643
|
+
it('should extract metadata with default settings', async () => {
|
|
1644
|
+
const doc = MDocument.fromMarkdown(
|
|
1645
|
+
'# AI and Machine Learning\n\nThis is a test document about artificial intelligence and machine learning.',
|
|
1646
|
+
);
|
|
1647
|
+
|
|
1648
|
+
const chunks = await doc.chunk({
|
|
1649
|
+
strategy: 'markdown',
|
|
1650
|
+
extract: {
|
|
1651
|
+
title: true,
|
|
1652
|
+
summary: true,
|
|
1653
|
+
keywords: true,
|
|
1654
|
+
},
|
|
1655
|
+
});
|
|
1656
|
+
|
|
1657
|
+
const metadata = chunks[0].metadata;
|
|
1658
|
+
expect(metadata).toBeDefined();
|
|
1659
|
+
expect(metadata.documentTitle).toBeDefined();
|
|
1660
|
+
expect(metadata.sectionSummary).toBeDefined();
|
|
1661
|
+
expect(metadata.excerptKeywords).toMatch(/^KEYWORDS: .*/);
|
|
1662
|
+
}, 15000);
|
|
1663
|
+
|
|
1664
|
+
it('should extract metadata with custom settings', async () => {
|
|
1665
|
+
const doc = MDocument.fromMarkdown(
|
|
1666
|
+
'# AI and Machine Learning\n\nThis is a test document about artificial intelligence and machine learning.',
|
|
1667
|
+
);
|
|
1668
|
+
|
|
1669
|
+
const chunks = await doc.chunk({
|
|
1670
|
+
strategy: 'markdown',
|
|
1671
|
+
extract: {
|
|
1672
|
+
title: {
|
|
1673
|
+
nodes: 2,
|
|
1674
|
+
nodeTemplate: 'Generate a title for this: {context}',
|
|
1675
|
+
combineTemplate: 'Combine these titles: {context}',
|
|
1676
|
+
},
|
|
1677
|
+
summary: {
|
|
1678
|
+
summaries: ['self'],
|
|
1679
|
+
promptTemplate: 'Summarize this: {context}',
|
|
1680
|
+
},
|
|
1681
|
+
questions: {
|
|
1682
|
+
questions: 2,
|
|
1683
|
+
promptTemplate: 'Generate {numQuestions} questions about: {context}',
|
|
1684
|
+
},
|
|
1685
|
+
keywords: {
|
|
1686
|
+
keywords: 3,
|
|
1687
|
+
promptTemplate: 'Extract {maxKeywords} key terms from: {context}',
|
|
1688
|
+
},
|
|
1689
|
+
},
|
|
1690
|
+
});
|
|
1691
|
+
|
|
1692
|
+
const metadata = chunks[0].metadata;
|
|
1693
|
+
expect(metadata).toBeDefined();
|
|
1694
|
+
expect(metadata.documentTitle).toBeDefined();
|
|
1695
|
+
expect(metadata.sectionSummary).toBeDefined();
|
|
1696
|
+
expect(metadata.questionsThisExcerptCanAnswer).toMatch(/^1\. .*\?2\. .*\?$/);
|
|
1697
|
+
expect(metadata.excerptKeywords).toMatch(/^1\. .*\n2\. .*\n3\. .*$/);
|
|
1698
|
+
}, 15000);
|
|
1699
|
+
|
|
1700
|
+
it('should handle invalid summary types', async () => {
|
|
1701
|
+
const doc = MDocument.fromText('Test document');
|
|
1702
|
+
|
|
1703
|
+
await expect(
|
|
1704
|
+
doc.chunk({
|
|
1705
|
+
extract: {
|
|
1706
|
+
summary: {
|
|
1707
|
+
summaries: ['invalid'],
|
|
1708
|
+
},
|
|
1709
|
+
},
|
|
1710
|
+
}),
|
|
1711
|
+
).rejects.toThrow("Summaries must be one of 'self', 'prev', 'next'");
|
|
1712
|
+
}, 15000);
|
|
1713
|
+
});
|
|
1641
1714
|
});
|
|
1642
1715
|
|
|
1643
1716
|
// Helper function to find the longest common substring between two strings
|
package/src/document/document.ts
CHANGED
|
@@ -5,6 +5,8 @@ import {
|
|
|
5
5
|
QuestionsAnsweredExtractor,
|
|
6
6
|
SummaryExtractor,
|
|
7
7
|
TitleExtractor,
|
|
8
|
+
ObjectType,
|
|
9
|
+
NodeRelationship,
|
|
8
10
|
} from 'llamaindex';
|
|
9
11
|
|
|
10
12
|
import { CharacterTransformer, RecursiveCharacterTransformer } from './transformers/character';
|
|
@@ -43,6 +45,19 @@ export class MDocument {
|
|
|
43
45
|
|
|
44
46
|
if (typeof title !== 'undefined') {
|
|
45
47
|
transformations.push(new TitleExtractor(typeof title === 'boolean' ? {} : title));
|
|
48
|
+
this.chunks = this.chunks.map(
|
|
49
|
+
(doc, i) =>
|
|
50
|
+
new Chunk({
|
|
51
|
+
...doc,
|
|
52
|
+
relationships: {
|
|
53
|
+
[NodeRelationship.SOURCE]: {
|
|
54
|
+
nodeId: `doc-${i}`,
|
|
55
|
+
nodeType: ObjectType.DOCUMENT,
|
|
56
|
+
metadata: doc.metadata,
|
|
57
|
+
},
|
|
58
|
+
},
|
|
59
|
+
}),
|
|
60
|
+
);
|
|
46
61
|
}
|
|
47
62
|
|
|
48
63
|
const pipeline = new IngestionPipeline({
|
package/src/document/types.ts
CHANGED
|
@@ -41,7 +41,7 @@ export type ExtractParams = {
|
|
|
41
41
|
title?: TitleExtractorsArgs | boolean;
|
|
42
42
|
summary?: SummaryExtractArgs | boolean;
|
|
43
43
|
questions?: QuestionAnswerExtractArgs | boolean;
|
|
44
|
-
keywords?:
|
|
44
|
+
keywords?: KeywordExtractArgs | boolean;
|
|
45
45
|
};
|
|
46
46
|
|
|
47
47
|
export type ChunkOptions = {
|