@mastra/rag 1.0.8 → 1.0.9-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +22 -0
- package/dist/document/extractors/keywords.d.ts.map +1 -1
- package/dist/document/extractors/questions.d.ts.map +1 -1
- package/dist/document/extractors/summary.d.ts.map +1 -1
- package/dist/document/extractors/title.d.ts.map +1 -1
- package/dist/index.cjs +105 -99
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +105 -99
- package/dist/index.js.map +1 -1
- package/package.json +3 -3
- package/src/document/document.test.ts +1 -1
- package/src/document/extractors/keywords.test.ts +2 -2
- package/src/document/extractors/keywords.ts +35 -20
- package/src/document/extractors/questions.test.ts +1 -1
- package/src/document/extractors/questions.ts +21 -18
- package/src/document/extractors/summary.ts +16 -12
- package/src/document/extractors/title.test.ts +1 -1
- package/src/document/extractors/title.ts +53 -41
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@mastra/rag",
-  "version": "1.0.8",
+  "version": "1.0.9-alpha.0",
   "description": "",
   "type": "module",
   "main": "dist/index.js",
@@ -46,8 +46,8 @@
     "typescript": "^5.8.3",
     "vitest": "^3.2.4",
     "@internal/lint": "0.0.29",
-    "@
-    "@
+    "@internal/types-builder": "0.0.4",
+    "@mastra/core": "0.14.0-alpha.4"
   },
   "keywords": [
     "rag",
package/src/document/document.test.ts
CHANGED

@@ -20,7 +20,7 @@ const openai = createOpenAI({
   apiKey: process.env.OPENAI_API_KEY,
 });

-vi.setConfig({ testTimeout:
+vi.setConfig({ testTimeout: 100_000, hookTimeout: 100_000 });

 describe('MDocument', () => {
   describe('basics', () => {
package/src/document/extractors/keywords.test.ts
CHANGED

@@ -9,7 +9,7 @@ const openai = createOpenAI({

 const model = openai('gpt-4o');

-vi.setConfig({ testTimeout:
+vi.setConfig({ testTimeout: 50_000, hookTimeout: 50_000 });

 describe('KeywordExtractor', () => {
   it('can use a custom model for keywords extraction', async () => {
@@ -49,7 +49,7 @@ describe('KeywordExtractor', () => {
   it(
     'handles very long input',
     {
-      timeout:
+      timeout: 60_000,
     },
     async () => {
       const extractor = new KeywordExtractor();
package/src/document/extractors/keywords.ts
CHANGED

@@ -1,3 +1,4 @@
+import { Agent } from '@mastra/core/agent';
 import type { MastraLanguageModel } from '@mastra/core/agent';
 import { defaultKeywordExtractPrompt, PromptTemplate } from '../prompts';
 import type { KeywordExtractPrompt } from '../prompts';
@@ -64,33 +65,47 @@ export class KeywordExtractor extends BaseExtractor {

     let keywords = '';
     try {
-      const
-
-
-
+      const miniAgent = new Agent({
+        model: this.llm,
+        name: 'keyword-extractor',
+        instructions:
+          'You are a keyword extractor. You are given a node and you need to extract the keywords from the node.',
+      });
+
+      if (this.llm.specificationVersion === 'v2') {
+        const result = await miniAgent.generateVNext(
+          [
+            {
+              role: 'user',
+              content: this.promptTemplate.format({
+                context: node.getContent(),
+                maxKeywords: this.keywords.toString(),
+              }),
+            },
+          ],
+          { format: 'mastra' },
+        );
+        keywords = result.text;
+      } else {
+        const result = await miniAgent.generate([
           {
             role: 'user',
-            content:
-              {
-                type: 'text',
-                text: this.promptTemplate.format({
-                  context: node.getContent(),
-                  maxKeywords: this.keywords.toString(),
-                }),
-              },
-            ],
+            content: this.promptTemplate.format({ context: node.getContent(), maxKeywords: this.keywords.toString() }),
           },
-        ]
-
-      if (typeof completion.text === 'string') {
-        keywords = completion.text.trim();
-      } else {
-        console.warn('Keyword extraction LLM output was not a string:', completion.text);
+        ]);
+        keywords = result.text;
       }
+
+      if (!keywords) {
+        console.warn('Keyword extraction LLM output returned empty');
+        return { excerptKeywords: '' };
+      }
+
+      return { excerptKeywords: keywords.trim() };
     } catch (err) {
       console.warn('Keyword extraction failed:', err);
+      return { excerptKeywords: '' };
     }
-    return { excerptKeywords: keywords };
   }

   /**
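The same refactor repeats across all four extractors: instead of calling this.llm.doGenerate directly, each one now wraps its model in a small Agent and branches on the model's specificationVersion, calling generateVNext with { format: 'mastra' } for v2 models and generate otherwise. Below is a minimal sketch of that shared flow; extractText is a hypothetical helper name, while the Agent options and call shapes mirror the diff above.

import { Agent } from '@mastra/core/agent';
import type { MastraLanguageModel } from '@mastra/core/agent';

// Hypothetical helper illustrating the branch each extractor now performs.
// `instructions` and `prompt` come from the extractor's own prompt template.
async function extractText(llm: MastraLanguageModel, instructions: string, prompt: string): Promise<string> {
  const miniAgent = new Agent({ model: llm, name: 'extractor', instructions });

  // v2 models go through generateVNext with the 'mastra' output format;
  // older models keep using generate.
  const result =
    llm.specificationVersion === 'v2'
      ? await miniAgent.generateVNext([{ role: 'user', content: prompt }], { format: 'mastra' })
      : await miniAgent.generate([{ role: 'user', content: prompt }]);

  if (!result.text) {
    console.warn('Extraction LLM output returned empty');
    return '';
  }
  return result.text.trim();
}

Each extractor then wraps the trimmed text in its own return shape (excerptKeywords, questionsThisExcerptCanAnswer, the summary string, or the document title).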
package/src/document/extractors/questions.test.ts
CHANGED

@@ -9,7 +9,7 @@ const openai = createOpenAI({

 const model = openai('gpt-4o');

-vi.setConfig({ testTimeout:
+vi.setConfig({ testTimeout: 100_000, hookTimeout: 100_000 });

 describe('QuestionsAnsweredExtractor', () => {
   it('can use a custom model for questions extraction', async () => {
package/src/document/extractors/questions.ts
CHANGED

@@ -1,3 +1,4 @@
+import { Agent } from '@mastra/core/agent';
 import type { MastraLanguageModel } from '@mastra/core/agent';
 import { PromptTemplate, defaultQuestionExtractPrompt } from '../prompts';
 import type { QuestionExtractPrompt } from '../prompts';
@@ -69,27 +70,29 @@ export class QuestionsAnsweredExtractor extends BaseExtractor {
       numQuestions: this.questions.toString(),
     });

-    const
-
-
-
-
-        role: 'user',
-        content: [{ type: 'text', text: prompt }],
-      },
-    ],
+    const miniAgent = new Agent({
+      model: this.llm,
+      name: 'question-extractor',
+      instructions:
+        'You are a question extractor. You are given a node and you need to extract the questions from the node.',
     });

-    let
-
-
-
-
-
-
-    } catch (err) {
-      console.warn('Question extraction failed:', err);
+    let questionsText = '';
+    if (this.llm.specificationVersion === 'v2') {
+      const result = await miniAgent.generateVNext([{ role: 'user', content: prompt }], { format: 'mastra' });
+      questionsText = result.text;
+    } else {
+      const result = await miniAgent.generate([{ role: 'user', content: prompt }]);
+      questionsText = result.text;
     }
+
+    if (!questionsText) {
+      console.warn('Question extraction LLM output returned empty');
+      return { questionsThisExcerptCanAnswer: '' };
+    }
+
+    const result = questionsText.replace(STRIP_REGEX, '').trim();
+
     return {
       questionsThisExcerptCanAnswer: result,
     };
package/src/document/extractors/summary.ts
CHANGED

@@ -1,3 +1,4 @@
+import { Agent } from '@mastra/core/agent';
 import type { MastraLanguageModel } from '@mastra/core/agent';
 import { PromptTemplate, defaultSummaryPrompt } from '../prompts';
 import type { SummaryPrompt } from '../prompts';
@@ -68,22 +69,25 @@ export class SummaryExtractor extends BaseExtractor {
       context,
     });

-    const
-
-
-
-
-        role: 'user',
-        content: [{ type: 'text', text: prompt }],
-      },
-    ],
+    const miniAgent = new Agent({
+      model: this.llm,
+      name: 'summary-extractor',
+      instructions:
+        'You are a summary extractor. You are given a node and you need to extract the summary from the node.',
     });

     let summary = '';
-    if (
-
+    if (this.llm.specificationVersion === 'v2') {
+      const result = await miniAgent.generateVNext([{ role: 'user', content: prompt }], { format: 'mastra' });
+      summary = result.text;
     } else {
-
+      const result = await miniAgent.generate([{ role: 'user', content: prompt }]);
+      summary = result.text;
+    }
+
+    if (!summary) {
+      console.warn('Summary extraction LLM output returned empty');
+      return '';
     }

     return summary.replace(STRIP_REGEX, '');
package/src/document/extractors/title.test.ts
CHANGED

@@ -9,7 +9,7 @@ const openai = createOpenAI({

 const model = openai('gpt-4o');

-vi.setConfig({ testTimeout:
+vi.setConfig({ testTimeout: 50_000, hookTimeout: 50_000 });

 describe('TitleExtractor', () => {
   it('can use a custom model from the test suite', async () => {
package/src/document/extractors/title.ts
CHANGED

@@ -1,3 +1,4 @@
+import { Agent } from '@mastra/core/agent';
 import type { MastraLanguageModel } from '@mastra/core/agent';
 import { defaultTitleCombinePromptTemplate, defaultTitleExtractorPromptTemplate, PromptTemplate } from '../prompts';
 import type { TitleCombinePrompt, TitleExtractorPrompt } from '../prompts';
@@ -110,30 +111,38 @@ export class TitleExtractor extends BaseExtractor {
     for (const [key, nodes] of Object.entries(nodesByDocument)) {
       const titleCandidates = await this.getTitlesCandidates(nodes);
       const combinedTitles = titleCandidates.join(', ');
-      const completion = await this.llm.doGenerate({
-        inputFormat: 'messages',
-        mode: { type: 'regular' },
-        prompt: [
-          {
-            role: 'user',
-            content: [
-              {
-                type: 'text',
-                text: this.combineTemplate.format({
-                  context: combinedTitles,
-                }),
-              },
-            ],
-          },
-        ],
-      });

       let title = '';
-
-
+
+      if (this.llm.specificationVersion === 'v2') {
+        const miniAgent = new Agent({
+          model: this.llm,
+          name: 'title-extractor',
+          instructions:
+            'You are a title extractor. You are given a list of nodes and you need to extract the title from the nodes.',
+        });
+        const result = await miniAgent.generateVNext(
+          [{ role: 'user', content: this.combineTemplate.format({ context: combinedTitles }) }],
+          { format: 'mastra' },
+        );
+        title = result.text;
       } else {
-
+        const miniAgent = new Agent({
+          model: this.llm,
+          name: 'title-extractor',
+          instructions:
+            'You are a title extractor. You are given a list of nodes and you need to extract the title from the nodes.',
+        });
+        const result = await miniAgent.generate([
+          { role: 'user', content: this.combineTemplate.format({ context: combinedTitles }) },
+        ]);
+        title = result.text;
       }
+
+      if (!title) {
+        console.warn('Title extraction LLM output returned empty');
+      }
+
       titlesByDocument[key] = title;
     }

@@ -141,31 +150,34 @@
   }

   private async getTitlesCandidates(nodes: BaseNode[]): Promise<string[]> {
+    const miniAgent = new Agent({
+      model: this.llm,
+      name: 'titles-candidates-extractor',
+      instructions:
+        'You are a titles candidates extractor. You are given a list of nodes and you need to extract the titles candidates from the nodes.',
+    });
+
     const titleJobs = nodes.map(async node => {
-
-
-
-
-        {
-
-
-          {
-            type: 'text',
-            text: this.nodeTemplate.format({
-              context: node.getContent(),
-            }),
-          },
-        ],
-      },
-    ],
-  });
-
-      if (typeof completion.text === 'string') {
-        return completion.text.trim();
+      let completion: string;
+      if (this.llm.specificationVersion === 'v2') {
+        const result = await miniAgent.generateVNext(
+          [{ role: 'user', content: this.nodeTemplate.format({ context: node.getContent() }) }],
+          { format: 'mastra' },
+        );
+        completion = result.text;
       } else {
-
+        const result = await miniAgent.generate([
+          { role: 'user', content: this.nodeTemplate.format({ context: node.getContent() }) },
+        ]);
+        completion = result.text;
+      }
+
+      if (!completion) {
+        console.warn('Title candidate extraction LLM output returned empty');
         return '';
       }
+
+      return completion.trim();
     });

     return await Promise.all(titleJobs);
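The title extractor is the only one with a two-stage flow: getTitlesCandidates fans out one prompt per node, the candidates are joined with ', ', and a combine prompt produces the final document title. Here is a sketch of that control flow, with a hypothetical generateTitle callback standing in for the Agent generate/generateVNext branch shown above.

// Two-stage title extraction as reflected in the diff above. `generateTitle`
// is a hypothetical stand-in for the per-prompt Agent call (generate or
// generateVNext depending on specificationVersion).
async function titleForDocument(
  nodes: { getContent(): string }[],
  nodeTemplate: { format(vars: { context: string }): string },
  combineTemplate: { format(vars: { context: string }): string },
  generateTitle: (prompt: string) => Promise<string>,
): Promise<string> {
  // Stage 1: one candidate title per node, gathered in parallel.
  const titleCandidates = await Promise.all(
    nodes.map(node => generateTitle(nodeTemplate.format({ context: node.getContent() }))),
  );

  // Stage 2: combine the comma-joined candidates into a single title.
  const combinedTitles = titleCandidates.join(', ');
  const title = await generateTitle(combineTemplate.format({ context: combinedTitles }));

  if (!title) {
    console.warn('Title extraction LLM output returned empty');
  }
  return title;
}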