@mastra/rag 1.2.2 → 1.2.3-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/CHANGELOG.md +22 -0
  2. package/dist/index.cjs +25 -9
  3. package/dist/index.cjs.map +1 -1
  4. package/dist/index.js +25 -9
  5. package/dist/index.js.map +1 -1
  6. package/dist/tools/graph-rag.d.ts.map +1 -1
  7. package/dist/tools/types.d.ts +18 -5
  8. package/dist/tools/types.d.ts.map +1 -1
  9. package/dist/tools/vector-query.d.ts.map +1 -1
  10. package/dist/utils/vector-search.d.ts +6 -7
  11. package/dist/utils/vector-search.d.ts.map +1 -1
  12. package/package.json +19 -6
  13. package/.turbo/turbo-build.log +0 -4
  14. package/docker-compose.yaml +0 -22
  15. package/eslint.config.js +0 -6
  16. package/src/document/document.test.ts +0 -2975
  17. package/src/document/document.ts +0 -335
  18. package/src/document/extractors/base.ts +0 -30
  19. package/src/document/extractors/index.ts +0 -5
  20. package/src/document/extractors/keywords.test.ts +0 -125
  21. package/src/document/extractors/keywords.ts +0 -126
  22. package/src/document/extractors/questions.test.ts +0 -120
  23. package/src/document/extractors/questions.ts +0 -111
  24. package/src/document/extractors/summary.test.ts +0 -107
  25. package/src/document/extractors/summary.ts +0 -122
  26. package/src/document/extractors/title.test.ts +0 -121
  27. package/src/document/extractors/title.ts +0 -185
  28. package/src/document/extractors/types.ts +0 -40
  29. package/src/document/index.ts +0 -2
  30. package/src/document/prompts/base.ts +0 -77
  31. package/src/document/prompts/format.ts +0 -9
  32. package/src/document/prompts/index.ts +0 -15
  33. package/src/document/prompts/prompt.ts +0 -60
  34. package/src/document/prompts/types.ts +0 -29
  35. package/src/document/schema/index.ts +0 -3
  36. package/src/document/schema/node.ts +0 -187
  37. package/src/document/schema/types.ts +0 -40
  38. package/src/document/transformers/character.ts +0 -267
  39. package/src/document/transformers/html.ts +0 -346
  40. package/src/document/transformers/json.ts +0 -536
  41. package/src/document/transformers/latex.ts +0 -11
  42. package/src/document/transformers/markdown.ts +0 -239
  43. package/src/document/transformers/semantic-markdown.ts +0 -227
  44. package/src/document/transformers/sentence.ts +0 -314
  45. package/src/document/transformers/text.ts +0 -158
  46. package/src/document/transformers/token.ts +0 -137
  47. package/src/document/transformers/transformer.ts +0 -5
  48. package/src/document/types.ts +0 -145
  49. package/src/document/validation.ts +0 -158
  50. package/src/graph-rag/index.test.ts +0 -235
  51. package/src/graph-rag/index.ts +0 -306
  52. package/src/index.ts +0 -8
  53. package/src/rerank/index.test.ts +0 -150
  54. package/src/rerank/index.ts +0 -198
  55. package/src/rerank/relevance/cohere/index.ts +0 -56
  56. package/src/rerank/relevance/index.ts +0 -3
  57. package/src/rerank/relevance/mastra-agent/index.ts +0 -32
  58. package/src/rerank/relevance/zeroentropy/index.ts +0 -26
  59. package/src/tools/README.md +0 -153
  60. package/src/tools/document-chunker.ts +0 -34
  61. package/src/tools/graph-rag.test.ts +0 -115
  62. package/src/tools/graph-rag.ts +0 -154
  63. package/src/tools/index.ts +0 -3
  64. package/src/tools/types.ts +0 -110
  65. package/src/tools/vector-query-database-config.test.ts +0 -190
  66. package/src/tools/vector-query.test.ts +0 -418
  67. package/src/tools/vector-query.ts +0 -169
  68. package/src/utils/convert-sources.ts +0 -43
  69. package/src/utils/default-settings.ts +0 -38
  70. package/src/utils/index.ts +0 -3
  71. package/src/utils/tool-schemas.ts +0 -38
  72. package/src/utils/vector-prompts.ts +0 -832
  73. package/src/utils/vector-search.ts +0 -117
  74. package/tsconfig.build.json +0 -9
  75. package/tsconfig.json +0 -5
  76. package/tsup.config.ts +0 -17
  77. package/vitest.config.ts +0 -8
@@ -1,335 +0,0 @@
1
- import { TitleExtractor, SummaryExtractor, QuestionsAnsweredExtractor, KeywordExtractor } from './extractors';
2
- import type { BaseNode } from './schema';
3
- import { Document as Chunk, NodeRelationship, ObjectType } from './schema';
4
-
5
- import { CharacterTransformer, RecursiveCharacterTransformer } from './transformers/character';
6
- import { HTMLHeaderTransformer, HTMLSectionTransformer } from './transformers/html';
7
- import { RecursiveJsonTransformer } from './transformers/json';
8
- import { LatexTransformer } from './transformers/latex';
9
- import { MarkdownHeaderTransformer, MarkdownTransformer } from './transformers/markdown';
10
- import { SemanticMarkdownTransformer } from './transformers/semantic-markdown';
11
- import { SentenceTransformer } from './transformers/sentence';
12
- import { TokenTransformer } from './transformers/token';
13
- import type {
14
- ChunkParams,
15
- ChunkStrategy,
16
- ExtractParams,
17
- HTMLChunkOptions,
18
- RecursiveChunkOptions,
19
- CharacterChunkOptions,
20
- TokenChunkOptions,
21
- MarkdownChunkOptions,
22
- SemanticMarkdownChunkOptions,
23
- JsonChunkOptions,
24
- LatexChunkOptions,
25
- SentenceChunkOptions,
26
- StrategyOptions,
27
- } from './types';
28
- import { validateChunkParams } from './validation';
29
-
30
- export class MDocument {
31
- private chunks: Chunk[];
32
- private type: string; // e.g., 'text', 'html', 'markdown', 'json'
33
-
34
- constructor({ docs, type }: { docs: { text: string; metadata?: Record<string, any> }[]; type: string }) {
35
- this.chunks = docs.map(d => {
36
- return new Chunk({ text: d.text, metadata: d.metadata });
37
- });
38
- this.type = type;
39
- }
40
-
41
- async extractMetadata({ title, summary, questions, keywords }: ExtractParams): Promise<MDocument> {
42
- const transformations = [];
43
-
44
- if (typeof summary !== 'undefined') {
45
- transformations.push(new SummaryExtractor(typeof summary === 'boolean' ? {} : summary));
46
- }
47
-
48
- if (typeof questions !== 'undefined') {
49
- transformations.push(new QuestionsAnsweredExtractor(typeof questions === 'boolean' ? {} : questions));
50
- }
51
-
52
- if (typeof keywords !== 'undefined') {
53
- transformations.push(new KeywordExtractor(typeof keywords === 'boolean' ? {} : keywords));
54
- }
55
-
56
- if (typeof title !== 'undefined') {
57
- transformations.push(new TitleExtractor(typeof title === 'boolean' ? {} : title));
58
- this.chunks = this.chunks.map(doc =>
59
- doc?.metadata?.docId
60
- ? new Chunk({
61
- ...doc,
62
- relationships: {
63
- [NodeRelationship.SOURCE]: {
64
- nodeId: doc.metadata.docId,
65
- nodeType: ObjectType.DOCUMENT,
66
- metadata: doc.metadata,
67
- },
68
- },
69
- })
70
- : doc,
71
- );
72
- }
73
-
74
- let nodes: BaseNode[] = this.chunks;
75
- for (const extractor of transformations) {
76
- nodes = await extractor.processNodes(nodes);
77
- }
78
-
79
- this.chunks = this.chunks.map((doc, i) => {
80
- return new Chunk({
81
- text: doc.text,
82
- metadata: {
83
- ...doc.metadata,
84
- ...(nodes?.[i]?.metadata || {}),
85
- },
86
- });
87
- });
88
-
89
- return this;
90
- }
91
-
92
- static fromText(text: string, metadata?: Record<string, any>): MDocument {
93
- return new MDocument({
94
- docs: [
95
- {
96
- text,
97
- metadata,
98
- },
99
- ],
100
- type: 'text',
101
- });
102
- }
103
-
104
- static fromHTML(html: string, metadata?: Record<string, any>): MDocument {
105
- return new MDocument({
106
- docs: [
107
- {
108
- text: html,
109
- metadata,
110
- },
111
- ],
112
- type: 'html',
113
- });
114
- }
115
-
116
- static fromMarkdown(markdown: string, metadata?: Record<string, any>): MDocument {
117
- return new MDocument({
118
- docs: [
119
- {
120
- text: markdown,
121
- metadata,
122
- },
123
- ],
124
- type: 'markdown',
125
- });
126
- }
127
-
128
- static fromJSON(jsonString: string, metadata?: Record<string, any>): MDocument {
129
- return new MDocument({
130
- docs: [
131
- {
132
- text: jsonString,
133
- metadata,
134
- },
135
- ],
136
- type: 'json',
137
- });
138
- }
139
-
140
- private defaultStrategy(): ChunkStrategy {
141
- switch (this.type) {
142
- case 'html':
143
- return 'html';
144
- case 'markdown':
145
- return 'markdown';
146
- case 'json':
147
- return 'json';
148
- case 'latex':
149
- return 'latex';
150
- default:
151
- return 'recursive';
152
- }
153
- }
154
-
155
- private _strategyMap?: { [S in ChunkStrategy]: (options?: StrategyOptions[S]) => Promise<void> };
156
-
157
- private get strategyMap() {
158
- if (!this._strategyMap) {
159
- this._strategyMap = {
160
- recursive: options => this.chunkRecursive(options),
161
- character: options => this.chunkCharacter(options),
162
- token: options => this.chunkToken(options),
163
- markdown: options => this.chunkMarkdown(options),
164
- html: options => this.chunkHTML(options),
165
- json: options => this.chunkJSON(options),
166
- latex: options => this.chunkLatex(options),
167
- sentence: options => this.chunkSentence(options),
168
- 'semantic-markdown': options => this.chunkSemanticMarkdown(options),
169
- };
170
- }
171
- return this._strategyMap;
172
- }
173
-
174
- private async chunkBy<K extends ChunkStrategy>(strategy: K, options?: StrategyOptions[K]): Promise<void> {
175
- const chunkingFunc = this.strategyMap[strategy];
176
- if (chunkingFunc) {
177
- await chunkingFunc(options);
178
- } else {
179
- throw new Error(`Unknown strategy: ${strategy}`);
180
- }
181
- }
182
-
183
- async chunkRecursive(options?: RecursiveChunkOptions): Promise<void> {
184
- if (options?.language) {
185
- const rt = RecursiveCharacterTransformer.fromLanguage(options.language, options);
186
- const textSplit = rt.transformDocuments(this.chunks);
187
- this.chunks = textSplit;
188
- return;
189
- }
190
-
191
- const rt = new RecursiveCharacterTransformer(options);
192
- const textSplit = rt.transformDocuments(this.chunks);
193
- this.chunks = textSplit;
194
- }
195
-
196
- async chunkCharacter(options?: CharacterChunkOptions): Promise<void> {
197
- const rt = new CharacterTransformer({
198
- ...options,
199
- separator: options?.separator,
200
- isSeparatorRegex: options?.isSeparatorRegex,
201
- });
202
- const textSplit = rt.transformDocuments(this.chunks);
203
- this.chunks = textSplit;
204
- }
205
-
206
- async chunkHTML(options?: HTMLChunkOptions): Promise<void> {
207
- if (options?.headers?.length) {
208
- const rt = new HTMLHeaderTransformer(options as HTMLChunkOptions & { headers: [string, string][] });
209
-
210
- const textSplit = rt.transformDocuments(this.chunks);
211
- this.chunks = textSplit;
212
- return;
213
- }
214
-
215
- if (options?.sections?.length) {
216
- const rt = new HTMLSectionTransformer(options as HTMLChunkOptions & { sections: [string, string][] });
217
-
218
- const textSplit = rt.transformDocuments(this.chunks);
219
- this.chunks = textSplit;
220
- return;
221
- }
222
-
223
- throw new Error('HTML chunking requires either headers or sections to be specified');
224
- }
225
-
226
- async chunkJSON(options?: JsonChunkOptions): Promise<void> {
227
- if (!options?.maxSize) {
228
- throw new Error('JSON chunking requires maxSize to be specified');
229
- }
230
-
231
- const rt = new RecursiveJsonTransformer({
232
- maxSize: options?.maxSize,
233
- minSize: options?.minSize,
234
- });
235
-
236
- const textSplit = rt.transformDocuments({
237
- documents: this.chunks,
238
- ensureAscii: options?.ensureAscii,
239
- convertLists: options?.convertLists,
240
- });
241
-
242
- this.chunks = textSplit;
243
- }
244
-
245
- async chunkLatex(options?: LatexChunkOptions): Promise<void> {
246
- const rt = new LatexTransformer(options);
247
- const textSplit = rt.transformDocuments(this.chunks);
248
- this.chunks = textSplit;
249
- }
250
-
251
- async chunkToken(options?: TokenChunkOptions): Promise<void> {
252
- const rt = TokenTransformer.fromTikToken({
253
- options,
254
- encodingName: options?.encodingName,
255
- modelName: options?.modelName,
256
- });
257
- const textSplit = rt.transformDocuments(this.chunks);
258
- this.chunks = textSplit;
259
- }
260
-
261
- async chunkMarkdown(options?: MarkdownChunkOptions): Promise<void> {
262
- if (options?.headers) {
263
- const rt = new MarkdownHeaderTransformer(options.headers, options?.returnEachLine, options?.stripHeaders);
264
- const textSplit = rt.transformDocuments(this.chunks);
265
- this.chunks = textSplit;
266
- return;
267
- }
268
-
269
- const rt = new MarkdownTransformer(options);
270
- const textSplit = rt.transformDocuments(this.chunks);
271
- this.chunks = textSplit;
272
- }
273
-
274
- async chunkSentence(options?: SentenceChunkOptions): Promise<void> {
275
- if (!options?.maxSize) {
276
- throw new Error('Sentence chunking requires maxSize to be specified');
277
- }
278
-
279
- const rt = new SentenceTransformer({
280
- minSize: options?.minSize,
281
- maxSize: options?.maxSize,
282
- targetSize: options?.targetSize,
283
- overlap: options?.overlap,
284
- sentenceEnders: options?.sentenceEnders,
285
- fallbackToWords: options?.fallbackToWords,
286
- fallbackToCharacters: options?.fallbackToCharacters,
287
- keepSeparator: options?.keepSeparator,
288
- lengthFunction: options?.lengthFunction,
289
- addStartIndex: options?.addStartIndex,
290
- stripWhitespace: options?.stripWhitespace,
291
- });
292
-
293
- const textSplit = rt.transformDocuments(this.chunks);
294
- this.chunks = textSplit;
295
- }
296
-
297
- async chunkSemanticMarkdown(options?: SemanticMarkdownChunkOptions): Promise<void> {
298
- const rt = SemanticMarkdownTransformer.fromTikToken({
299
- options,
300
- encodingName: options?.encodingName,
301
- modelName: options?.modelName,
302
- });
303
- const textSplit = rt.transformDocuments(this.chunks);
304
- this.chunks = textSplit;
305
- }
306
-
307
- async chunk(params?: ChunkParams): Promise<Chunk[]> {
308
- const { strategy: passedStrategy, extract, ...chunkOptions } = params || {};
309
- // Determine the default strategy based on type if not specified
310
- const strategy = passedStrategy || this.defaultStrategy();
311
-
312
- validateChunkParams(strategy, chunkOptions);
313
-
314
- // Apply the appropriate chunking strategy
315
- await this.chunkBy(strategy, chunkOptions);
316
-
317
- if (extract) {
318
- await this.extractMetadata(extract);
319
- }
320
-
321
- return this.chunks;
322
- }
323
-
324
- getDocs(): Chunk[] {
325
- return this.chunks;
326
- }
327
-
328
- getText(): string[] {
329
- return this.chunks.map(doc => doc.text);
330
- }
331
-
332
- getMetadata(): Record<string, any>[] {
333
- return this.chunks.map(doc => doc.metadata);
334
- }
335
- }
@@ -1,30 +0,0 @@
1
- import type { BaseNode } from '../schema';
2
-
3
- /*
4
- * Abstract class for all extractors.
5
- */
6
- export abstract class BaseExtractor {
7
- isTextNodeOnly: boolean = true;
8
-
9
- abstract extract(nodes: BaseNode[]): Promise<Record<string, any>[]>;
10
-
11
- /**
12
- *
13
- * @param nodes Nodes to extract metadata from.
14
- * @returns Metadata extracted from the nodes.
15
- */
16
- async processNodes(nodes: BaseNode[]): Promise<BaseNode[]> {
17
- let newNodes: BaseNode[] = nodes;
18
-
19
- const curMetadataList = await this.extract(newNodes);
20
-
21
- for (const idx in newNodes) {
22
- newNodes[idx]!.metadata = {
23
- ...newNodes[idx]!.metadata,
24
- ...curMetadataList[idx],
25
- };
26
- }
27
-
28
- return newNodes;
29
- }
30
- }
@@ -1,5 +0,0 @@
1
- export { TitleExtractor } from './title';
2
- export { SummaryExtractor } from './summary';
3
- export { QuestionsAnsweredExtractor } from './questions';
4
- export { KeywordExtractor } from './keywords';
5
- export type { KeywordExtractArgs, QuestionAnswerExtractArgs, SummaryExtractArgs, TitleExtractorsArgs } from './types';
@@ -1,125 +0,0 @@
1
- import { createOpenAI } from '@ai-sdk/openai';
2
- import { describe, it, expect, vi } from 'vitest';
3
- import { TextNode } from '../schema';
4
- import { KeywordExtractor } from './keywords';
5
-
6
- const openai = createOpenAI({
7
- apiKey: process.env.OPENAI_API_KEY,
8
- });
9
-
10
- const model = openai('gpt-4o');
11
-
12
- vi.setConfig({ testTimeout: 50_000, hookTimeout: 50_000 });
13
-
14
- describe('KeywordExtractor', () => {
15
- it('can use a custom model for keywords extraction', async () => {
16
- const extractor = new KeywordExtractor({ llm: model });
17
- const node = new TextNode({ text: 'The quick brown fox jumps over the lazy dog.' });
18
- const result = await extractor.extractKeywordsFromNodes(node);
19
- expect(result).toHaveProperty('excerptKeywords');
20
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
21
- });
22
-
23
- it('handles empty input gracefully', async () => {
24
- const extractor = new KeywordExtractor();
25
- const node = new TextNode({ text: '' });
26
- const result = await extractor.extractKeywordsFromNodes(node);
27
- expect(result.excerptKeywords).toBe('');
28
- });
29
-
30
- it('supports prompt customization', async () => {
31
- const extractor = new KeywordExtractor({
32
- promptTemplate: 'List keywords in: {context}. Limit to {maxKeywords}.',
33
- });
34
- const node = new TextNode({ text: 'Test document for prompt customization.' });
35
- const result = await extractor.extractKeywordsFromNodes(node);
36
- expect(result).toHaveProperty('excerptKeywords');
37
- expect(typeof result.excerptKeywords).toBe('string');
38
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
39
- });
40
-
41
- it('extracts keywords from text', async () => {
42
- const extractor = new KeywordExtractor();
43
- const node = new TextNode({ text: 'The quick brown fox jumps over the lazy dog.' });
44
- const result = await extractor.extractKeywordsFromNodes(node);
45
- expect(result).toHaveProperty('excerptKeywords');
46
- expect(typeof result.excerptKeywords).toBe('string');
47
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
48
- });
49
- it(
50
- 'handles very long input',
51
- {
52
- timeout: 60_000,
53
- },
54
- async () => {
55
- const extractor = new KeywordExtractor();
56
- const longText = 'A'.repeat(1000);
57
- const node = new TextNode({ text: longText });
58
- const result = await extractor.extractKeywordsFromNodes(node);
59
- expect(result).toHaveProperty('excerptKeywords');
60
- expect(typeof result.excerptKeywords).toBe('string');
61
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
62
- },
63
- );
64
-
65
- it('handles whitespace only input', async () => {
66
- const extractor = new KeywordExtractor();
67
- const node = new TextNode({ text: ' ' });
68
- const result = await extractor.extractKeywordsFromNodes(node);
69
- expect(result.excerptKeywords).toBe('');
70
- });
71
-
72
- it('handles special characters and emojis', async () => {
73
- const extractor = new KeywordExtractor();
74
- const node = new TextNode({ text: '🚀✨🔥' });
75
- const result = await extractor.extractKeywordsFromNodes(node);
76
- expect(result).toHaveProperty('excerptKeywords');
77
- expect(typeof result.excerptKeywords).toBe('string');
78
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
79
- });
80
-
81
- it('handles numbers only', async () => {
82
- const extractor = new KeywordExtractor();
83
- const node = new TextNode({ text: '1234567890' });
84
- const result = await extractor.extractKeywordsFromNodes(node);
85
- expect(result).toHaveProperty('excerptKeywords');
86
- expect(typeof result.excerptKeywords).toBe('string');
87
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
88
- });
89
-
90
- it('handles HTML tags', async () => {
91
- const extractor = new KeywordExtractor();
92
- const node = new TextNode({ text: '<h1>Test</h1>' });
93
- const result = await extractor.extractKeywordsFromNodes(node);
94
- expect(result).toHaveProperty('excerptKeywords');
95
- expect(typeof result.excerptKeywords).toBe('string');
96
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
97
- });
98
-
99
- it('handles non-English text', async () => {
100
- const extractor = new KeywordExtractor();
101
- const node = new TextNode({ text: '这是一个测试文档。' });
102
- const result = await extractor.extractKeywordsFromNodes(node);
103
- expect(result).toHaveProperty('excerptKeywords');
104
- expect(typeof result.excerptKeywords).toBe('string');
105
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
106
- });
107
-
108
- it('handles duplicate/repeated text', async () => {
109
- const extractor = new KeywordExtractor();
110
- const node = new TextNode({ text: 'repeat repeat repeat' });
111
- const result = await extractor.extractKeywordsFromNodes(node);
112
- expect(result).toHaveProperty('excerptKeywords');
113
- expect(typeof result.excerptKeywords).toBe('string');
114
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
115
- });
116
-
117
- it('handles only punctuation', async () => {
118
- const extractor = new KeywordExtractor();
119
- const node = new TextNode({ text: '!!!???...' });
120
- const result = await extractor.extractKeywordsFromNodes(node);
121
- expect(result).toHaveProperty('excerptKeywords');
122
- expect(typeof result.excerptKeywords).toBe('string');
123
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
124
- });
125
- });
@@ -1,126 +0,0 @@
1
- import { Agent } from '@mastra/core/agent';
2
- import type { MastraLanguageModel } from '@mastra/core/agent';
3
- import { defaultKeywordExtractPrompt, PromptTemplate } from '../prompts';
4
- import type { KeywordExtractPrompt } from '../prompts';
5
- import type { BaseNode } from '../schema';
6
- import { TextNode } from '../schema';
7
- import { BaseExtractor } from './base';
8
- import { baseLLM } from './types';
9
- import type { KeywordExtractArgs } from './types';
10
-
11
- type ExtractKeyword = {
12
- /**
13
- * Comma-separated keywords extracted from the node. May be empty if extraction fails.
14
- */
15
- excerptKeywords: string;
16
- };
17
-
18
- /**
19
- * Extract keywords from a list of nodes.
20
- */
21
- export class KeywordExtractor extends BaseExtractor {
22
- llm: MastraLanguageModel;
23
- keywords: number = 5;
24
- promptTemplate: KeywordExtractPrompt;
25
-
26
- /**
27
- * Constructor for the KeywordExtractor class.
28
- * @param {MastraLanguageModel} llm MastraLanguageModel instance.
29
- * @param {number} keywords Number of keywords to extract.
30
- * @param {string} [promptTemplate] Optional custom prompt template (must include {context})
31
- * @throws {Error} If keywords is less than 1.
32
- */
33
- constructor(options?: KeywordExtractArgs) {
34
- if (options?.keywords && options.keywords < 1) throw new Error('Keywords must be greater than 0');
35
-
36
- super();
37
-
38
- this.llm = options?.llm ?? baseLLM;
39
- this.keywords = options?.keywords ?? 5;
40
- this.promptTemplate = options?.promptTemplate
41
- ? new PromptTemplate({
42
- templateVars: ['context', 'maxKeywords'],
43
- template: options.promptTemplate,
44
- })
45
- : defaultKeywordExtractPrompt;
46
- }
47
-
48
- /**
49
- *
50
- * @param node Node to extract keywords from.
51
- * @returns Keywords extracted from the node.
52
- */
53
- /**
54
- * Extract keywords from a node. Returns an object with a comma-separated string of keywords, or an empty string if extraction fails.
55
- * Adds error handling for malformed/empty LLM output.
56
- */
57
- async extractKeywordsFromNodes(node: BaseNode): Promise<ExtractKeyword> {
58
- const text = node.getContent();
59
- if (!text || text.trim() === '') {
60
- return { excerptKeywords: '' };
61
- }
62
- if (this.isTextNodeOnly && !(node instanceof TextNode)) {
63
- return { excerptKeywords: '' };
64
- }
65
-
66
- let keywords = '';
67
- try {
68
- const miniAgent = new Agent({
69
- model: this.llm,
70
- name: 'keyword-extractor',
71
- instructions:
72
- 'You are a keyword extractor. You are given a node and you need to extract the keywords from the node.',
73
- });
74
-
75
- if (this.llm.specificationVersion === 'v2') {
76
- const result = await miniAgent.generateVNext(
77
- [
78
- {
79
- role: 'user',
80
- content: this.promptTemplate.format({
81
- context: node.getContent(),
82
- maxKeywords: this.keywords.toString(),
83
- }),
84
- },
85
- ],
86
- { format: 'mastra' },
87
- );
88
- keywords = result.text;
89
- } else {
90
- const result = await miniAgent.generate([
91
- {
92
- role: 'user',
93
- content: this.promptTemplate.format({ context: node.getContent(), maxKeywords: this.keywords.toString() }),
94
- },
95
- ]);
96
- keywords = result.text;
97
- }
98
-
99
- if (!keywords) {
100
- console.warn('Keyword extraction LLM output returned empty');
101
- return { excerptKeywords: '' };
102
- }
103
-
104
- return { excerptKeywords: keywords.trim() };
105
- } catch (err) {
106
- console.warn('Keyword extraction failed:', err);
107
- return { excerptKeywords: '' };
108
- }
109
- }
110
-
111
- /**
112
- *
113
- * @param nodes Nodes to extract keywords from.
114
- * @returns Keywords extracted from the nodes.
115
- */
116
- /**
117
- * Extract keywords from an array of nodes. Always returns an array (may be empty).
118
- * @param nodes Nodes to extract keywords from.
119
- * @returns Array of keyword extraction results.
120
- */
121
- async extract(nodes: BaseNode[]): Promise<Array<ExtractKeyword>> {
122
- if (!Array.isArray(nodes) || nodes.length === 0) return [];
123
- const results = await Promise.all(nodes.map(node => this.extractKeywordsFromNodes(node)));
124
- return results;
125
- }
126
- }