@mastra/rag 1.2.3-alpha.0 → 1.2.3-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/CHANGELOG.md +18 -0
  2. package/package.json +19 -6
  3. package/.turbo/turbo-build.log +0 -4
  4. package/docker-compose.yaml +0 -22
  5. package/eslint.config.js +0 -6
  6. package/src/document/document.test.ts +0 -2975
  7. package/src/document/document.ts +0 -335
  8. package/src/document/extractors/base.ts +0 -30
  9. package/src/document/extractors/index.ts +0 -5
  10. package/src/document/extractors/keywords.test.ts +0 -125
  11. package/src/document/extractors/keywords.ts +0 -126
  12. package/src/document/extractors/questions.test.ts +0 -120
  13. package/src/document/extractors/questions.ts +0 -111
  14. package/src/document/extractors/summary.test.ts +0 -107
  15. package/src/document/extractors/summary.ts +0 -122
  16. package/src/document/extractors/title.test.ts +0 -121
  17. package/src/document/extractors/title.ts +0 -185
  18. package/src/document/extractors/types.ts +0 -40
  19. package/src/document/index.ts +0 -2
  20. package/src/document/prompts/base.ts +0 -77
  21. package/src/document/prompts/format.ts +0 -9
  22. package/src/document/prompts/index.ts +0 -15
  23. package/src/document/prompts/prompt.ts +0 -60
  24. package/src/document/prompts/types.ts +0 -29
  25. package/src/document/schema/index.ts +0 -3
  26. package/src/document/schema/node.ts +0 -187
  27. package/src/document/schema/types.ts +0 -40
  28. package/src/document/transformers/character.ts +0 -267
  29. package/src/document/transformers/html.ts +0 -346
  30. package/src/document/transformers/json.ts +0 -536
  31. package/src/document/transformers/latex.ts +0 -11
  32. package/src/document/transformers/markdown.ts +0 -239
  33. package/src/document/transformers/semantic-markdown.ts +0 -227
  34. package/src/document/transformers/sentence.ts +0 -314
  35. package/src/document/transformers/text.ts +0 -158
  36. package/src/document/transformers/token.ts +0 -137
  37. package/src/document/transformers/transformer.ts +0 -5
  38. package/src/document/types.ts +0 -145
  39. package/src/document/validation.ts +0 -158
  40. package/src/graph-rag/index.test.ts +0 -235
  41. package/src/graph-rag/index.ts +0 -306
  42. package/src/index.ts +0 -8
  43. package/src/rerank/index.test.ts +0 -150
  44. package/src/rerank/index.ts +0 -198
  45. package/src/rerank/relevance/cohere/index.ts +0 -56
  46. package/src/rerank/relevance/index.ts +0 -3
  47. package/src/rerank/relevance/mastra-agent/index.ts +0 -32
  48. package/src/rerank/relevance/zeroentropy/index.ts +0 -26
  49. package/src/tools/README.md +0 -153
  50. package/src/tools/document-chunker.ts +0 -34
  51. package/src/tools/graph-rag.test.ts +0 -115
  52. package/src/tools/graph-rag.ts +0 -157
  53. package/src/tools/index.ts +0 -3
  54. package/src/tools/types.ts +0 -126
  55. package/src/tools/vector-query-database-config.test.ts +0 -190
  56. package/src/tools/vector-query.test.ts +0 -477
  57. package/src/tools/vector-query.ts +0 -171
  58. package/src/utils/convert-sources.ts +0 -43
  59. package/src/utils/default-settings.ts +0 -38
  60. package/src/utils/index.ts +0 -3
  61. package/src/utils/tool-schemas.ts +0 -38
  62. package/src/utils/vector-prompts.ts +0 -832
  63. package/src/utils/vector-search.ts +0 -130
  64. package/tsconfig.build.json +0 -9
  65. package/tsconfig.json +0 -5
  66. package/tsup.config.ts +0 -17
  67. package/vitest.config.ts +0 -8
@@ -1,335 +0,0 @@
1
- import { TitleExtractor, SummaryExtractor, QuestionsAnsweredExtractor, KeywordExtractor } from './extractors';
2
- import type { BaseNode } from './schema';
3
- import { Document as Chunk, NodeRelationship, ObjectType } from './schema';
4
-
5
- import { CharacterTransformer, RecursiveCharacterTransformer } from './transformers/character';
6
- import { HTMLHeaderTransformer, HTMLSectionTransformer } from './transformers/html';
7
- import { RecursiveJsonTransformer } from './transformers/json';
8
- import { LatexTransformer } from './transformers/latex';
9
- import { MarkdownHeaderTransformer, MarkdownTransformer } from './transformers/markdown';
10
- import { SemanticMarkdownTransformer } from './transformers/semantic-markdown';
11
- import { SentenceTransformer } from './transformers/sentence';
12
- import { TokenTransformer } from './transformers/token';
13
- import type {
14
- ChunkParams,
15
- ChunkStrategy,
16
- ExtractParams,
17
- HTMLChunkOptions,
18
- RecursiveChunkOptions,
19
- CharacterChunkOptions,
20
- TokenChunkOptions,
21
- MarkdownChunkOptions,
22
- SemanticMarkdownChunkOptions,
23
- JsonChunkOptions,
24
- LatexChunkOptions,
25
- SentenceChunkOptions,
26
- StrategyOptions,
27
- } from './types';
28
- import { validateChunkParams } from './validation';
29
-
30
- export class MDocument {
31
- private chunks: Chunk[];
32
- private type: string; // e.g., 'text', 'html', 'markdown', 'json'
33
-
34
- constructor({ docs, type }: { docs: { text: string; metadata?: Record<string, any> }[]; type: string }) {
35
- this.chunks = docs.map(d => {
36
- return new Chunk({ text: d.text, metadata: d.metadata });
37
- });
38
- this.type = type;
39
- }
40
-
41
- async extractMetadata({ title, summary, questions, keywords }: ExtractParams): Promise<MDocument> {
42
- const transformations = [];
43
-
44
- if (typeof summary !== 'undefined') {
45
- transformations.push(new SummaryExtractor(typeof summary === 'boolean' ? {} : summary));
46
- }
47
-
48
- if (typeof questions !== 'undefined') {
49
- transformations.push(new QuestionsAnsweredExtractor(typeof questions === 'boolean' ? {} : questions));
50
- }
51
-
52
- if (typeof keywords !== 'undefined') {
53
- transformations.push(new KeywordExtractor(typeof keywords === 'boolean' ? {} : keywords));
54
- }
55
-
56
- if (typeof title !== 'undefined') {
57
- transformations.push(new TitleExtractor(typeof title === 'boolean' ? {} : title));
58
- this.chunks = this.chunks.map(doc =>
59
- doc?.metadata?.docId
60
- ? new Chunk({
61
- ...doc,
62
- relationships: {
63
- [NodeRelationship.SOURCE]: {
64
- nodeId: doc.metadata.docId,
65
- nodeType: ObjectType.DOCUMENT,
66
- metadata: doc.metadata,
67
- },
68
- },
69
- })
70
- : doc,
71
- );
72
- }
73
-
74
- let nodes: BaseNode[] = this.chunks;
75
- for (const extractor of transformations) {
76
- nodes = await extractor.processNodes(nodes);
77
- }
78
-
79
- this.chunks = this.chunks.map((doc, i) => {
80
- return new Chunk({
81
- text: doc.text,
82
- metadata: {
83
- ...doc.metadata,
84
- ...(nodes?.[i]?.metadata || {}),
85
- },
86
- });
87
- });
88
-
89
- return this;
90
- }
91
-
92
- static fromText(text: string, metadata?: Record<string, any>): MDocument {
93
- return new MDocument({
94
- docs: [
95
- {
96
- text,
97
- metadata,
98
- },
99
- ],
100
- type: 'text',
101
- });
102
- }
103
-
104
- static fromHTML(html: string, metadata?: Record<string, any>): MDocument {
105
- return new MDocument({
106
- docs: [
107
- {
108
- text: html,
109
- metadata,
110
- },
111
- ],
112
- type: 'html',
113
- });
114
- }
115
-
116
- static fromMarkdown(markdown: string, metadata?: Record<string, any>): MDocument {
117
- return new MDocument({
118
- docs: [
119
- {
120
- text: markdown,
121
- metadata,
122
- },
123
- ],
124
- type: 'markdown',
125
- });
126
- }
127
-
128
- static fromJSON(jsonString: string, metadata?: Record<string, any>): MDocument {
129
- return new MDocument({
130
- docs: [
131
- {
132
- text: jsonString,
133
- metadata,
134
- },
135
- ],
136
- type: 'json',
137
- });
138
- }
139
-
140
- private defaultStrategy(): ChunkStrategy {
141
- switch (this.type) {
142
- case 'html':
143
- return 'html';
144
- case 'markdown':
145
- return 'markdown';
146
- case 'json':
147
- return 'json';
148
- case 'latex':
149
- return 'latex';
150
- default:
151
- return 'recursive';
152
- }
153
- }
154
-
155
- private _strategyMap?: { [S in ChunkStrategy]: (options?: StrategyOptions[S]) => Promise<void> };
156
-
157
- private get strategyMap() {
158
- if (!this._strategyMap) {
159
- this._strategyMap = {
160
- recursive: options => this.chunkRecursive(options),
161
- character: options => this.chunkCharacter(options),
162
- token: options => this.chunkToken(options),
163
- markdown: options => this.chunkMarkdown(options),
164
- html: options => this.chunkHTML(options),
165
- json: options => this.chunkJSON(options),
166
- latex: options => this.chunkLatex(options),
167
- sentence: options => this.chunkSentence(options),
168
- 'semantic-markdown': options => this.chunkSemanticMarkdown(options),
169
- };
170
- }
171
- return this._strategyMap;
172
- }
173
-
174
- private async chunkBy<K extends ChunkStrategy>(strategy: K, options?: StrategyOptions[K]): Promise<void> {
175
- const chunkingFunc = this.strategyMap[strategy];
176
- if (chunkingFunc) {
177
- await chunkingFunc(options);
178
- } else {
179
- throw new Error(`Unknown strategy: ${strategy}`);
180
- }
181
- }
182
-
183
- async chunkRecursive(options?: RecursiveChunkOptions): Promise<void> {
184
- if (options?.language) {
185
- const rt = RecursiveCharacterTransformer.fromLanguage(options.language, options);
186
- const textSplit = rt.transformDocuments(this.chunks);
187
- this.chunks = textSplit;
188
- return;
189
- }
190
-
191
- const rt = new RecursiveCharacterTransformer(options);
192
- const textSplit = rt.transformDocuments(this.chunks);
193
- this.chunks = textSplit;
194
- }
195
-
196
- async chunkCharacter(options?: CharacterChunkOptions): Promise<void> {
197
- const rt = new CharacterTransformer({
198
- ...options,
199
- separator: options?.separator,
200
- isSeparatorRegex: options?.isSeparatorRegex,
201
- });
202
- const textSplit = rt.transformDocuments(this.chunks);
203
- this.chunks = textSplit;
204
- }
205
-
206
- async chunkHTML(options?: HTMLChunkOptions): Promise<void> {
207
- if (options?.headers?.length) {
208
- const rt = new HTMLHeaderTransformer(options as HTMLChunkOptions & { headers: [string, string][] });
209
-
210
- const textSplit = rt.transformDocuments(this.chunks);
211
- this.chunks = textSplit;
212
- return;
213
- }
214
-
215
- if (options?.sections?.length) {
216
- const rt = new HTMLSectionTransformer(options as HTMLChunkOptions & { sections: [string, string][] });
217
-
218
- const textSplit = rt.transformDocuments(this.chunks);
219
- this.chunks = textSplit;
220
- return;
221
- }
222
-
223
- throw new Error('HTML chunking requires either headers or sections to be specified');
224
- }
225
-
226
- async chunkJSON(options?: JsonChunkOptions): Promise<void> {
227
- if (!options?.maxSize) {
228
- throw new Error('JSON chunking requires maxSize to be specified');
229
- }
230
-
231
- const rt = new RecursiveJsonTransformer({
232
- maxSize: options?.maxSize,
233
- minSize: options?.minSize,
234
- });
235
-
236
- const textSplit = rt.transformDocuments({
237
- documents: this.chunks,
238
- ensureAscii: options?.ensureAscii,
239
- convertLists: options?.convertLists,
240
- });
241
-
242
- this.chunks = textSplit;
243
- }
244
-
245
- async chunkLatex(options?: LatexChunkOptions): Promise<void> {
246
- const rt = new LatexTransformer(options);
247
- const textSplit = rt.transformDocuments(this.chunks);
248
- this.chunks = textSplit;
249
- }
250
-
251
- async chunkToken(options?: TokenChunkOptions): Promise<void> {
252
- const rt = TokenTransformer.fromTikToken({
253
- options,
254
- encodingName: options?.encodingName,
255
- modelName: options?.modelName,
256
- });
257
- const textSplit = rt.transformDocuments(this.chunks);
258
- this.chunks = textSplit;
259
- }
260
-
261
- async chunkMarkdown(options?: MarkdownChunkOptions): Promise<void> {
262
- if (options?.headers) {
263
- const rt = new MarkdownHeaderTransformer(options.headers, options?.returnEachLine, options?.stripHeaders);
264
- const textSplit = rt.transformDocuments(this.chunks);
265
- this.chunks = textSplit;
266
- return;
267
- }
268
-
269
- const rt = new MarkdownTransformer(options);
270
- const textSplit = rt.transformDocuments(this.chunks);
271
- this.chunks = textSplit;
272
- }
273
-
274
- async chunkSentence(options?: SentenceChunkOptions): Promise<void> {
275
- if (!options?.maxSize) {
276
- throw new Error('Sentence chunking requires maxSize to be specified');
277
- }
278
-
279
- const rt = new SentenceTransformer({
280
- minSize: options?.minSize,
281
- maxSize: options?.maxSize,
282
- targetSize: options?.targetSize,
283
- overlap: options?.overlap,
284
- sentenceEnders: options?.sentenceEnders,
285
- fallbackToWords: options?.fallbackToWords,
286
- fallbackToCharacters: options?.fallbackToCharacters,
287
- keepSeparator: options?.keepSeparator,
288
- lengthFunction: options?.lengthFunction,
289
- addStartIndex: options?.addStartIndex,
290
- stripWhitespace: options?.stripWhitespace,
291
- });
292
-
293
- const textSplit = rt.transformDocuments(this.chunks);
294
- this.chunks = textSplit;
295
- }
296
-
297
- async chunkSemanticMarkdown(options?: SemanticMarkdownChunkOptions): Promise<void> {
298
- const rt = SemanticMarkdownTransformer.fromTikToken({
299
- options,
300
- encodingName: options?.encodingName,
301
- modelName: options?.modelName,
302
- });
303
- const textSplit = rt.transformDocuments(this.chunks);
304
- this.chunks = textSplit;
305
- }
306
-
307
- async chunk(params?: ChunkParams): Promise<Chunk[]> {
308
- const { strategy: passedStrategy, extract, ...chunkOptions } = params || {};
309
- // Determine the default strategy based on type if not specified
310
- const strategy = passedStrategy || this.defaultStrategy();
311
-
312
- validateChunkParams(strategy, chunkOptions);
313
-
314
- // Apply the appropriate chunking strategy
315
- await this.chunkBy(strategy, chunkOptions);
316
-
317
- if (extract) {
318
- await this.extractMetadata(extract);
319
- }
320
-
321
- return this.chunks;
322
- }
323
-
324
- getDocs(): Chunk[] {
325
- return this.chunks;
326
- }
327
-
328
- getText(): string[] {
329
- return this.chunks.map(doc => doc.text);
330
- }
331
-
332
- getMetadata(): Record<string, any>[] {
333
- return this.chunks.map(doc => doc.metadata);
334
- }
335
- }
@@ -1,30 +0,0 @@
1
- import type { BaseNode } from '../schema';
2
-
3
- /*
4
- * Abstract class for all extractors.
5
- */
6
- export abstract class BaseExtractor {
7
- isTextNodeOnly: boolean = true;
8
-
9
- abstract extract(nodes: BaseNode[]): Promise<Record<string, any>[]>;
10
-
11
- /**
12
- *
13
- * @param nodes Nodes to extract metadata from.
14
- * @returns Metadata extracted from the nodes.
15
- */
16
- async processNodes(nodes: BaseNode[]): Promise<BaseNode[]> {
17
- let newNodes: BaseNode[] = nodes;
18
-
19
- const curMetadataList = await this.extract(newNodes);
20
-
21
- for (const idx in newNodes) {
22
- newNodes[idx]!.metadata = {
23
- ...newNodes[idx]!.metadata,
24
- ...curMetadataList[idx],
25
- };
26
- }
27
-
28
- return newNodes;
29
- }
30
- }
@@ -1,5 +0,0 @@
1
- export { TitleExtractor } from './title';
2
- export { SummaryExtractor } from './summary';
3
- export { QuestionsAnsweredExtractor } from './questions';
4
- export { KeywordExtractor } from './keywords';
5
- export type { KeywordExtractArgs, QuestionAnswerExtractArgs, SummaryExtractArgs, TitleExtractorsArgs } from './types';
@@ -1,125 +0,0 @@
1
- import { createOpenAI } from '@ai-sdk/openai';
2
- import { describe, it, expect, vi } from 'vitest';
3
- import { TextNode } from '../schema';
4
- import { KeywordExtractor } from './keywords';
5
-
6
- const openai = createOpenAI({
7
- apiKey: process.env.OPENAI_API_KEY,
8
- });
9
-
10
- const model = openai('gpt-4o');
11
-
12
- vi.setConfig({ testTimeout: 50_000, hookTimeout: 50_000 });
13
-
14
- describe('KeywordExtractor', () => {
15
- it('can use a custom model for keywords extraction', async () => {
16
- const extractor = new KeywordExtractor({ llm: model });
17
- const node = new TextNode({ text: 'The quick brown fox jumps over the lazy dog.' });
18
- const result = await extractor.extractKeywordsFromNodes(node);
19
- expect(result).toHaveProperty('excerptKeywords');
20
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
21
- });
22
-
23
- it('handles empty input gracefully', async () => {
24
- const extractor = new KeywordExtractor();
25
- const node = new TextNode({ text: '' });
26
- const result = await extractor.extractKeywordsFromNodes(node);
27
- expect(result.excerptKeywords).toBe('');
28
- });
29
-
30
- it('supports prompt customization', async () => {
31
- const extractor = new KeywordExtractor({
32
- promptTemplate: 'List keywords in: {context}. Limit to {maxKeywords}.',
33
- });
34
- const node = new TextNode({ text: 'Test document for prompt customization.' });
35
- const result = await extractor.extractKeywordsFromNodes(node);
36
- expect(result).toHaveProperty('excerptKeywords');
37
- expect(typeof result.excerptKeywords).toBe('string');
38
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
39
- });
40
-
41
- it('extracts keywords from text', async () => {
42
- const extractor = new KeywordExtractor();
43
- const node = new TextNode({ text: 'The quick brown fox jumps over the lazy dog.' });
44
- const result = await extractor.extractKeywordsFromNodes(node);
45
- expect(result).toHaveProperty('excerptKeywords');
46
- expect(typeof result.excerptKeywords).toBe('string');
47
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
48
- });
49
- it(
50
- 'handles very long input',
51
- {
52
- timeout: 60_000,
53
- },
54
- async () => {
55
- const extractor = new KeywordExtractor();
56
- const longText = 'A'.repeat(1000);
57
- const node = new TextNode({ text: longText });
58
- const result = await extractor.extractKeywordsFromNodes(node);
59
- expect(result).toHaveProperty('excerptKeywords');
60
- expect(typeof result.excerptKeywords).toBe('string');
61
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
62
- },
63
- );
64
-
65
- it('handles whitespace only input', async () => {
66
- const extractor = new KeywordExtractor();
67
- const node = new TextNode({ text: ' ' });
68
- const result = await extractor.extractKeywordsFromNodes(node);
69
- expect(result.excerptKeywords).toBe('');
70
- });
71
-
72
- it('handles special characters and emojis', async () => {
73
- const extractor = new KeywordExtractor();
74
- const node = new TextNode({ text: '🚀✨🔥' });
75
- const result = await extractor.extractKeywordsFromNodes(node);
76
- expect(result).toHaveProperty('excerptKeywords');
77
- expect(typeof result.excerptKeywords).toBe('string');
78
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
79
- });
80
-
81
- it('handles numbers only', async () => {
82
- const extractor = new KeywordExtractor();
83
- const node = new TextNode({ text: '1234567890' });
84
- const result = await extractor.extractKeywordsFromNodes(node);
85
- expect(result).toHaveProperty('excerptKeywords');
86
- expect(typeof result.excerptKeywords).toBe('string');
87
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
88
- });
89
-
90
- it('handles HTML tags', async () => {
91
- const extractor = new KeywordExtractor();
92
- const node = new TextNode({ text: '<h1>Test</h1>' });
93
- const result = await extractor.extractKeywordsFromNodes(node);
94
- expect(result).toHaveProperty('excerptKeywords');
95
- expect(typeof result.excerptKeywords).toBe('string');
96
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
97
- });
98
-
99
- it('handles non-English text', async () => {
100
- const extractor = new KeywordExtractor();
101
- const node = new TextNode({ text: '这是一个测试文档。' });
102
- const result = await extractor.extractKeywordsFromNodes(node);
103
- expect(result).toHaveProperty('excerptKeywords');
104
- expect(typeof result.excerptKeywords).toBe('string');
105
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
106
- });
107
-
108
- it('handles duplicate/repeated text', async () => {
109
- const extractor = new KeywordExtractor();
110
- const node = new TextNode({ text: 'repeat repeat repeat' });
111
- const result = await extractor.extractKeywordsFromNodes(node);
112
- expect(result).toHaveProperty('excerptKeywords');
113
- expect(typeof result.excerptKeywords).toBe('string');
114
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
115
- });
116
-
117
- it('handles only punctuation', async () => {
118
- const extractor = new KeywordExtractor();
119
- const node = new TextNode({ text: '!!!???...' });
120
- const result = await extractor.extractKeywordsFromNodes(node);
121
- expect(result).toHaveProperty('excerptKeywords');
122
- expect(typeof result.excerptKeywords).toBe('string');
123
- expect(result.excerptKeywords.length).toBeGreaterThan(0);
124
- });
125
- });
@@ -1,126 +0,0 @@
1
- import { Agent } from '@mastra/core/agent';
2
- import type { MastraLanguageModel } from '@mastra/core/agent';
3
- import { defaultKeywordExtractPrompt, PromptTemplate } from '../prompts';
4
- import type { KeywordExtractPrompt } from '../prompts';
5
- import type { BaseNode } from '../schema';
6
- import { TextNode } from '../schema';
7
- import { BaseExtractor } from './base';
8
- import { baseLLM } from './types';
9
- import type { KeywordExtractArgs } from './types';
10
-
11
- type ExtractKeyword = {
12
- /**
13
- * Comma-separated keywords extracted from the node. May be empty if extraction fails.
14
- */
15
- excerptKeywords: string;
16
- };
17
-
18
- /**
19
- * Extract keywords from a list of nodes.
20
- */
21
- export class KeywordExtractor extends BaseExtractor {
22
- llm: MastraLanguageModel;
23
- keywords: number = 5;
24
- promptTemplate: KeywordExtractPrompt;
25
-
26
- /**
27
- * Constructor for the KeywordExtractor class.
28
- * @param {MastraLanguageModel} llm MastraLanguageModel instance.
29
- * @param {number} keywords Number of keywords to extract.
30
- * @param {string} [promptTemplate] Optional custom prompt template (must include {context})
31
- * @throws {Error} If keywords is less than 1.
32
- */
33
- constructor(options?: KeywordExtractArgs) {
34
- if (options?.keywords && options.keywords < 1) throw new Error('Keywords must be greater than 0');
35
-
36
- super();
37
-
38
- this.llm = options?.llm ?? baseLLM;
39
- this.keywords = options?.keywords ?? 5;
40
- this.promptTemplate = options?.promptTemplate
41
- ? new PromptTemplate({
42
- templateVars: ['context', 'maxKeywords'],
43
- template: options.promptTemplate,
44
- })
45
- : defaultKeywordExtractPrompt;
46
- }
47
-
48
- /**
49
- *
50
- * @param node Node to extract keywords from.
51
- * @returns Keywords extracted from the node.
52
- */
53
- /**
54
- * Extract keywords from a node. Returns an object with a comma-separated string of keywords, or an empty string if extraction fails.
55
- * Adds error handling for malformed/empty LLM output.
56
- */
57
- async extractKeywordsFromNodes(node: BaseNode): Promise<ExtractKeyword> {
58
- const text = node.getContent();
59
- if (!text || text.trim() === '') {
60
- return { excerptKeywords: '' };
61
- }
62
- if (this.isTextNodeOnly && !(node instanceof TextNode)) {
63
- return { excerptKeywords: '' };
64
- }
65
-
66
- let keywords = '';
67
- try {
68
- const miniAgent = new Agent({
69
- model: this.llm,
70
- name: 'keyword-extractor',
71
- instructions:
72
- 'You are a keyword extractor. You are given a node and you need to extract the keywords from the node.',
73
- });
74
-
75
- if (this.llm.specificationVersion === 'v2') {
76
- const result = await miniAgent.generateVNext(
77
- [
78
- {
79
- role: 'user',
80
- content: this.promptTemplate.format({
81
- context: node.getContent(),
82
- maxKeywords: this.keywords.toString(),
83
- }),
84
- },
85
- ],
86
- { format: 'mastra' },
87
- );
88
- keywords = result.text;
89
- } else {
90
- const result = await miniAgent.generate([
91
- {
92
- role: 'user',
93
- content: this.promptTemplate.format({ context: node.getContent(), maxKeywords: this.keywords.toString() }),
94
- },
95
- ]);
96
- keywords = result.text;
97
- }
98
-
99
- if (!keywords) {
100
- console.warn('Keyword extraction LLM output returned empty');
101
- return { excerptKeywords: '' };
102
- }
103
-
104
- return { excerptKeywords: keywords.trim() };
105
- } catch (err) {
106
- console.warn('Keyword extraction failed:', err);
107
- return { excerptKeywords: '' };
108
- }
109
- }
110
-
111
- /**
112
- *
113
- * @param nodes Nodes to extract keywords from.
114
- * @returns Keywords extracted from the nodes.
115
- */
116
- /**
117
- * Extract keywords from an array of nodes. Always returns an array (may be empty).
118
- * @param nodes Nodes to extract keywords from.
119
- * @returns Array of keyword extraction results.
120
- */
121
- async extract(nodes: BaseNode[]): Promise<Array<ExtractKeyword>> {
122
- if (!Array.isArray(nodes) || nodes.length === 0) return [];
123
- const results = await Promise.all(nodes.map(node => this.extractKeywordsFromNodes(node)));
124
- return results;
125
- }
126
- }