@mastra/rag 1.2.2 → 1.2.3-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/CHANGELOG.md +22 -0
  2. package/dist/index.cjs +25 -9
  3. package/dist/index.cjs.map +1 -1
  4. package/dist/index.js +25 -9
  5. package/dist/index.js.map +1 -1
  6. package/dist/tools/graph-rag.d.ts.map +1 -1
  7. package/dist/tools/types.d.ts +18 -5
  8. package/dist/tools/types.d.ts.map +1 -1
  9. package/dist/tools/vector-query.d.ts.map +1 -1
  10. package/dist/utils/vector-search.d.ts +6 -7
  11. package/dist/utils/vector-search.d.ts.map +1 -1
  12. package/package.json +19 -6
  13. package/.turbo/turbo-build.log +0 -4
  14. package/docker-compose.yaml +0 -22
  15. package/eslint.config.js +0 -6
  16. package/src/document/document.test.ts +0 -2975
  17. package/src/document/document.ts +0 -335
  18. package/src/document/extractors/base.ts +0 -30
  19. package/src/document/extractors/index.ts +0 -5
  20. package/src/document/extractors/keywords.test.ts +0 -125
  21. package/src/document/extractors/keywords.ts +0 -126
  22. package/src/document/extractors/questions.test.ts +0 -120
  23. package/src/document/extractors/questions.ts +0 -111
  24. package/src/document/extractors/summary.test.ts +0 -107
  25. package/src/document/extractors/summary.ts +0 -122
  26. package/src/document/extractors/title.test.ts +0 -121
  27. package/src/document/extractors/title.ts +0 -185
  28. package/src/document/extractors/types.ts +0 -40
  29. package/src/document/index.ts +0 -2
  30. package/src/document/prompts/base.ts +0 -77
  31. package/src/document/prompts/format.ts +0 -9
  32. package/src/document/prompts/index.ts +0 -15
  33. package/src/document/prompts/prompt.ts +0 -60
  34. package/src/document/prompts/types.ts +0 -29
  35. package/src/document/schema/index.ts +0 -3
  36. package/src/document/schema/node.ts +0 -187
  37. package/src/document/schema/types.ts +0 -40
  38. package/src/document/transformers/character.ts +0 -267
  39. package/src/document/transformers/html.ts +0 -346
  40. package/src/document/transformers/json.ts +0 -536
  41. package/src/document/transformers/latex.ts +0 -11
  42. package/src/document/transformers/markdown.ts +0 -239
  43. package/src/document/transformers/semantic-markdown.ts +0 -227
  44. package/src/document/transformers/sentence.ts +0 -314
  45. package/src/document/transformers/text.ts +0 -158
  46. package/src/document/transformers/token.ts +0 -137
  47. package/src/document/transformers/transformer.ts +0 -5
  48. package/src/document/types.ts +0 -145
  49. package/src/document/validation.ts +0 -158
  50. package/src/graph-rag/index.test.ts +0 -235
  51. package/src/graph-rag/index.ts +0 -306
  52. package/src/index.ts +0 -8
  53. package/src/rerank/index.test.ts +0 -150
  54. package/src/rerank/index.ts +0 -198
  55. package/src/rerank/relevance/cohere/index.ts +0 -56
  56. package/src/rerank/relevance/index.ts +0 -3
  57. package/src/rerank/relevance/mastra-agent/index.ts +0 -32
  58. package/src/rerank/relevance/zeroentropy/index.ts +0 -26
  59. package/src/tools/README.md +0 -153
  60. package/src/tools/document-chunker.ts +0 -34
  61. package/src/tools/graph-rag.test.ts +0 -115
  62. package/src/tools/graph-rag.ts +0 -154
  63. package/src/tools/index.ts +0 -3
  64. package/src/tools/types.ts +0 -110
  65. package/src/tools/vector-query-database-config.test.ts +0 -190
  66. package/src/tools/vector-query.test.ts +0 -418
  67. package/src/tools/vector-query.ts +0 -169
  68. package/src/utils/convert-sources.ts +0 -43
  69. package/src/utils/default-settings.ts +0 -38
  70. package/src/utils/index.ts +0 -3
  71. package/src/utils/tool-schemas.ts +0 -38
  72. package/src/utils/vector-prompts.ts +0 -832
  73. package/src/utils/vector-search.ts +0 -117
  74. package/tsconfig.build.json +0 -9
  75. package/tsconfig.json +0 -5
  76. package/tsup.config.ts +0 -17
  77. package/vitest.config.ts +0 -8
@@ -1,120 +0,0 @@
1
- import { createOpenAI } from '@ai-sdk/openai';
2
- import { describe, it, expect, vi } from 'vitest';
3
- import { TextNode } from '../schema';
4
- import { QuestionsAnsweredExtractor } from './questions';
5
-
6
- const openai = createOpenAI({
7
- apiKey: process.env.OPENAI_API_KEY,
8
- });
9
-
10
- const model = openai('gpt-4o');
11
-
12
- vi.setConfig({ testTimeout: 100_000, hookTimeout: 100_000 });
13
-
14
- describe('QuestionsAnsweredExtractor', () => {
15
- it('can use a custom model for questions extraction', async () => {
16
- const extractor = new QuestionsAnsweredExtractor({ llm: model });
17
- const node = new TextNode({ text: 'What is the capital of Spain?' });
18
- const result = await extractor.extractQuestionsFromNode(node);
19
- expect(result).toHaveProperty('questionsThisExcerptCanAnswer');
20
- expect(result.questionsThisExcerptCanAnswer.length).toBeGreaterThan(0);
21
- });
22
-
23
- it('extracts questions', async () => {
24
- const extractor = new QuestionsAnsweredExtractor();
25
- const node = new TextNode({ text: 'What is the capital of France? What is the color of the sky?' });
26
- const result = await extractor.extractQuestionsFromNode(node);
27
- expect(result).toHaveProperty('questionsThisExcerptCanAnswer');
28
- expect(typeof result.questionsThisExcerptCanAnswer).toBe('string');
29
- expect(result.questionsThisExcerptCanAnswer.length).toBeGreaterThan(0);
30
- });
31
-
32
- it('handles empty input gracefully', async () => {
33
- const extractor = new QuestionsAnsweredExtractor();
34
- const node = new TextNode({ text: '' });
35
- const result = await extractor.extractQuestionsFromNode(node);
36
- expect(result).toHaveProperty('questionsThisExcerptCanAnswer');
37
- expect(result.questionsThisExcerptCanAnswer).toBe('');
38
- });
39
-
40
- it('supports prompt customization', async () => {
41
- const extractor = new QuestionsAnsweredExtractor({
42
- promptTemplate: 'List questions in: {context}. Limit to {numQuestions}.',
43
- });
44
- const node = new TextNode({ text: 'Test document for prompt customization.' });
45
- const result = await extractor.extractQuestionsFromNode(node);
46
- expect(result).toHaveProperty('questionsThisExcerptCanAnswer');
47
- expect(typeof result.questionsThisExcerptCanAnswer).toBe('string');
48
- expect(result.questionsThisExcerptCanAnswer.length).toBeGreaterThan(0);
49
- });
50
- it('handles very long input', async () => {
51
- const extractor = new QuestionsAnsweredExtractor();
52
- const longText = 'A'.repeat(1000);
53
- const node = new TextNode({ text: longText });
54
- const result = await extractor.extractQuestionsFromNode(node);
55
- expect(result).toHaveProperty('questionsThisExcerptCanAnswer');
56
- expect(typeof result.questionsThisExcerptCanAnswer).toBe('string');
57
- expect(result.questionsThisExcerptCanAnswer.length).toBeGreaterThan(0);
58
- });
59
-
60
- it('handles whitespace only input', async () => {
61
- const extractor = new QuestionsAnsweredExtractor();
62
- const node = new TextNode({ text: ' ' });
63
- const result = await extractor.extractQuestionsFromNode(node);
64
- expect(result.questionsThisExcerptCanAnswer).toBe('');
65
- });
66
-
67
- it('handles special characters and emojis', async () => {
68
- const extractor = new QuestionsAnsweredExtractor();
69
- const node = new TextNode({ text: '🚀✨🔥' });
70
- const result = await extractor.extractQuestionsFromNode(node);
71
- expect(result).toHaveProperty('questionsThisExcerptCanAnswer');
72
- expect(typeof result.questionsThisExcerptCanAnswer).toBe('string');
73
- expect(result.questionsThisExcerptCanAnswer.length).toBeGreaterThan(0);
74
- });
75
-
76
- it('handles numbers only', async () => {
77
- const extractor = new QuestionsAnsweredExtractor();
78
- const node = new TextNode({ text: '1234567890' });
79
- const result = await extractor.extractQuestionsFromNode(node);
80
- expect(result).toHaveProperty('questionsThisExcerptCanAnswer');
81
- expect(typeof result.questionsThisExcerptCanAnswer).toBe('string');
82
- expect(result.questionsThisExcerptCanAnswer.length).toBeGreaterThan(0);
83
- });
84
-
85
- it('handles HTML tags', async () => {
86
- const extractor = new QuestionsAnsweredExtractor();
87
- const node = new TextNode({ text: '<h1>Test</h1>' });
88
- const result = await extractor.extractQuestionsFromNode(node);
89
- expect(result).toHaveProperty('questionsThisExcerptCanAnswer');
90
- expect(typeof result.questionsThisExcerptCanAnswer).toBe('string');
91
- expect(result.questionsThisExcerptCanAnswer.length).toBeGreaterThan(0);
92
- });
93
-
94
- it('handles non-English text', async () => {
95
- const extractor = new QuestionsAnsweredExtractor();
96
- const node = new TextNode({ text: '这是一个测试文档。' });
97
- const result = await extractor.extractQuestionsFromNode(node);
98
- expect(result).toHaveProperty('questionsThisExcerptCanAnswer');
99
- expect(typeof result.questionsThisExcerptCanAnswer).toBe('string');
100
- expect(result.questionsThisExcerptCanAnswer.length).toBeGreaterThan(0);
101
- });
102
-
103
- it('handles duplicate/repeated text', async () => {
104
- const extractor = new QuestionsAnsweredExtractor();
105
- const node = new TextNode({ text: 'repeat repeat repeat' });
106
- const result = await extractor.extractQuestionsFromNode(node);
107
- expect(result).toHaveProperty('questionsThisExcerptCanAnswer');
108
- expect(typeof result.questionsThisExcerptCanAnswer).toBe('string');
109
- expect(result.questionsThisExcerptCanAnswer.length).toBeGreaterThan(0);
110
- });
111
-
112
- it('handles only punctuation', async () => {
113
- const extractor = new QuestionsAnsweredExtractor();
114
- const node = new TextNode({ text: '!!!???...' });
115
- const result = await extractor.extractQuestionsFromNode(node);
116
- expect(result).toHaveProperty('questionsThisExcerptCanAnswer');
117
- expect(typeof result.questionsThisExcerptCanAnswer).toBe('string');
118
- expect(result.questionsThisExcerptCanAnswer.length).toBeGreaterThan(0);
119
- });
120
- });
@@ -1,111 +0,0 @@
1
- import { Agent } from '@mastra/core/agent';
2
- import type { MastraLanguageModel } from '@mastra/core/agent';
3
- import { PromptTemplate, defaultQuestionExtractPrompt } from '../prompts';
4
- import type { QuestionExtractPrompt } from '../prompts';
5
- import type { BaseNode } from '../schema';
6
- import { TextNode } from '../schema';
7
- import { BaseExtractor } from './base';
8
- import { baseLLM, STRIP_REGEX } from './types';
9
- import type { QuestionAnswerExtractArgs } from './types';
10
-
11
- type ExtractQuestion = {
12
- /**
13
- * Questions extracted from the node as a string (may be empty if extraction fails).
14
- */
15
- questionsThisExcerptCanAnswer: string;
16
- };
17
-
18
- /**
19
- * Extract questions from a list of nodes.
20
- */
21
- export class QuestionsAnsweredExtractor extends BaseExtractor {
22
- llm: MastraLanguageModel;
23
- questions: number = 5;
24
- promptTemplate: QuestionExtractPrompt;
25
- embeddingOnly: boolean = false;
26
-
27
- /**
28
- * Constructor for the QuestionsAnsweredExtractor class.
29
- * @param {MastraLanguageModel} llm MastraLanguageModel instance.
30
- * @param {number} questions Number of questions to generate.
31
- * @param {QuestionExtractPrompt['template']} promptTemplate Optional custom prompt template (should include {context}).
32
- * @param {boolean} embeddingOnly Whether to use metadata for embeddings only.
33
- */
34
- constructor(options?: QuestionAnswerExtractArgs) {
35
- if (options?.questions && options.questions < 1) throw new Error('Questions must be greater than 0');
36
-
37
- super();
38
-
39
- this.llm = options?.llm ?? baseLLM;
40
- this.questions = options?.questions ?? 5;
41
- this.promptTemplate = options?.promptTemplate
42
- ? new PromptTemplate({
43
- templateVars: ['numQuestions', 'context'],
44
- template: options.promptTemplate,
45
- }).partialFormat({
46
- numQuestions: '5',
47
- })
48
- : defaultQuestionExtractPrompt;
49
- this.embeddingOnly = options?.embeddingOnly ?? false;
50
- }
51
-
52
- /**
53
- * Extract answered questions from a node.
54
- * @param {BaseNode} node Node to extract questions from.
55
- * @returns {Promise<Array<ExtractQuestion> | Array<{}>>} Questions extracted from the node.
56
- */
57
- async extractQuestionsFromNode(node: BaseNode): Promise<ExtractQuestion> {
58
- const text = node.getContent();
59
- if (!text || text.trim() === '') {
60
- return { questionsThisExcerptCanAnswer: '' };
61
- }
62
- if (this.isTextNodeOnly && !(node instanceof TextNode)) {
63
- return { questionsThisExcerptCanAnswer: '' };
64
- }
65
-
66
- const contextStr = node.getContent();
67
-
68
- const prompt = this.promptTemplate.format({
69
- context: contextStr,
70
- numQuestions: this.questions.toString(),
71
- });
72
-
73
- const miniAgent = new Agent({
74
- model: this.llm,
75
- name: 'question-extractor',
76
- instructions:
77
- 'You are a question extractor. You are given a node and you need to extract the questions from the node.',
78
- });
79
-
80
- let questionsText = '';
81
- if (this.llm.specificationVersion === 'v2') {
82
- const result = await miniAgent.generateVNext([{ role: 'user', content: prompt }], { format: 'mastra' });
83
- questionsText = result.text;
84
- } else {
85
- const result = await miniAgent.generate([{ role: 'user', content: prompt }]);
86
- questionsText = result.text;
87
- }
88
-
89
- if (!questionsText) {
90
- console.warn('Question extraction LLM output returned empty');
91
- return { questionsThisExcerptCanAnswer: '' };
92
- }
93
-
94
- const result = questionsText.replace(STRIP_REGEX, '').trim();
95
-
96
- return {
97
- questionsThisExcerptCanAnswer: result,
98
- };
99
- }
100
-
101
- /**
102
- * Extract answered questions from a list of nodes.
103
- * @param {BaseNode[]} nodes Nodes to extract questions from.
104
- * @returns {Promise<Array<ExtractQuestion> | Array<{}>>} Questions extracted from the nodes.
105
- */
106
- async extract(nodes: BaseNode[]): Promise<Array<ExtractQuestion> | Array<object>> {
107
- const results = await Promise.all(nodes.map(node => this.extractQuestionsFromNode(node)));
108
-
109
- return results;
110
- }
111
- }
@@ -1,107 +0,0 @@
1
- import { createOpenAI } from '@ai-sdk/openai';
2
- import { describe, it, expect, vi } from 'vitest';
3
- import { TextNode } from '../schema';
4
- import { SummaryExtractor } from './summary';
5
-
6
- const openai = createOpenAI({
7
- apiKey: process.env.OPENAI_API_KEY,
8
- });
9
-
10
- const model = openai('gpt-4o');
11
-
12
- vi.setConfig({ testTimeout: 10_000, hookTimeout: 10_000 });
13
-
14
- describe('SummaryExtractor', () => {
15
- it('can use a custom model from the test suite', async () => {
16
- const extractor = new SummaryExtractor({ llm: model });
17
- const node = new TextNode({ text: 'A summary test using a custom model.' });
18
- const summary = await extractor.generateNodeSummary(node);
19
- expect(typeof summary).toBe('string');
20
- expect(summary.length).toBeGreaterThan(0);
21
- });
22
- it('extracts summary from normal text', async () => {
23
- const extractor = new SummaryExtractor();
24
- const node = new TextNode({ text: 'This is a test document.' });
25
- const summary = await extractor.generateNodeSummary(node);
26
- expect(typeof summary).toBe('string');
27
- expect(summary.length).toBeGreaterThan(0);
28
- });
29
-
30
- it('handles empty input gracefully', async () => {
31
- const extractor = new SummaryExtractor();
32
- const node = new TextNode({ text: '' });
33
- const summary = await extractor.generateNodeSummary(node);
34
- expect(summary).toBe('');
35
- });
36
-
37
- it('supports prompt customization', async () => {
38
- const extractor = new SummaryExtractor({ promptTemplate: 'Summarize: {context}' });
39
- const node = new TextNode({ text: 'Test document for prompt customization.' });
40
- const summary = await extractor.generateNodeSummary(node);
41
- expect(typeof summary).toBe('string');
42
- expect(summary.length).toBeGreaterThan(0);
43
- });
44
-
45
- it('handles very long input', async () => {
46
- const extractor = new SummaryExtractor();
47
- const longText = 'A'.repeat(1000);
48
- const node = new TextNode({ text: longText });
49
- const summary = await extractor.generateNodeSummary(node);
50
- expect(typeof summary).toBe('string');
51
- });
52
-
53
- it('handles whitespace only input', async () => {
54
- const extractor = new SummaryExtractor();
55
- const node = new TextNode({ text: ' ' });
56
- const summary = await extractor.generateNodeSummary(node);
57
- expect(summary).toBe('');
58
- });
59
-
60
- it('handles special characters and emojis', async () => {
61
- const extractor = new SummaryExtractor();
62
- const node = new TextNode({ text: '🚀✨🔥' });
63
- const summary = await extractor.generateNodeSummary(node);
64
- expect(typeof summary).toBe('string');
65
- expect(summary.length).toBeGreaterThan(0);
66
- });
67
-
68
- it('handles numbers only', async () => {
69
- const extractor = new SummaryExtractor();
70
- const node = new TextNode({ text: '1234567890' });
71
- const summary = await extractor.generateNodeSummary(node);
72
- expect(typeof summary).toBe('string');
73
- expect(summary.length).toBeGreaterThan(0);
74
- });
75
-
76
- it('handles HTML tags', async () => {
77
- const extractor = new SummaryExtractor();
78
- const node = new TextNode({ text: '<h1>Test</h1>' });
79
- const summary = await extractor.generateNodeSummary(node);
80
- expect(typeof summary).toBe('string');
81
- expect(summary.length).toBeGreaterThan(0);
82
- });
83
-
84
- it('handles non-English text', async () => {
85
- const extractor = new SummaryExtractor();
86
- const node = new TextNode({ text: '这是一个测试文档。' });
87
- const summary = await extractor.generateNodeSummary(node);
88
- expect(typeof summary).toBe('string');
89
- expect(summary.length).toBeGreaterThan(0);
90
- });
91
-
92
- it('handles duplicate/repeated text', async () => {
93
- const extractor = new SummaryExtractor();
94
- const node = new TextNode({ text: 'repeat repeat repeat' });
95
- const summary = await extractor.generateNodeSummary(node);
96
- expect(typeof summary).toBe('string');
97
- expect(summary.length).toBeGreaterThan(0);
98
- });
99
-
100
- it('handles only punctuation', async () => {
101
- const extractor = new SummaryExtractor();
102
- const node = new TextNode({ text: '!!!???...' });
103
- const summary = await extractor.generateNodeSummary(node);
104
- expect(typeof summary).toBe('string');
105
- expect(summary.length).toBeGreaterThan(0);
106
- });
107
- });
@@ -1,122 +0,0 @@
1
- import { Agent } from '@mastra/core/agent';
2
- import type { MastraLanguageModel } from '@mastra/core/agent';
3
- import { PromptTemplate, defaultSummaryPrompt } from '../prompts';
4
- import type { SummaryPrompt } from '../prompts';
5
- import type { BaseNode } from '../schema';
6
- import { TextNode } from '../schema';
7
- import { BaseExtractor } from './base';
8
- import { baseLLM, STRIP_REGEX } from './types';
9
- import type { SummaryExtractArgs } from './types';
10
-
11
- type ExtractSummary = {
12
- sectionSummary?: string;
13
- prevSectionSummary?: string;
14
- nextSectionSummary?: string;
15
- };
16
-
17
- /**
18
- * Summarize an array of nodes using a custom LLM.
19
- *
20
- * @param nodes Array of node-like objects
21
- * @param options Summary extraction options
22
- * @returns Array of summary results
23
- */
24
- export class SummaryExtractor extends BaseExtractor {
25
- private llm: MastraLanguageModel;
26
- summaries: string[];
27
- promptTemplate: SummaryPrompt;
28
- private selfSummary: boolean;
29
- private prevSummary: boolean;
30
- private nextSummary: boolean;
31
- constructor(options?: SummaryExtractArgs) {
32
- const summaries = options?.summaries ?? ['self'];
33
-
34
- if (summaries && !summaries.some(s => ['self', 'prev', 'next'].includes(s)))
35
- throw new Error("Summaries must be one of 'self', 'prev', 'next'");
36
-
37
- super();
38
-
39
- this.llm = options?.llm ?? baseLLM;
40
- this.summaries = summaries;
41
- this.promptTemplate = options?.promptTemplate
42
- ? new PromptTemplate({
43
- templateVars: ['context'],
44
- template: options.promptTemplate,
45
- })
46
- : defaultSummaryPrompt;
47
-
48
- this.selfSummary = summaries?.includes('self') ?? false;
49
- this.prevSummary = summaries?.includes('prev') ?? false;
50
- this.nextSummary = summaries?.includes('next') ?? false;
51
- }
52
-
53
- /**
54
- * Extract summary from a node.
55
- * @param {BaseNode} node Node to extract summary from.
56
- * @returns {Promise<string>} Summary extracted from the node.
57
- */
58
- async generateNodeSummary(node: BaseNode): Promise<string> {
59
- const text = node.getContent();
60
- if (!text || text.trim() === '') {
61
- return '';
62
- }
63
- if (this.isTextNodeOnly && !(node instanceof TextNode)) {
64
- return '';
65
- }
66
- const context = node.getContent();
67
-
68
- const prompt = this.promptTemplate.format({
69
- context,
70
- });
71
-
72
- const miniAgent = new Agent({
73
- model: this.llm,
74
- name: 'summary-extractor',
75
- instructions:
76
- 'You are a summary extractor. You are given a node and you need to extract the summary from the node.',
77
- });
78
-
79
- let summary = '';
80
- if (this.llm.specificationVersion === 'v2') {
81
- const result = await miniAgent.generateVNext([{ role: 'user', content: prompt }], { format: 'mastra' });
82
- summary = result.text;
83
- } else {
84
- const result = await miniAgent.generate([{ role: 'user', content: prompt }]);
85
- summary = result.text;
86
- }
87
-
88
- if (!summary) {
89
- console.warn('Summary extraction LLM output returned empty');
90
- return '';
91
- }
92
-
93
- return summary.replace(STRIP_REGEX, '');
94
- }
95
-
96
- /**
97
- * Extract summaries from a list of nodes.
98
- * @param {BaseNode[]} nodes Nodes to extract summaries from.
99
- * @returns {Promise<ExtractSummary[]>} Summaries extracted from the nodes.
100
- */
101
- async extract(nodes: BaseNode[]): Promise<ExtractSummary[]> {
102
- if (!nodes.every(n => n instanceof TextNode)) throw new Error('Only `TextNode` is allowed for `Summary` extractor');
103
-
104
- const nodeSummaries = await Promise.all(nodes.map(node => this.generateNodeSummary(node)));
105
-
106
- const metadataList: ExtractSummary[] = nodes.map(() => ({}));
107
-
108
- for (let i = 0; i < nodes.length; i++) {
109
- if (i > 0 && this.prevSummary && nodeSummaries[i - 1]) {
110
- metadataList[i]!['prevSectionSummary'] = nodeSummaries[i - 1];
111
- }
112
- if (i < nodes.length - 1 && this.nextSummary && nodeSummaries[i + 1]) {
113
- metadataList[i]!['nextSectionSummary'] = nodeSummaries[i + 1];
114
- }
115
- if (this.selfSummary && nodeSummaries[i]) {
116
- metadataList[i]!['sectionSummary'] = nodeSummaries[i];
117
- }
118
- }
119
-
120
- return metadataList;
121
- }
122
- }
@@ -1,121 +0,0 @@
1
- import { createOpenAI } from '@ai-sdk/openai';
2
- import { describe, it, expect, vi } from 'vitest';
3
- import { TextNode } from '../schema';
4
- import { TitleExtractor } from './title';
5
-
6
- const openai = createOpenAI({
7
- apiKey: process.env.OPENAI_API_KEY,
8
- });
9
-
10
- const model = openai('gpt-4o');
11
-
12
- vi.setConfig({ testTimeout: 50_000, hookTimeout: 50_000 });
13
-
14
- describe('TitleExtractor', () => {
15
- it('can use a custom model from the test suite', async () => {
16
- const extractor = new TitleExtractor({ llm: model });
17
- const node = new TextNode({ text: 'A title test using a custom model.' });
18
- const titles = await extractor.extract([node]);
19
- expect(Array.isArray(titles)).toBe(true);
20
- expect(titles[0]).toHaveProperty('documentTitle');
21
- expect(typeof titles[0].documentTitle).toBe('string');
22
- expect(titles[0].documentTitle.length).toBeGreaterThan(0);
23
- });
24
-
25
- it('extracts title', async () => {
26
- const extractor = new TitleExtractor({ llm: model });
27
- const node = new TextNode({ text: 'This is a test document.' });
28
- const titles = await extractor.extract([node]);
29
- expect(Array.isArray(titles)).toBe(true);
30
- expect(titles[0]).toHaveProperty('documentTitle');
31
- expect(typeof titles[0].documentTitle).toBe('string');
32
- expect(titles[0].documentTitle.length).toBeGreaterThan(0);
33
- });
34
-
35
- it('handles empty input gracefully', async () => {
36
- const extractor = new TitleExtractor({ llm: model });
37
- const node = new TextNode({ text: '' });
38
- const titles = await extractor.extract([node]);
39
- expect(titles[0].documentTitle).toBe('');
40
- });
41
-
42
- it('supports prompt customization', async () => {
43
- const extractor = new TitleExtractor({ llm: model, nodeTemplate: 'Title for: {context}' });
44
- const node = new TextNode({ text: 'Test document for prompt customization.' });
45
- const titles = await extractor.extract([node]);
46
- expect(titles[0]).toHaveProperty('documentTitle');
47
- expect(typeof titles[0].documentTitle).toBe('string');
48
- expect(titles[0].documentTitle.length).toBeGreaterThan(0);
49
- });
50
-
51
- it('handles very long input', async () => {
52
- const extractor = new TitleExtractor({ llm: model });
53
- const longText = 'A'.repeat(1000);
54
- const node = new TextNode({ text: longText });
55
- const titles = await extractor.extract([node]);
56
- expect(titles[0]).toHaveProperty('documentTitle');
57
- expect(typeof titles[0].documentTitle).toBe('string');
58
- expect(titles[0].documentTitle.length).toBeGreaterThan(0);
59
- });
60
-
61
- it('handles whitespace only input', async () => {
62
- const extractor = new TitleExtractor({ llm: model });
63
- const node = new TextNode({ text: ' ' });
64
- const titles = await extractor.extract([node]);
65
- expect(titles[0].documentTitle).toBe('');
66
- });
67
-
68
- it('handles special characters and emojis', async () => {
69
- const extractor = new TitleExtractor({ llm: model });
70
- const node = new TextNode({ text: '🚀✨🔥' });
71
- const titles = await extractor.extract([node]);
72
- expect(titles[0]).toHaveProperty('documentTitle');
73
- expect(typeof titles[0].documentTitle).toBe('string');
74
- expect(titles[0].documentTitle.length).toBeGreaterThan(0);
75
- });
76
-
77
- it('handles numbers only', async () => {
78
- const extractor = new TitleExtractor({ llm: model });
79
- const node = new TextNode({ text: '1234567890' });
80
- const titles = await extractor.extract([node]);
81
- expect(titles[0]).toHaveProperty('documentTitle');
82
- expect(typeof titles[0].documentTitle).toBe('string');
83
- expect(titles[0].documentTitle.length).toBeGreaterThan(0);
84
- });
85
-
86
- it('handles HTML tags', async () => {
87
- const extractor = new TitleExtractor({ llm: model });
88
- const node = new TextNode({ text: '<h1>Test</h1>' });
89
- const titles = await extractor.extract([node]);
90
- expect(titles[0]).toHaveProperty('documentTitle');
91
- expect(typeof titles[0].documentTitle).toBe('string');
92
- expect(titles[0].documentTitle.length).toBeGreaterThan(0);
93
- });
94
-
95
- it('handles non-English text', async () => {
96
- const extractor = new TitleExtractor({ llm: model });
97
- const node = new TextNode({ text: '这是一个测试文档。' });
98
- const titles = await extractor.extract([node]);
99
- expect(titles[0]).toHaveProperty('documentTitle');
100
- expect(typeof titles[0].documentTitle).toBe('string');
101
- expect(titles[0].documentTitle.length).toBeGreaterThan(0);
102
- });
103
-
104
- it('handles duplicate/repeated text', async () => {
105
- const extractor = new TitleExtractor({ llm: model });
106
- const node = new TextNode({ text: 'repeat repeat repeat' });
107
- const titles = await extractor.extract([node]);
108
- expect(titles[0]).toHaveProperty('documentTitle');
109
- expect(typeof titles[0].documentTitle).toBe('string');
110
- expect(titles[0].documentTitle.length).toBeGreaterThan(0);
111
- });
112
-
113
- it('handles only punctuation', async () => {
114
- const extractor = new TitleExtractor({ llm: model });
115
- const node = new TextNode({ text: '!!!???...' });
116
- const titles = await extractor.extract([node]);
117
- expect(titles[0]).toHaveProperty('documentTitle');
118
- expect(typeof titles[0].documentTitle).toBe('string');
119
- expect(titles[0].documentTitle.length).toBeGreaterThan(0);
120
- });
121
- });