@mastra/rag 0.0.2-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,7 @@
1
+ # @mastra/rag
2
+
3
+ ## 0.0.2-alpha.0
4
+
5
+ ### Patch Changes
6
+
7
+ - 8413645: Initial release for rag
@@ -0,0 +1,18 @@
1
+ services:
2
+ db:
3
+ image: pgvector/pgvector:pg16
4
+ container_name: 'rag-db'
5
+ ports:
6
+ - '5433:5432'
7
+ environment:
8
+ POSTGRES_USER: ${POSTGRES_USER:-postgres}
9
+ POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
10
+ POSTGRES_DB: ${POSTGRES_DB:-mastra}
11
+ volumes:
12
+ - pgdata:/var/lib/postgresql/data
13
+ qdrant:
14
+ image: qdrant/qdrant
15
+ ports:
16
+ - 6333:6333
17
+ volumes:
18
+ pgdata:
package/jest.config.ts ADDED
@@ -0,0 +1,19 @@
1
+ import { config } from 'dotenv';
2
+
3
// Run dotenv's `config()` before the Jest configuration object is evaluated.
config();

/**
 * Transformer entry for TypeScript sources: ts-jest with ESM output enabled.
 */
const tsJestEsmTransform = ['ts-jest', { useESM: true }];

/**
 * Jest configuration for the package.
 *
 * - `.ts` files are treated as ES modules.
 * - Relative imports written with a `.js` extension (`./foo.js`, `../bar.js`)
 *   are mapped to the same path without the extension.
 */
const jestConfig = {
  preset: 'ts-jest',
  extensionsToTreatAsEsm: ['.ts'],
  moduleNameMapper: {
    '^(\\.{1,2}/.*)\\.js$': '$1',
  },
  transform: {
    '^.+\\.tsx?$': tsJestEsmTransform,
  },
};

export default jestConfig;
package/package.json ADDED
@@ -0,0 +1,57 @@
1
+ {
2
+ "name": "@mastra/rag",
3
+ "version": "0.0.2-alpha.0",
4
+ "description": "",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "module": "dist/rag.esm.js",
8
+ "types": "dist/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "import": {
12
+ "types": "./dist/index.d.ts",
13
+ "default": "./dist/rag.esm.js"
14
+ },
15
+ "require": {
16
+ "types": "./dist/index.d.ts",
17
+ "default": "./dist/index.js"
18
+ }
19
+ },
20
+ "./package.json": "./package.json"
21
+ },
22
+ "keywords": [],
23
+ "author": "",
24
+ "license": "ISC",
25
+ "dependencies": {
26
+ "@date-fns/utc": "^1.2.0",
27
+ "@llamaindex/core": "^0.4.10",
28
+ "@llamaindex/env": "^0.1.20",
29
+ "@paralleldrive/cuid2": "^2.2.2",
30
+ "@pinecone-database/pinecone": "^3.0.3",
31
+ "@qdrant/js-client-rest": "^1.12.0",
32
+ "date-fns": "^4.1.0",
33
+ "dotenv": "^16.3.1",
34
+ "drizzle-orm": "^0.36.1",
35
+ "llamaindex": "^0.8.15",
36
+ "pg": "^8.13.1",
37
+ "postgres": "^3.4.5",
38
+ "zod": "3.23.7",
39
+ "@mastra/core": "0.1.27-alpha.20"
40
+ },
41
+ "devDependencies": {
42
+ "@babel/preset-env": "^7.26.0",
43
+ "@babel/preset-typescript": "^7.26.0",
44
+ "@tsconfig/recommended": "^1.0.7",
45
+ "@types/node": "^22.9.0",
46
+ "@types/pg": "^8.11.10",
47
+ "drizzle-kit": "^0.28.0",
48
+ "dts-cli": "^2.0.5",
49
+ "jest": "^29.7.0",
50
+ "ts-jest": "^29.2.5"
51
+ },
52
+ "scripts": {
53
+ "build": "dts build",
54
+ "build:dev": "dts watch",
55
+ "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js"
56
+ }
57
+ }
@@ -0,0 +1,229 @@
1
+ import { describe, it, expect } from '@jest/globals';
2
+ import dotenv from 'dotenv';
3
+
4
+ import { MastraDocument } from '.';
5
+
6
+ dotenv.config();
7
+
8
+ const sampleMarkdown = `
9
+ # Complete Guide to Modern Web Development
10
+ ## Introduction
11
+ Welcome to our comprehensive guide on modern web development. This resource covers essential concepts, best practices, and tools that every developer should know in 2024.
12
+
13
+ ### Who This Guide Is For
14
+ - Beginning developers looking to establish a solid foundation
15
+ - Intermediate developers wanting to modernize their skillset
16
+ - Senior developers seeking a refresher on current best practices
17
+
18
+ ## Core Concepts
19
+
20
+ ### 1. Frontend Development
21
+ Modern frontend development has evolved significantly. Here are the key areas to focus on:
22
+
23
+ #### HTML5 Semantic Elements
24
+ Using semantic HTML improves:
25
+ - Accessibility
26
+ - SEO performance
27
+ - Code readability
28
+ - Maintenance
29
+
30
+ \`\`\`html
31
+ <header>
32
+ <nav>
33
+ <ul>
34
+ <li><a href="#home">Home</a></li>
35
+ <li><a href="#about">About</a></li>
36
+ </ul>
37
+ </nav>
38
+ </header>
39
+ \`\`\`
40
+
41
+ #### CSS Best Practices
42
+ 1. Use CSS Custom Properties
43
+ 2. Implement responsive design
44
+ 3. Follow BEM methodology
45
+ 4. Optimize performance
46
+
47
+ ### 2. JavaScript Fundamentals
48
+
49
+ JavaScript is the backbone of web development. Here's what you need to know:
50
+
51
+ \`\`\`javascript
52
+ // Modern JS features
53
+ const exampleFunction = async () => {
54
+ try {
55
+ const response = await fetch('https://api.example.com/data');
56
+ const data = await response.json();
57
+ return data;
58
+ } catch (error) {
59
+ console.error('Error fetching data:', error);
60
+ }
61
+ };
62
+ \`\`\`
63
+
64
+ #### Key Concepts:
65
+ - Promises and async/await
66
+ - ES6+ features
67
+ - TypeScript integration
68
+ - Module systems
69
+
70
+ ### 3. Backend Development
71
+
72
+ Backend development requires understanding:
73
+
74
+ 1. **Server Architecture**
75
+ - RESTful APIs
76
+ - GraphQL
77
+ - Microservices
78
+
79
+ 2. **Database Management**
80
+ - SQL vs NoSQL
81
+ - Query optimization
82
+ - Data modeling
83
+
84
+ 3. **Security Considerations**
85
+ - Authentication
86
+ - Authorization
87
+ - Data encryption
88
+
89
+ ## Tools and Technologies
90
+
91
+ ### Essential Developer Tools
92
+
93
+ | Category | Tools |
94
+ |----------|-------|
95
+ | Version Control | Git, GitHub |
96
+ | Package Managers | npm, yarn |
97
+ | Bundlers | webpack, Vite |
98
+ | Testing | Jest, Cypress |
99
+
100
+ ### Framework Selection
101
+
102
+ #### Frontend Frameworks
103
+ 1. React
104
+ - Component-based architecture
105
+ - Virtual DOM
106
+ - Large ecosystem
107
+
108
+ 2. Vue
109
+ - Progressive framework
110
+ - Easy learning curve
111
+ - Great documentation
112
+
113
+ 3. Angular
114
+ - Full-featured framework
115
+ - TypeScript integration
116
+ - Enterprise-ready
117
+
118
+ #### Backend Frameworks
119
+ - Node.js/Express
120
+ - Django
121
+ - Ruby on Rails
122
+ - Spring Boot
123
+
124
+ ## Best Practices
125
+
126
+ ### Code Quality
127
+ - Write clean, maintainable code
128
+ - Follow SOLID principles
129
+ - Implement proper error handling
130
+ - Use consistent formatting
131
+
132
+ ### Performance Optimization
133
+ 1. Minimize HTTP requests
134
+ 2. Optimize images
135
+ 3. Implement caching
136
+ 4. Use lazy loading
137
+
138
+ ### Security Measures
139
+ - Implement HTTPS
140
+ - Sanitize user input
141
+ - Use secure dependencies
142
+ - Regular security audits
143
+
144
+ ## Deployment and DevOps
145
+
146
+ ### Continuous Integration/Continuous Deployment (CI/CD)
147
+ 1. Automated testing
148
+ 2. Build automation
149
+ 3. Deployment automation
150
+ 4. Monitoring and logging
151
+
152
+ ### Cloud Services
153
+ - AWS
154
+ - Google Cloud Platform
155
+ - Azure
156
+ - Heroku
157
+
158
+ ## Conclusion
159
+
160
+ Remember that web development is an ever-evolving field. Stay current with:
161
+ - Industry trends
162
+ - New tools and frameworks
163
+ - Security best practices
164
+ - Performance optimization techniques
165
+
166
+ ### Additional Resources
167
+
168
+ * [MDN Web Docs](https://developer.mozilla.org)
169
+ * [Web.dev](https://web.dev)
170
+ * [CSS-Tricks](https://css-tricks.com)
171
+ * [JavaScript.info](https://javascript.info)
172
+
173
+ ---
174
+
175
+ > "Any application that can be written in JavaScript, will eventually be written in JavaScript." - Jeff Atwood
176
+
177
+ ---
178
+
179
+ **Note**: This guide is regularly updated to reflect current web development practices and standards.
180
+ `;
181
+
182
describe('MastraDocument', () => {
  /**
   * Returns a fresh copy of the splitter settings used by both chunking tests,
   * so each test hands its own object to `chunk`.
   *
   * NOTE(review): `secondaryChunkingRegex` is passed as a string that includes
   * the `/.../g` delimiters — confirm this is the form SentenceSplitter expects.
   */
  const newlineStrategy = () => ({
    chunkSize: 100,
    chunkOverlap: 0,
    separator: `\n`,
    paragraphSeparator: `\n`,
    secondaryChunkingRegex: `/(\n)/g`,
  });

  it('initialization', () => {
    const { documents } = new MastraDocument({ text: 'test' });

    expect(documents).toHaveLength(1);
    expect(documents[0]?.text).toBe('test');
  });

  it('initialization with array', () => {
    const { documents } = new MastraDocument([{ text: 'test' }, { text: 'test2' }]);

    expect(documents).toHaveLength(2);
    expect(documents[0]?.text).toBe('test');
    expect(documents[1]?.text).toBe('test2');
  });

  it('chunk - no metadata', async () => {
    const markdownDoc = new MastraDocument({ text: sampleMarkdown });

    const chunks = await markdownDoc.chunk({ strategy: newlineStrategy() });

    expect(chunks.length).toBe(10);
  });

  it('chunk - metadata title', async () => {
    const markdownDoc = new MastraDocument({ text: sampleMarkdown });

    // Same splitter settings as above, but with the markdown parser enabled.
    const chunks = await markdownDoc.chunk({
      parseMarkdown: true,
      strategy: newlineStrategy(),
    });

    const [firstChunk] = chunks;
    expect(firstChunk?.toJSON().text).toBe(`Complete Guide to Modern Web Development`);
    expect(chunks.length).toBe(27);
  }, 500000);
});
@@ -0,0 +1,129 @@
1
+ import {
2
+ MarkdownNodeParser,
3
+ Document,
4
+ SummaryExtractor,
5
+ IngestionPipeline,
6
+ LLM,
7
+ TitleCombinePrompt,
8
+ TitleExtractor,
9
+ TitleExtractorPrompt,
10
+ SummaryPrompt,
11
+ QuestionsAnsweredExtractor,
12
+ QuestionExtractPrompt,
13
+ KeywordExtractor,
14
+ KeywordExtractPrompt,
15
+ SentenceSplitter,
16
+ } from 'llamaindex';
17
+
18
/** Shape of a single document passed to the `MastraDocument` constructor. */
type DocumentInitializer = {
  text: string;
  metadata?: Record<string, any>;
};

/** Encoder/decoder pair that converts between text and `Uint32Array` tokens. */
type Tokenizer = {
  encode: (text: string) => Uint32Array;
  decode: (tokens: Uint32Array) => string;
};

/** Optional tokenizer override accepted alongside the sentence-splitting settings. */
type SplitterParams = {
  tokenizer?: Tokenizer;
};

/** Sentence-splitting settings; every field is optional. */
type BaseSentenceParams = {
  chunkSize?: number;
  chunkOverlap?: number;
  separator?: string;
  paragraphSeparator?: string;
  secondaryChunkingRegex?: string;
};

/** Full strategy object handed to `SentenceSplitter` by `MastraDocument.chunk`. */
type SentenceParam = BaseSentenceParams & SplitterParams;

/** Options handed to `TitleExtractor` when title extraction is configured with an object. */
type TitleExtractorsArgs = {
  llm?: LLM;
  nodes?: number;
  nodeTemplate?: TitleExtractorPrompt['template'];
  combineTemplate?: TitleCombinePrompt['template'];
};

/** Options handed to `SummaryExtractor` when summary extraction is configured with an object. */
type SummaryExtractArgs = {
  llm?: LLM;
  summaries?: string[];
  promptTemplate?: SummaryPrompt['template'];
};

/** Options handed to `QuestionsAnsweredExtractor` when configured with an object. */
type QuestionAnswerExtractArgs = {
  llm?: LLM;
  questions?: number;
  promptTemplate?: QuestionExtractPrompt['template'];
  embeddingOnly?: boolean;
};

/** Options handed to `KeywordExtractor` when keyword extraction is configured with an object. */
type KeywordExtractArgs = {
  llm?: LLM;
  keywords?: number;
  promptTemplate?: KeywordExtractPrompt['template'];
};
67
+
68
/**
 * Holds one or more llamaindex `Document`s and chunks them by running an
 * `IngestionPipeline` over them.
 */
export class MastraDocument {
  /** Documents created from the initializer(s) passed to the constructor. */
  documents: Document[];

  /**
   * @param config - A single initializer, or an array of initializers. Each
   *   initializer becomes one `Document`; for arrays only `text` and
   *   `metadata` are forwarded.
   */
  constructor(config: DocumentInitializer | DocumentInitializer[]) {
    if (Array.isArray(config)) {
      this.documents = config.map(({ text, metadata }) => new Document({ text, metadata }));
    } else {
      this.documents = [new Document(config)];
    }
  }

  /**
   * Runs the held documents through an ingestion pipeline and returns the
   * resulting nodes.
   *
   * The pipeline is assembled in this order: optional `MarkdownNodeParser`,
   * `SentenceSplitter`, then any requested metadata extractors (title,
   * summary, questions answered, keyword).
   *
   * @param options.strategy - Settings passed to `SentenceSplitter`.
   * @param options.parseMarkdown - When truthy, a `MarkdownNodeParser` runs
   *   before the sentence splitter.
   * @param options.metadataExtraction - Per-extractor switches. `true`
   *   enables the extractor with empty options, an object enables it with
   *   those options, and `false` or an omitted key leaves it out.
   * @returns The nodes produced by `IngestionPipeline.run`.
   */
  async chunk({
    strategy,
    metadataExtraction = {},
    parseMarkdown,
  }: {
    parseMarkdown?: boolean;
    strategy: SentenceParam;
    metadataExtraction?: {
      title?: TitleExtractorsArgs | boolean;
      summary?: SummaryExtractArgs | boolean;
      questionsAnswered?: QuestionAnswerExtractArgs | boolean;
      keyword?: KeywordExtractArgs | boolean;
    };
  }) {
    const { title, summary, questionsAnswered, keyword } = metadataExtraction;
    const transformations = [];

    if (parseMarkdown) {
      transformations.push(new MarkdownNodeParser());
    }

    transformations.push(new SentenceSplitter(strategy));

    // Each switch is checked for truthiness rather than "not undefined", so an
    // explicit `false` disables the extractor instead of silently enabling it.
    if (title) {
      transformations.push(new TitleExtractor(title === true ? {} : title));
    }

    if (summary) {
      transformations.push(new SummaryExtractor(summary === true ? {} : summary));
    }

    if (questionsAnswered) {
      transformations.push(
        new QuestionsAnsweredExtractor(questionsAnswered === true ? {} : questionsAnswered),
      );
    }

    if (keyword) {
      transformations.push(new KeywordExtractor(keyword === true ? {} : keyword));
    }

    const pipeline = new IngestionPipeline({ transformations });

    return pipeline.run({ documents: this.documents });
  }
}
package/src/index.ts ADDED
@@ -0,0 +1,4 @@
1
+ export * from './document';
2
+ export * from './pg';
3
+ export * from './pinecone';
4
+ export * from './qdrant';