@mastra/rag 0.0.2-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/docker-compose.yaml +18 -0
- package/jest.config.ts +19 -0
- package/package.json +57 -0
- package/src/document/index.test.ts +229 -0
- package/src/document/index.ts +129 -0
- package/src/index.ts +4 -0
- package/src/pg/index.ts +255 -0
- package/src/pg/index_test.ts +212 -0
- package/src/pinecone/index.test.ts +130 -0
- package/src/pinecone/index.ts +118 -0
- package/src/qdrant/index.test.ts +119 -0
- package/src/qdrant/index.ts +116 -0
- package/tsconfig.json +10 -0
package/CHANGELOG.md
ADDED
package/docker-compose.yaml
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Compose services: a pgvector-enabled Postgres (`db`) and Qdrant (`qdrant`).
services:
  db:
    image: pgvector/pgvector:pg16
    container_name: 'rag-db'
    ports:
      # Host port 5433 is mapped to port 5432 inside the container.
      - '5433:5432'
    environment:
      # `${VAR:-default}` uses the default when VAR is unset or empty.
      POSTGRES_USER: ${POSTGRES_USER:-postgres}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
      POSTGRES_DB: ${POSTGRES_DB:-mastra}
    volumes:
      # Database files live on the named volume declared at the bottom of this file.
      - pgdata:/var/lib/postgresql/data
  qdrant:
    image: qdrant/qdrant
    ports:
      # Quoted like the `db` mapping: Compose recommends always writing port
      # mappings as strings so YAML never interprets HOST:CONTAINER as a number.
      - '6333:6333'
volumes:
  pgdata:
|
package/jest.config.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { config } from 'dotenv';
|
|
2
|
+
|
|
3
|
+
// Let dotenv load its environment file before the Jest configuration is evaluated.
config();

// ts-jest transformer entry with ESM output enabled.
const tsJestEsmTransform = ['ts-jest', { useESM: true }];

// Jest configuration: ts-jest preset with `.ts` files treated as ES modules.
const jestConfig = {
  preset: 'ts-jest',
  extensionsToTreatAsEsm: ['.ts'],
  moduleNameMapper: {
    // Map relative imports ending in ".js" to the same path without the extension.
    '^(\\.{1,2}/.*)\\.js$': '$1',
  },
  transform: {
    // Applies to every file ending in `.ts` or `.tsx`.
    '^.+\\.tsx?$': tsJestEsmTransform,
  },
};

export default jestConfig;
|
package/package.json
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@mastra/rag",
|
|
3
|
+
"version": "0.0.2-alpha.0",
|
|
4
|
+
"description": "",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"module": "dist/rag.esm.js",
|
|
8
|
+
"types": "dist/index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"import": {
|
|
12
|
+
"types": "./dist/index.d.ts",
|
|
13
|
+
"default": "./dist/rag.esm.js"
|
|
14
|
+
},
|
|
15
|
+
"require": {
|
|
16
|
+
"types": "./dist/index.d.ts",
|
|
17
|
+
"default": "./dist/index.js"
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"./package.json": "./package.json"
|
|
21
|
+
},
|
|
22
|
+
"keywords": [],
|
|
23
|
+
"author": "",
|
|
24
|
+
"license": "ISC",
|
|
25
|
+
"dependencies": {
|
|
26
|
+
"@date-fns/utc": "^1.2.0",
|
|
27
|
+
"@llamaindex/core": "^0.4.10",
|
|
28
|
+
"@llamaindex/env": "^0.1.20",
|
|
29
|
+
"@paralleldrive/cuid2": "^2.2.2",
|
|
30
|
+
"@pinecone-database/pinecone": "^3.0.3",
|
|
31
|
+
"@qdrant/js-client-rest": "^1.12.0",
|
|
32
|
+
"date-fns": "^4.1.0",
|
|
33
|
+
"dotenv": "^16.3.1",
|
|
34
|
+
"drizzle-orm": "^0.36.1",
|
|
35
|
+
"llamaindex": "^0.8.15",
|
|
36
|
+
"pg": "^8.13.1",
|
|
37
|
+
"postgres": "^3.4.5",
|
|
38
|
+
"zod": "3.23.7",
|
|
39
|
+
"@mastra/core": "0.1.27-alpha.20"
|
|
40
|
+
},
|
|
41
|
+
"devDependencies": {
|
|
42
|
+
"@babel/preset-env": "^7.26.0",
|
|
43
|
+
"@babel/preset-typescript": "^7.26.0",
|
|
44
|
+
"@tsconfig/recommended": "^1.0.7",
|
|
45
|
+
"@types/node": "^22.9.0",
|
|
46
|
+
"@types/pg": "^8.11.10",
|
|
47
|
+
"drizzle-kit": "^0.28.0",
|
|
48
|
+
"dts-cli": "^2.0.5",
|
|
49
|
+
"jest": "^29.7.0",
|
|
50
|
+
"ts-jest": "^29.2.5"
|
|
51
|
+
},
|
|
52
|
+
"scripts": {
|
|
53
|
+
"build": "dts build",
|
|
54
|
+
"build:dev": "dts watch",
|
|
55
|
+
"test": "node --experimental-vm-modules node_modules/jest/bin/jest.js"
|
|
56
|
+
}
|
|
57
|
+
}
|
package/src/document/index.test.ts
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
import { describe, it, expect } from '@jest/globals';
|
|
2
|
+
import dotenv from 'dotenv';
|
|
3
|
+
|
|
4
|
+
import { MastraDocument } from '.';
|
|
5
|
+
|
|
6
|
+
dotenv.config();
|
|
7
|
+
|
|
8
|
+
const sampleMarkdown = `
|
|
9
|
+
# Complete Guide to Modern Web Development
|
|
10
|
+
## Introduction
|
|
11
|
+
Welcome to our comprehensive guide on modern web development. This resource covers essential concepts, best practices, and tools that every developer should know in 2024.
|
|
12
|
+
|
|
13
|
+
### Who This Guide Is For
|
|
14
|
+
- Beginning developers looking to establish a solid foundation
|
|
15
|
+
- Intermediate developers wanting to modernize their skillset
|
|
16
|
+
- Senior developers seeking a refresher on current best practices
|
|
17
|
+
|
|
18
|
+
## Core Concepts
|
|
19
|
+
|
|
20
|
+
### 1. Frontend Development
|
|
21
|
+
Modern frontend development has evolved significantly. Here are the key areas to focus on:
|
|
22
|
+
|
|
23
|
+
#### HTML5 Semantic Elements
|
|
24
|
+
Using semantic HTML improves:
|
|
25
|
+
- Accessibility
|
|
26
|
+
- SEO performance
|
|
27
|
+
- Code readability
|
|
28
|
+
- Maintenance
|
|
29
|
+
|
|
30
|
+
\`\`\`html
|
|
31
|
+
<header>
|
|
32
|
+
<nav>
|
|
33
|
+
<ul>
|
|
34
|
+
<li><a href="#home">Home</a></li>
|
|
35
|
+
<li><a href="#about">About</a></li>
|
|
36
|
+
</ul>
|
|
37
|
+
</nav>
|
|
38
|
+
</header>
|
|
39
|
+
\`\`\`
|
|
40
|
+
|
|
41
|
+
#### CSS Best Practices
|
|
42
|
+
1. Use CSS Custom Properties
|
|
43
|
+
2. Implement responsive design
|
|
44
|
+
3. Follow BEM methodology
|
|
45
|
+
4. Optimize performance
|
|
46
|
+
|
|
47
|
+
### 2. JavaScript Fundamentals
|
|
48
|
+
|
|
49
|
+
JavaScript is the backbone of web development. Here's what you need to know:
|
|
50
|
+
|
|
51
|
+
\`\`\`javascript
|
|
52
|
+
// Modern JS features
|
|
53
|
+
const exampleFunction = async () => {
|
|
54
|
+
try {
|
|
55
|
+
const response = await fetch('https://api.example.com/data');
|
|
56
|
+
const data = await response.json();
|
|
57
|
+
return data;
|
|
58
|
+
} catch (error) {
|
|
59
|
+
console.error('Error fetching data:', error);
|
|
60
|
+
}
|
|
61
|
+
};
|
|
62
|
+
\`\`\`
|
|
63
|
+
|
|
64
|
+
#### Key Concepts:
|
|
65
|
+
- Promises and async/await
|
|
66
|
+
- ES6+ features
|
|
67
|
+
- TypeScript integration
|
|
68
|
+
- Module systems
|
|
69
|
+
|
|
70
|
+
### 3. Backend Development
|
|
71
|
+
|
|
72
|
+
Backend development requires understanding:
|
|
73
|
+
|
|
74
|
+
1. **Server Architecture**
|
|
75
|
+
- RESTful APIs
|
|
76
|
+
- GraphQL
|
|
77
|
+
- Microservices
|
|
78
|
+
|
|
79
|
+
2. **Database Management**
|
|
80
|
+
- SQL vs NoSQL
|
|
81
|
+
- Query optimization
|
|
82
|
+
- Data modeling
|
|
83
|
+
|
|
84
|
+
3. **Security Considerations**
|
|
85
|
+
- Authentication
|
|
86
|
+
- Authorization
|
|
87
|
+
- Data encryption
|
|
88
|
+
|
|
89
|
+
## Tools and Technologies
|
|
90
|
+
|
|
91
|
+
### Essential Developer Tools
|
|
92
|
+
|
|
93
|
+
| Category | Tools |
|
|
94
|
+
|----------|-------|
|
|
95
|
+
| Version Control | Git, GitHub |
|
|
96
|
+
| Package Managers | npm, yarn |
|
|
97
|
+
| Bundlers | webpack, Vite |
|
|
98
|
+
| Testing | Jest, Cypress |
|
|
99
|
+
|
|
100
|
+
### Framework Selection
|
|
101
|
+
|
|
102
|
+
#### Frontend Frameworks
|
|
103
|
+
1. React
|
|
104
|
+
- Component-based architecture
|
|
105
|
+
- Virtual DOM
|
|
106
|
+
- Large ecosystem
|
|
107
|
+
|
|
108
|
+
2. Vue
|
|
109
|
+
- Progressive framework
|
|
110
|
+
- Easy learning curve
|
|
111
|
+
- Great documentation
|
|
112
|
+
|
|
113
|
+
3. Angular
|
|
114
|
+
- Full-featured framework
|
|
115
|
+
- TypeScript integration
|
|
116
|
+
- Enterprise-ready
|
|
117
|
+
|
|
118
|
+
#### Backend Frameworks
|
|
119
|
+
- Node.js/Express
|
|
120
|
+
- Django
|
|
121
|
+
- Ruby on Rails
|
|
122
|
+
- Spring Boot
|
|
123
|
+
|
|
124
|
+
## Best Practices
|
|
125
|
+
|
|
126
|
+
### Code Quality
|
|
127
|
+
- Write clean, maintainable code
|
|
128
|
+
- Follow SOLID principles
|
|
129
|
+
- Implement proper error handling
|
|
130
|
+
- Use consistent formatting
|
|
131
|
+
|
|
132
|
+
### Performance Optimization
|
|
133
|
+
1. Minimize HTTP requests
|
|
134
|
+
2. Optimize images
|
|
135
|
+
3. Implement caching
|
|
136
|
+
4. Use lazy loading
|
|
137
|
+
|
|
138
|
+
### Security Measures
|
|
139
|
+
- Implement HTTPS
|
|
140
|
+
- Sanitize user input
|
|
141
|
+
- Use secure dependencies
|
|
142
|
+
- Regular security audits
|
|
143
|
+
|
|
144
|
+
## Deployment and DevOps
|
|
145
|
+
|
|
146
|
+
### Continuous Integration/Continuous Deployment (CI/CD)
|
|
147
|
+
1. Automated testing
|
|
148
|
+
2. Build automation
|
|
149
|
+
3. Deployment automation
|
|
150
|
+
4. Monitoring and logging
|
|
151
|
+
|
|
152
|
+
### Cloud Services
|
|
153
|
+
- AWS
|
|
154
|
+
- Google Cloud Platform
|
|
155
|
+
- Azure
|
|
156
|
+
- Heroku
|
|
157
|
+
|
|
158
|
+
## Conclusion
|
|
159
|
+
|
|
160
|
+
Remember that web development is an ever-evolving field. Stay current with:
|
|
161
|
+
- Industry trends
|
|
162
|
+
- New tools and frameworks
|
|
163
|
+
- Security best practices
|
|
164
|
+
- Performance optimization techniques
|
|
165
|
+
|
|
166
|
+
### Additional Resources
|
|
167
|
+
|
|
168
|
+
* [MDN Web Docs](https://developer.mozilla.org)
|
|
169
|
+
* [Web.dev](https://web.dev)
|
|
170
|
+
* [CSS-Tricks](https://css-tricks.com)
|
|
171
|
+
* [JavaScript.info](https://javascript.info)
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
> "Any application that can be written in JavaScript, will eventually be written in JavaScript." - Jeff Atwood
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
**Note**: This guide is regularly updated to reflect current web development practices and standards.
|
|
180
|
+
`;
|
|
181
|
+
|
|
182
|
+
describe('MastraDocument', () => {
  // Returns a fresh newline-based splitter configuration so the chunking tests
  // share one definition without sharing an object instance.
  const newlineStrategy = () => ({
    chunkSize: 100,
    chunkOverlap: 0,
    separator: `\n`,
    paragraphSeparator: `\n`,
    secondaryChunkingRegex: `/(\n)/g`,
  });

  it('initialization', () => {
    const doc = new MastraDocument({ text: 'test' });

    expect(doc.documents).toHaveLength(1);
    expect(doc.documents[0]?.text).toBe('test');
  });

  it('initialization with array', () => {
    const doc = new MastraDocument([{ text: 'test' }, { text: 'test2' }]);

    expect(doc.documents).toHaveLength(2);
    expect(doc.documents[0]?.text).toBe('test');
    expect(doc.documents[1]?.text).toBe('test2');
  });

  it('chunk - no metadata', async () => {
    const doc = new MastraDocument({ text: sampleMarkdown });

    const nodes = await doc.chunk({
      strategy: newlineStrategy(),
    });

    // `toHaveLength` matches the initialization tests and reports the received
    // value on failure.
    expect(nodes).toHaveLength(10);
  });

  it('chunk - metadata title', async () => {
    const doc = new MastraDocument({ text: sampleMarkdown });

    const nodes = await doc.chunk({
      parseMarkdown: true,
      strategy: newlineStrategy(),
    });

    expect(nodes[0]?.toJSON().text).toBe(`Complete Guide to Modern Web Development`);
    expect(nodes).toHaveLength(27);
  }, 500000);
});
|
package/src/document/index.ts
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import {
|
|
2
|
+
MarkdownNodeParser,
|
|
3
|
+
Document,
|
|
4
|
+
SummaryExtractor,
|
|
5
|
+
IngestionPipeline,
|
|
6
|
+
LLM,
|
|
7
|
+
TitleCombinePrompt,
|
|
8
|
+
TitleExtractor,
|
|
9
|
+
TitleExtractorPrompt,
|
|
10
|
+
SummaryPrompt,
|
|
11
|
+
QuestionsAnsweredExtractor,
|
|
12
|
+
QuestionExtractPrompt,
|
|
13
|
+
KeywordExtractor,
|
|
14
|
+
KeywordExtractPrompt,
|
|
15
|
+
SentenceSplitter,
|
|
16
|
+
} from 'llamaindex';
|
|
17
|
+
|
|
18
|
+
/** Pair of functions converting between text and `Uint32Array` token sequences. */
type Tokenizer = {
  encode: (text: string) => Uint32Array;
  decode: (tokens: Uint32Array) => string;
};

/** Input accepted by the `MastraDocument` constructor for a single document. */
type DocumentInitializer = {
  text: string;
  metadata?: Record<string, any>;
};

/** Options object accepted for `metadataExtraction.title`; handed to `TitleExtractor`. */
type TitleExtractorsArgs = {
  llm?: LLM;
  nodes?: number;
  nodeTemplate?: TitleExtractorPrompt['template'];
  combineTemplate?: TitleCombinePrompt['template'];
};

/** Options object accepted for `metadataExtraction.summary`; handed to `SummaryExtractor`. */
type SummaryExtractArgs = {
  llm?: LLM;
  summaries?: string[];
  promptTemplate?: SummaryPrompt['template'];
};

/**
 * Options object accepted for `metadataExtraction.questionsAnswered`; handed to
 * `QuestionsAnsweredExtractor`.
 */
type QuestionAnswerExtractArgs = {
  llm?: LLM;
  questions?: number;
  promptTemplate?: QuestionExtractPrompt['template'];
  embeddingOnly?: boolean;
};

/** Options object accepted for `metadataExtraction.keyword`; handed to `KeywordExtractor`. */
type KeywordExtractArgs = {
  llm?: LLM;
  keywords?: number;
  promptTemplate?: KeywordExtractPrompt['template'];
};

/** Optional tokenizer override that is merged into the splitter options. */
type SplitterParams = {
  tokenizer?: Tokenizer;
};

/** Chunking options; every field is optional. */
type BaseSentenceParams = {
  chunkSize?: number;
  chunkOverlap?: number;
  separator?: string;
  paragraphSeparator?: string;
  secondaryChunkingRegex?: string;
};

/** Full `strategy` shape for `MastraDocument.chunk`, passed to `SentenceSplitter`. */
type SentenceParam = BaseSentenceParams & SplitterParams;
|
|
67
|
+
|
|
68
|
+
/**
 * Holds one or more llamaindex `Document`s and splits them into nodes through an
 * `IngestionPipeline`.
 */
export class MastraDocument {
  /** Documents created from the initializer(s) given to the constructor. */
  documents: Document[];

  /**
   * @param config - One `{ text, metadata }` initializer, or an array of them. Each
   *   initializer becomes one entry in `documents`.
   */
  constructor(config: DocumentInitializer | DocumentInitializer[]) {
    if (Array.isArray(config)) {
      this.documents = config.map(({ text, metadata }) => new Document({ text, metadata }));
    } else {
      this.documents = [new Document(config)];
    }
  }

  /**
   * Runs `documents` through an `IngestionPipeline` assembled from the given options.
   *
   * Transformations are added in this order: `MarkdownNodeParser` (only when
   * `parseMarkdown` is truthy), `SentenceSplitter` built from `strategy`, then the
   * title, summary, questions-answered and keyword extractors that are enabled.
   *
   * An extractor is enabled by passing `true` (constructed with `{}`) or an options
   * object (passed through as-is). Omitting the key or passing `false` leaves the
   * extractor out of the pipeline.
   *
   * @param options.strategy - Options handed to `SentenceSplitter`.
   * @param options.metadataExtraction - Per-extractor switches/options; defaults to `{}`.
   * @param options.parseMarkdown - When truthy, a `MarkdownNodeParser` runs first.
   * @returns The result of `IngestionPipeline.run` for the stored documents.
   */
  async chunk({
    strategy,
    metadataExtraction = {},
    parseMarkdown,
  }: {
    parseMarkdown?: boolean;
    strategy: SentenceParam;
    metadataExtraction?: {
      title?: TitleExtractorsArgs | boolean;
      summary?: SummaryExtractArgs | boolean;
      questionsAnswered?: QuestionAnswerExtractArgs | boolean;
      keyword?: KeywordExtractArgs | boolean;
    };
  }) {
    const { title, summary, questionsAnswered, keyword } = metadataExtraction;
    const transformations = [];

    if (parseMarkdown) {
      transformations.push(new MarkdownNodeParser());
    }

    transformations.push(new SentenceSplitter(strategy));

    // Each extractor is gated on truthiness rather than on "not undefined": with the
    // previous check an explicit `false` still added the extractor to the pipeline.
    if (title) {
      transformations.push(new TitleExtractor(typeof title === 'boolean' ? {} : title));
    }

    if (summary) {
      transformations.push(new SummaryExtractor(typeof summary === 'boolean' ? {} : summary));
    }

    if (questionsAnswered) {
      transformations.push(
        new QuestionsAnsweredExtractor(typeof questionsAnswered === 'boolean' ? {} : questionsAnswered),
      );
    }

    if (keyword) {
      transformations.push(new KeywordExtractor(typeof keyword === 'boolean' ? {} : keyword));
    }

    const pipeline = new IngestionPipeline({
      transformations,
    });

    return pipeline.run({
      documents: this.documents,
    });
  }
}
|
package/src/index.ts
ADDED