@mastra/rag 0.0.2-alpha.13 → 0.0.2-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # @mastra/rag
2
2
 
3
+ ## 0.0.2-alpha.15
4
+
5
+ ### Patch Changes
6
+
7
+ - Updated dependencies [c2dd6b5]
8
+ - @mastra/core@0.1.27-alpha.31
9
+
10
+ ## 0.0.2-alpha.14
11
+
12
+ ### Patch Changes
13
+
14
+ - 1c3232a: ChromaDB
15
+
3
16
  ## 0.0.2-alpha.13
4
17
 
5
18
  ### Patch Changes
@@ -0,0 +1,20 @@
1
+ import { MastraVector, QueryResult, IndexStats } from '@mastra/core';
2
+ export declare class ChromaVector extends MastraVector {
3
+ private client;
4
+ private collections;
5
+ constructor({ path, auth, }: {
6
+ path: string;
7
+ auth?: {
8
+ provider: string;
9
+ credentials: string;
10
+ };
11
+ });
12
+ private getCollection;
13
+ private validateVectorDimensions;
14
+ upsert(indexName: string, vectors: number[][], metadata?: Record<string, any>[], ids?: string[]): Promise<string[]>;
15
+ createIndex(indexName: string, dimension: number, metric?: 'cosine' | 'euclidean' | 'dotproduct'): Promise<void>;
16
+ query(indexName: string, queryVector: number[], topK?: number, filter?: Record<string, any>): Promise<QueryResult[]>;
17
+ listIndexes(): Promise<string[]>;
18
+ describeIndex(indexName: string): Promise<IndexStats>;
19
+ deleteIndex(indexName: string): Promise<void>;
20
+ }
@@ -14,5 +14,9 @@ services:
14
14
  image: qdrant/qdrant
15
15
  ports:
16
16
  - 6333:6333
17
+ chroma:
18
+ image: chromadb/chroma
19
+ ports:
20
+ - 8000:8000
17
21
  volumes:
18
22
  pgdata:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/rag",
3
- "version": "0.0.2-alpha.13",
3
+ "version": "0.0.2-alpha.15",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -32,13 +32,15 @@
32
32
  "@pinecone-database/pinecone": "^3.0.3",
33
33
  "@qdrant/js-client-rest": "^1.12.0",
34
34
  "@upstash/vector": "^1.1.7",
35
+ "chromadb": "^1.9.4",
36
+ "chromadb-default-embed": "^2.13.2",
35
37
  "date-fns": "^4.1.0",
36
38
  "dotenv": "^16.3.1",
37
39
  "jsdom": "^25.0.1",
38
40
  "llamaindex": "^0.8.15",
39
41
  "pg": "^8.13.1",
40
42
  "postgres": "^3.4.5",
41
- "@mastra/core": "0.1.27-alpha.30"
43
+ "@mastra/core": "0.1.27-alpha.31"
42
44
  },
43
45
  "devDependencies": {
44
46
  "@babel/preset-env": "^7.26.0",
@@ -0,0 +1,154 @@
1
+ import { QueryResult, IndexStats } from '@mastra/core';
2
+
3
+ import { ChromaVector } from './';
4
+
5
+ describe('ChromaVector Integration Tests', () => {
6
+ let vectorDB: ChromaVector;
7
+ const testIndexName = 'test-index';
8
+ const dimension = 3;
9
+
10
+ beforeEach(async () => {
11
+ vectorDB = new ChromaVector({
12
+ path: 'http://localhost:8000',
13
+ });
14
+ // Clean up any existing test index
15
+ try {
16
+ await vectorDB.deleteIndex(testIndexName);
17
+ } catch (error) {
18
+ // Ignore errors if index doesn't exist
19
+ }
20
+ await vectorDB.createIndex(testIndexName, dimension);
21
+ }, 5000);
22
+
23
+ afterEach(async () => {
24
+ // Cleanup after tests
25
+ try {
26
+ await vectorDB.deleteIndex(testIndexName);
27
+ } catch (error) {
28
+ // Ignore cleanup errors
29
+ }
30
+ }, 5000);
31
+
32
+ describe('Index Management', () => {
33
+ test('should create and list indexes', async () => {
34
+ const indexes = await vectorDB.listIndexes();
35
+ expect(indexes).toContain(testIndexName);
36
+ });
37
+
38
+ test('should describe index correctly', async () => {
39
+ const stats: IndexStats = await vectorDB.describeIndex(testIndexName);
40
+ expect(stats.dimension).toBe(dimension);
41
+ expect(stats.count).toBe(0);
42
+ expect(stats.metric).toBe('cosine');
43
+ });
44
+
45
+ test('should delete index', async () => {
46
+ await vectorDB.deleteIndex(testIndexName);
47
+ const indexes = await vectorDB.listIndexes();
48
+ expect(indexes).not.toContain(testIndexName);
49
+ });
50
+
51
+ test('should create index with different metrics', async () => {
52
+ const metricsToTest: Array<'cosine' | 'euclidean' | 'dotproduct'> = ['euclidean', 'dotproduct'];
53
+
54
+ for (const metric of metricsToTest) {
55
+ const testIndex = `test-index-${metric}`;
56
+ await vectorDB.createIndex(testIndex, dimension, metric);
57
+
58
+ const stats = await vectorDB.describeIndex(testIndex);
59
+ expect(stats.metric).toBe(metric);
60
+
61
+ await vectorDB.deleteIndex(testIndex);
62
+ }
63
+ });
64
+ });
65
+
66
+ describe('Vector Operations', () => {
67
+ const testVectors = [
68
+ [1.0, 0.0, 0.0],
69
+ [0.0, 1.0, 0.0],
70
+ [0.0, 0.0, 1.0],
71
+ ];
72
+ const testMetadata = [{ label: 'x-axis' }, { label: 'y-axis' }, { label: 'z-axis' }];
73
+ const testIds = ['vec1', 'vec2', 'vec3'];
74
+
75
+ test('should upsert vectors with generated ids', async () => {
76
+ const ids = await vectorDB.upsert(testIndexName, testVectors);
77
+ expect(ids).toHaveLength(testVectors.length);
78
+ ids.forEach(id => expect(typeof id).toBe('string'));
79
+
80
+ const stats = await vectorDB.describeIndex(testIndexName);
81
+ expect(stats.count).toBe(testVectors.length);
82
+ });
83
+
84
+ test('should upsert vectors with provided ids and metadata', async () => {
85
+ await vectorDB.upsert(testIndexName, testVectors, testMetadata, testIds);
86
+
87
+ const stats = await vectorDB.describeIndex(testIndexName);
88
+ expect(stats.count).toBe(testVectors.length);
89
+
90
+ // Query each vector to verify metadata
91
+ for (let i = 0; i < testVectors.length; i++) {
92
+ const results = await vectorDB.query(testIndexName, testVectors?.[i]!, 1);
93
+ expect(results?.[0]?.id).toBe(testIds[i]);
94
+ expect(results?.[0]?.metadata).toEqual(testMetadata[i]);
95
+ }
96
+ });
97
+
98
+ test('should perform vector search with topK', async () => {
99
+ await vectorDB.upsert(testIndexName, testVectors, testMetadata, testIds);
100
+
101
+ const queryVector = [1.0, 0.1, 0.1];
102
+ const topK = 2;
103
+
104
+ const results: QueryResult[] = await vectorDB.query(testIndexName, queryVector, topK);
105
+
106
+ expect(results).toHaveLength(topK);
107
+ expect(results?.[0]?.id).toBe(testIds[0]); // Should match x-axis vector most closely
108
+ });
109
+
110
+ test('should filter query results', async () => {
111
+ await vectorDB.upsert(testIndexName, testVectors, testMetadata, testIds);
112
+
113
+ const queryVector = [1.0, 1.0, 1.0];
114
+ const filter = { label: 'x-axis' };
115
+
116
+ const results = await vectorDB.query(testIndexName, queryVector, 3, filter);
117
+
118
+ expect(results).toHaveLength(1);
119
+ expect(results?.[0]?.metadata?.label).toBe('x-axis');
120
+ });
121
+
122
+ test('should update existing vectors', async () => {
123
+ // Initial upsert
124
+ await vectorDB.upsert(testIndexName, testVectors, testMetadata, testIds);
125
+
126
+ // Update first vector
127
+ const updatedVector = [[0.5, 0.5, 0.0]];
128
+ const updatedMetadata = [{ label: 'updated-x-axis' }];
129
+ await vectorDB.upsert(testIndexName, updatedVector, updatedMetadata, [testIds?.[0]!]);
130
+
131
+ // Verify update
132
+ const results = await vectorDB.query(testIndexName, updatedVector?.[0]!, 1);
133
+ expect(results?.[0]?.id).toBe(testIds[0]);
134
+ expect(results?.[0]?.metadata).toEqual(updatedMetadata[0]);
135
+ });
136
+ });
137
+
138
+ describe('Error Handling', () => {
139
+ test('should handle non-existent index queries', async () => {
140
+ await expect(vectorDB.query('non-existent-index-yu', [1, 2, 3])).rejects.toThrow();
141
+ });
142
+
143
+ test('should handle invalid dimension vectors', async () => {
144
+ const invalidVector = [1, 2, 3, 4]; // 4D vector for 3D index
145
+ await expect(vectorDB.upsert(testIndexName, [invalidVector])).rejects.toThrow();
146
+ });
147
+
148
+ test('should handle mismatched metadata and vectors length', async () => {
149
+ const vectors = [[1, 2, 3]];
150
+ const metadata = [{}, {}]; // More metadata than vectors
151
+ await expect(vectorDB.upsert(testIndexName, vectors, metadata)).rejects.toThrow();
152
+ });
153
+ });
154
+ });
@@ -0,0 +1,135 @@
1
+ import { MastraVector, QueryResult, IndexStats } from '@mastra/core';
2
+ import { ChromaClient } from 'chromadb';
3
+
4
+ export class ChromaVector extends MastraVector {
5
+ private client: ChromaClient;
6
+ private collections: Map<string, any>;
7
+
8
+ constructor({
9
+ path,
10
+ auth,
11
+ }: {
12
+ path: string;
13
+ auth?: {
14
+ provider: string;
15
+ credentials: string;
16
+ };
17
+ }) {
18
+ super();
19
+ this.client = new ChromaClient({
20
+ path,
21
+ auth,
22
+ });
23
+ this.collections = new Map();
24
+ }
25
+
26
+ private async getCollection(indexName: string, throwIfNotExists: boolean = true) {
27
+ try {
28
+ const collection = await this.client.getCollection({ name: indexName, embeddingFunction: undefined as any });
29
+ this.collections.set(indexName, collection);
30
+ } catch (error) {
31
+ if (throwIfNotExists) {
32
+ throw new Error(`Index ${indexName} does not exist`);
33
+ }
34
+ return null;
35
+ }
36
+ return this.collections.get(indexName);
37
+ }
38
+
39
+ private validateVectorDimensions(vectors: number[][], dimension: number): void {
40
+ for (let i = 0; i < vectors.length; i++) {
41
+ if (vectors?.[i]?.length !== dimension) {
42
+ throw new Error(
43
+ `Vector at index ${i} has invalid dimension ${vectors?.[i]?.length}. Expected ${dimension} dimensions.`,
44
+ );
45
+ }
46
+ }
47
+ }
48
+
49
+ async upsert(
50
+ indexName: string,
51
+ vectors: number[][],
52
+ metadata?: Record<string, any>[],
53
+ ids?: string[],
54
+ ): Promise<string[]> {
55
+ const collection = await this.getCollection(indexName);
56
+
57
+ // Get index stats to check dimension
58
+ const stats = await this.describeIndex(indexName);
59
+
60
+ // Validate vector dimensions
61
+ this.validateVectorDimensions(vectors, stats.dimension);
62
+
63
+ // Generate IDs if not provided
64
+ const generatedIds = ids || vectors.map(() => crypto.randomUUID());
65
+
66
+ // Ensure metadata exists for each vector
67
+ const normalizedMetadata = metadata || vectors.map(() => ({}));
68
+
69
+ await collection.upsert({
70
+ ids: generatedIds,
71
+ embeddings: vectors,
72
+ metadatas: normalizedMetadata,
73
+ });
74
+
75
+ return generatedIds;
76
+ }
77
+
78
+ async createIndex(
79
+ indexName: string,
80
+ dimension: number,
81
+ metric: 'cosine' | 'euclidean' | 'dotproduct' = 'cosine',
82
+ ): Promise<void> {
83
+ await this.client.createCollection({
84
+ name: indexName,
85
+ metadata: {
86
+ dimension,
87
+ metric,
88
+ },
89
+ });
90
+ }
91
+
92
+ async query(
93
+ indexName: string,
94
+ queryVector: number[],
95
+ topK: number = 10,
96
+ filter?: Record<string, any>,
97
+ ): Promise<QueryResult[]> {
98
+ const collection = await this.getCollection(indexName, true);
99
+
100
+ const results = await collection.query({
101
+ queryEmbeddings: [queryVector],
102
+ nResults: topK,
103
+ where: filter,
104
+ });
105
+
106
+ // Transform ChromaDB results to QueryResult format
107
+ return (results.ids[0] || []).map((id: string, index: number) => ({
108
+ id,
109
+ score: results.distances?.[0]?.[index] || 0,
110
+ metadata: results.metadatas?.[0]?.[index] || {},
111
+ }));
112
+ }
113
+
114
+ async listIndexes(): Promise<string[]> {
115
+ const collections = await this.client.listCollections();
116
+ return collections.map(collection => collection.name);
117
+ }
118
+
119
+ async describeIndex(indexName: string): Promise<IndexStats> {
120
+ const collection = await this.getCollection(indexName);
121
+ const count = await collection.count();
122
+ const metadata = collection.metadata;
123
+
124
+ return {
125
+ dimension: metadata?.dimension || 0,
126
+ count,
127
+ metric: metadata?.metric as 'cosine' | 'euclidean' | 'dotproduct',
128
+ };
129
+ }
130
+
131
+ async deleteIndex(indexName: string): Promise<void> {
132
+ await this.client.deleteCollection({ name: indexName });
133
+ this.collections.delete(indexName);
134
+ }
135
+ }
@@ -10,169 +10,6 @@ Welcome to our comprehensive guide on modern web development. This resource cove
10
10
  - Beginning developers looking to establish a solid foundation
11
11
  - Intermediate developers wanting to modernize their skillset
12
12
  - Senior developers seeking a refresher on current best practices
13
-
14
- ## Core Concepts
15
-
16
- ### 1. Frontend Development
17
- Modern frontend development has evolved significantly. Here are the key areas to focus on:
18
-
19
- #### HTML5 Semantic Elements
20
- Using semantic HTML improves:
21
- - Accessibility
22
- - SEO performance
23
- - Code readability
24
- - Maintenance
25
-
26
- \`\`\`html
27
- <header>
28
- <nav>
29
- <ul>
30
- <li><a href="#home">Home</a></li>
31
- <li><a href="#about">About</a></li>
32
- </ul>
33
- </nav>
34
- </header>
35
- \`\`\`
36
-
37
- #### CSS Best Practices
38
- 1. Use CSS Custom Properties
39
- 2. Implement responsive design
40
- 3. Follow BEM methodology
41
- 4. Optimize performance
42
-
43
- ### 2. JavaScript Fundamentals
44
-
45
- JavaScript is the backbone of web development. Here's what you need to know:
46
-
47
- \`\`\`javascript
48
- // Modern JS features
49
- const exampleFunction = async () => {
50
- try {
51
- const response = await fetch('https://api.example.com/data');
52
- const data = await response.json();
53
- return data;
54
- } catch (error) {
55
- console.error('Error fetching data:', error);
56
- }
57
- };
58
- \`\`\`
59
-
60
- #### Key Concepts:
61
- - Promises and async/await
62
- - ES6+ features
63
- - TypeScript integration
64
- - Module systems
65
-
66
- ### 3. Backend Development
67
-
68
- Backend development requires understanding:
69
-
70
- 1. **Server Architecture**
71
- - RESTful APIs
72
- - GraphQL
73
- - Microservices
74
-
75
- 2. **Database Management**
76
- - SQL vs NoSQL
77
- - Query optimization
78
- - Data modeling
79
-
80
- 3. **Security Considerations**
81
- - Authentication
82
- - Authorization
83
- - Data encryption
84
-
85
- ## Tools and Technologies
86
-
87
- ### Essential Developer Tools
88
-
89
- | Category | Tools |
90
- |----------|-------|
91
- | Version Control | Git, GitHub |
92
- | Package Managers | npm, yarn |
93
- | Bundlers | webpack, Vite |
94
- | Testing | Jest, Cypress |
95
-
96
- ### Framework Selection
97
-
98
- #### Frontend Frameworks
99
- 1. React
100
- - Component-based architecture
101
- - Virtual DOM
102
- - Large ecosystem
103
-
104
- 2. Vue
105
- - Progressive framework
106
- - Easy learning curve
107
- - Great documentation
108
-
109
- 3. Angular
110
- - Full-featured framework
111
- - TypeScript integration
112
- - Enterprise-ready
113
-
114
- #### Backend Frameworks
115
- - Node.js/Express
116
- - Django
117
- - Ruby on Rails
118
- - Spring Boot
119
-
120
- ## Best Practices
121
-
122
- ### Code Quality
123
- - Write clean, maintainable code
124
- - Follow SOLID principles
125
- - Implement proper error handling
126
- - Use consistent formatting
127
-
128
- ### Performance Optimization
129
- 1. Minimize HTTP requests
130
- 2. Optimize images
131
- 3. Implement caching
132
- 4. Use lazy loading
133
-
134
- ### Security Measures
135
- - Implement HTTPS
136
- - Sanitize user input
137
- - Use secure dependencies
138
- - Regular security audits
139
-
140
- ## Deployment and DevOps
141
-
142
- ### Continuous Integration/Continuous Deployment (CI/CD)
143
- 1. Automated testing
144
- 2. Build automation
145
- 3. Deployment automation
146
- 4. Monitoring and logging
147
-
148
- ### Cloud Services
149
- - AWS
150
- - Google Cloud Platform
151
- - Azure
152
- - Heroku
153
-
154
- ## Conclusion
155
-
156
- Remember that web development is an ever-evolving field. Stay current with:
157
- - Industry trends
158
- - New tools and frameworks
159
- - Security best practices
160
- - Performance optimization techniques
161
-
162
- ### Additional Resources
163
-
164
- * [MDN Web Docs](https://developer.mozilla.org)
165
- * [Web.dev](https://web.dev)
166
- * [CSS-Tricks](https://css-tricks.com)
167
- * [JavaScript.info](https://javascript.info)
168
-
169
- ---
170
-
171
- > "Any application that can be written in JavaScript, will eventually be written in JavaScript." - Jeff Atwood
172
-
173
- ---
174
-
175
- **Note**: This guide is regularly updated to reflect current web development practices and standards.
176
13
  `;
177
14
 
178
15
  describe('MastraDocument', () => {
@@ -195,19 +32,17 @@ describe('MastraDocument', () => {
195
32
 
196
33
  await doc.chunk({
197
34
  extract: {
198
- summary: true,
199
- title: true,
35
+ keywords: true,
200
36
  },
201
37
  options: {
202
- chunkSize: 100,
38
+ chunkSize: 1500,
203
39
  chunkOverlap: 0,
204
40
  separator: `\n`,
205
41
  },
206
42
  });
207
43
 
208
- expect(doc.getDocs()?.[0]?.text).toBe(`Complete Guide to Modern Web Development`);
209
- expect(doc.getDocs().length).toBe(56);
210
- }, 500000);
44
+ expect(doc.getMetadata()?.[0]).toBeTruthy();
45
+ }, 15000);
211
46
  });
212
47
 
213
48
  describe('chunkCharacter', () => {
@@ -672,7 +507,6 @@ describe('MastraDocument', () => {
672
507
  const c = JSON.parse(chunk);
673
508
  const retVal: Record<string, string> = {};
674
509
  Object.entries(c).forEach(([key, value]) => {
675
- console.log(key, value);
676
510
  retVal[key] = JSON.parse(`"${value as string}"`);
677
511
  });
678
512