@mastra/mongodb 0.13.5 → 0.13.6-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,656 +0,0 @@
1
- import { MastraError, ErrorDomain, ErrorCategory } from '@mastra/core/error';
2
- import { MastraVector } from '@mastra/core/vector';
3
- import type {
4
- QueryResult,
5
- IndexStats,
6
- CreateIndexParams,
7
- UpsertVectorParams,
8
- QueryVectorParams,
9
- DescribeIndexParams,
10
- DeleteIndexParams,
11
- DeleteVectorParams,
12
- UpdateVectorParams,
13
- } from '@mastra/core/vector';
14
- import { MongoClient } from 'mongodb';
15
- import type { MongoClientOptions, Document, Db, Collection } from 'mongodb';
16
- import { v4 as uuidv4 } from 'uuid';
17
-
18
- import { MongoDBFilterTranslator } from './filter';
19
- import type { MongoDBVectorFilter } from './filter';
20
-
21
- // Define necessary types and interfaces
22
/**
 * Upsert parameters accepted by the MongoDB vector store.
 *
 * Adds an optional list of source texts on top of the base upsert parameters.
 */
export interface MongoDBUpsertVectorParams extends UpsertVectorParams {
  /**
   * Optional source text per vector, matched to `vectors` by position.
   * When an entry is present it is stored in the record's `document` field.
   */
  documents?: string[];
}
25
-
26
/**
 * Query parameters accepted by the MongoDB vector store.
 *
 * Adds a filter on the stored source text on top of the base query parameters.
 */
interface MongoDBQueryVectorParams extends QueryVectorParams<MongoDBVectorFilter> {
  /**
   * Optional condition applied to the stored `document` field. When a metadata
   * `filter` is also supplied, both must match (`$and`).
   */
  documentFilter?: MongoDBVectorFilter;
}
29
-
30
/** Options for polling until a vector search index reports that it is ready. */
export interface MongoDBIndexReadyParams {
  /** Name of the index (collection) whose vector search index is awaited. */
  indexName: string;
  /** Maximum time to wait, in milliseconds. `waitForIndexReady` defaults this to 60000. */
  timeoutMs?: number;
  /** Delay between status checks, in milliseconds. `waitForIndexReady` defaults this to 2000. */
  checkIntervalMs?: number;
}
35
-
36
- // Define the document interface
37
/**
 * Shape of the records kept in an index collection.
 *
 * `_id` is narrowed to `string` so that caller-supplied or generated ids can be
 * used directly in filters.
 */
interface MongoDBDocument extends Document {
  /** Record identifier; always a string in this store. */
  _id: string;
  /** Embedding vector of the record. */
  embedding?: number[];
  /** Arbitrary metadata stored alongside the embedding. */
  metadata?: Record<string, any>;
  /** Optional source text the embedding was derived from. */
  document?: string;
  /** Any additional properties (for example the dimension/metric bookkeeping fields). */
  [key: string]: any;
}
44
- // The MongoDBVector class
45
/**
 * Vector store implemented on top of MongoDB search indexes.
 *
 * Every Mastra "index" maps to one MongoDB collection with the same name. Records
 * hold the embedding, its metadata and (optionally) the source text. A reserved
 * `__index_metadata__` record in the same collection stores the index dimension
 * and metric so they can be reported by {@link MongoDBVector.describeIndex}.
 */
export class MongoDBVector extends MastraVector<MongoDBVectorFilter> {
  private client: MongoClient;
  private db: Db;
  /** Handles of collections that are known to exist, keyed by index name. */
  private collections: Map<string, Collection<MongoDBDocument>>;
  private readonly embeddingFieldName = 'embedding';
  private readonly metadataFieldName = 'metadata';
  private readonly documentFieldName = 'document';
  /** `_id` of the bookkeeping record that stores dimension and metric. */
  private readonly indexMetadataId = '__index_metadata__';
  /** Maps Mastra metric names to the similarity names used in the search index definition. */
  private readonly mongoMetricMap: Record<string, string> = {
    cosine: 'cosine',
    euclidean: 'euclidean',
    dotproduct: 'dotProduct',
  };

  /**
   * @param uri - MongoDB connection string.
   * @param dbName - Database that holds the index collections.
   * @param options - Optional driver options forwarded to `MongoClient`.
   */
  constructor({ uri, dbName, options }: { uri: string; dbName: string; options?: MongoClientOptions }) {
    super();
    this.client = new MongoClient(uri, options);
    this.db = this.client.db(dbName);
    this.collections = new Map();
  }

  // Public methods

  /**
   * Opens the client connection.
   * @throws MastraError (`STORAGE_MONGODB_VECTOR_CONNECT_FAILED`) when the driver fails to connect.
   */
  async connect(): Promise<void> {
    try {
      await this.client.connect();
    } catch (error) {
      throw new MastraError(
        {
          id: 'STORAGE_MONGODB_VECTOR_CONNECT_FAILED',
          domain: ErrorDomain.STORAGE,
          category: ErrorCategory.THIRD_PARTY,
        },
        error,
      );
    }
  }

  /**
   * Closes the client connection.
   * @throws MastraError (`STORAGE_MONGODB_VECTOR_DISCONNECT_FAILED`) when closing fails.
   */
  async disconnect(): Promise<void> {
    try {
      await this.client.close();
    } catch (error) {
      throw new MastraError(
        {
          id: 'STORAGE_MONGODB_VECTOR_DISCONNECT_FAILED',
          domain: ErrorDomain.STORAGE,
          category: ErrorCategory.THIRD_PARTY,
        },
        error,
      );
    }
  }

  /**
   * Creates the collection (if missing), its vector search index and its dynamic
   * text search index, then records dimension and metric in the bookkeeping record.
   *
   * Search indexes that already exist are left untouched. Each index is attempted
   * independently, so an existing vector index does not prevent the text search
   * index from being created.
   *
   * @param indexName - Name of the index; also used as the collection name.
   * @param dimension - Number of dimensions of the embeddings; must be a positive integer.
   * @param metric - Similarity metric (`cosine`, `euclidean` or `dotproduct`); defaults to `cosine`.
   * @throws MastraError when arguments are invalid, index creation fails, or the bookkeeping record cannot be written.
   */
  async createIndex({ indexName, dimension, metric = 'cosine' }: CreateIndexParams): Promise<void> {
    const similarity = this.resolveIndexSimilarity({ indexName, dimension, metric });

    let collection: Collection<MongoDBDocument>;
    try {
      const collectionExists = await this.db.listCollections({ name: indexName }).hasNext();
      if (!collectionExists) {
        await this.db.createCollection(indexName);
      }
      collection = await this.getCollection(indexName);

      await this.createSearchIndexIfMissing(collection, {
        definition: {
          fields: [
            {
              type: 'vector',
              path: this.embeddingFieldName,
              numDimensions: dimension,
              similarity,
            },
            {
              // `_id` must be indexed as a filter field because `query` pre-filters by `_id`.
              type: 'filter',
              path: '_id',
            },
          ],
        },
        name: `${indexName}_vector_index`,
        type: 'vectorSearch',
      });
      await this.createSearchIndexIfMissing(collection, {
        definition: {
          mappings: {
            dynamic: true,
          },
        },
        name: `${indexName}_search_index`,
        type: 'search',
      });
    } catch (error) {
      throw new MastraError(
        {
          id: 'STORAGE_MONGODB_VECTOR_CREATE_INDEX_FAILED',
          domain: ErrorDomain.STORAGE,
          category: ErrorCategory.THIRD_PARTY,
        },
        error,
      );
    }

    try {
      // Store the dimension and metric in the bookkeeping record.
      await collection.updateOne({ _id: this.indexMetadataId }, { $set: { dimension, metric } }, { upsert: true });
    } catch (error) {
      throw new MastraError(
        {
          id: 'STORAGE_MONGODB_VECTOR_CREATE_INDEX_FAILED_STORE_METADATA',
          domain: ErrorDomain.STORAGE,
          category: ErrorCategory.THIRD_PARTY,
          details: {
            indexName,
          },
        },
        error,
      );
    }
  }

  /**
   * Polls the collection's search indexes until the vector index reports status `READY`.
   *
   * @param indexName - The name of the index to wait for.
   * @param timeoutMs - Maximum time in milliseconds to wait (default: 60000).
   * @param checkIntervalMs - Interval in milliseconds between status checks (default: 2000).
   * @returns A promise that resolves once the index is ready.
   * @throws Error when the index does not become ready before the timeout elapses.
   */
  async waitForIndexReady({
    indexName,
    timeoutMs = 60000,
    checkIntervalMs = 2000,
  }: MongoDBIndexReadyParams): Promise<void> {
    const collection = await this.getCollection(indexName, true);
    const indexNameInternal = `${indexName}_vector_index`;

    const startTime = Date.now();
    while (Date.now() - startTime < timeoutMs) {
      const indexInfo: Array<{ name?: string; status?: string }> = await (collection as any)
        .listSearchIndexes()
        .toArray();
      const status = indexInfo.find(idx => idx.name === indexNameInternal)?.status;
      if (status === 'READY') {
        return;
      }
      await new Promise(resolve => setTimeout(resolve, checkIntervalMs));
    }
    throw new Error(`Index "${indexNameInternal}" did not become ready within timeout`);
  }

  /**
   * Inserts or replaces the embedding, metadata and optional source text of each record.
   *
   * @param indexName - Target index (collection).
   * @param vectors - Embeddings to store; every vector must match the index dimension.
   * @param metadata - Optional metadata per vector, matched by position. `Date` values are stored as ISO strings.
   * @param ids - Optional ids per vector, matched by position. Random UUIDs are generated when omitted.
   * @param documents - Optional source text per vector, matched by position.
   * @returns The ids of the written records, in input order.
   * @throws MastraError (`STORAGE_MONGODB_VECTOR_UPSERT_FAILED`) on validation or driver failure.
   */
  async upsert({ indexName, vectors, metadata, ids, documents }: MongoDBUpsertVectorParams): Promise<string[]> {
    try {
      const collection = await this.getCollection(indexName);

      // The stored dimension (0 when not yet recorded) drives validation.
      const stats = await this.describeIndex({ indexName });
      await this.validateVectorDimensions(collection, vectors, stats.dimension);

      // A shorter `ids` array would otherwise produce filters with an undefined `_id`.
      if (ids && ids.length !== vectors.length) {
        throw new Error(`Expected ${vectors.length} ids but received ${ids.length}`);
      }
      const generatedIds = ids ?? vectors.map(() => uuidv4());

      const operations = vectors.map((vector, idx) => {
        const id = generatedIds[idx];
        const doc = documents?.[idx];

        const updateDoc: Partial<MongoDBDocument> = {
          [this.embeddingFieldName]: vector,
          [this.metadataFieldName]: this.normalizeMetadata(metadata?.[idx] || {}),
        };
        if (doc !== undefined) {
          updateDoc[this.documentFieldName] = doc;
        }

        return {
          updateOne: {
            filter: { _id: id }, // '_id' is a string as per MongoDBDocument interface
            update: { $set: updateDoc },
            upsert: true,
          },
        };
      });

      await collection.bulkWrite(operations);

      return generatedIds;
    } catch (error) {
      throw new MastraError(
        {
          id: 'STORAGE_MONGODB_VECTOR_UPSERT_FAILED',
          domain: ErrorDomain.STORAGE,
          category: ErrorCategory.THIRD_PARTY,
          details: {
            indexName,
          },
        },
        error,
      );
    }
  }

  /**
   * Runs a `$vectorSearch` against the index and returns the best matches.
   *
   * When a metadata and/or document filter is given, matching record ids are
   * collected first with a regular `$match`, and the vector search is then
   * restricted to those ids. If nothing matches the filter the result is empty.
   *
   * @param indexName - Index (collection) to search.
   * @param queryVector - Embedding to compare against.
   * @param topK - Maximum number of results (default: 10).
   * @param filter - Optional metadata filter; bare field names are resolved inside the `metadata` sub-document.
   * @param includeVector - Whether to return the stored embedding with each result (default: false).
   * @param documentFilter - Optional condition on the stored `document` field.
   * @returns Matches with id, score, metadata, source text and (optionally) the embedding.
   * @throws MastraError (`STORAGE_MONGODB_VECTOR_QUERY_FAILED`) on any failure.
   */
  async query({
    indexName,
    queryVector,
    topK = 10,
    filter,
    includeVector = false,
    documentFilter,
  }: MongoDBQueryVectorParams): Promise<QueryResult[]> {
    try {
      const collection = await this.getCollection(indexName, true);
      const indexNameInternal = `${indexName}_vector_index`;

      // Translate the metadata filter and move bare field names under `metadata.`.
      const transformedMongoFilter = this.transformMetadataFilter(this.transformFilter(filter));
      const documentMongoFilter = documentFilter ? { [this.documentFieldName]: documentFilter } : {};

      const hasMetadataFilter = Object.keys(transformedMongoFilter).length > 0;
      const hasDocumentFilter = Object.keys(documentMongoFilter).length > 0;

      let combinedFilter: any = {};
      if (hasMetadataFilter && hasDocumentFilter) {
        combinedFilter = { $and: [transformedMongoFilter, documentMongoFilter] };
      } else if (hasMetadataFilter) {
        combinedFilter = transformedMongoFilter;
      } else if (hasDocumentFilter) {
        combinedFilter = documentMongoFilter;
      }

      const vectorSearch: Document = {
        index: indexNameInternal,
        queryVector,
        path: this.embeddingFieldName,
        // `$vectorSearch` requires numCandidates >= limit, so grow it together with topK.
        numCandidates: Math.max(100, topK),
        limit: topK,
      };

      if (Object.keys(combinedFilter).length > 0) {
        // Pre-filter for candidate record ids.
        const candidateIds = await collection
          .aggregate([{ $match: combinedFilter }, { $project: { _id: 1 } }])
          .map(doc => doc._id)
          .toArray();

        if (candidateIds.length === 0) {
          // No records match the filter, so there is nothing to search.
          return [];
        }
        vectorSearch.filter = { _id: { $in: candidateIds } };
      }

      const pipeline: Document[] = [
        {
          $vectorSearch: vectorSearch,
        },
        {
          $set: { score: { $meta: 'vectorSearchScore' } },
        },
        {
          $project: {
            _id: 1,
            score: 1,
            metadata: `$${this.metadataFieldName}`,
            document: `$${this.documentFieldName}`,
            ...(includeVector && { vector: `$${this.embeddingFieldName}` }),
          },
        },
      ];

      const results = await collection.aggregate(pipeline).toArray();

      return results.map((result: any) => ({
        id: result._id,
        score: result.score,
        metadata: result.metadata,
        vector: includeVector ? result.vector : undefined,
        document: result.document,
      }));
    } catch (error) {
      throw new MastraError(
        {
          id: 'STORAGE_MONGODB_VECTOR_QUERY_FAILED',
          domain: ErrorDomain.STORAGE,
          category: ErrorCategory.THIRD_PARTY,
          details: {
            indexName,
          },
        },
        error,
      );
    }
  }

  /**
   * Lists the names of all collections in the database.
   * @throws MastraError (`STORAGE_MONGODB_VECTOR_LIST_INDEXES_FAILED`) when listing fails.
   */
  async listIndexes(): Promise<string[]> {
    try {
      const collections = await this.db.listCollections().toArray();
      return collections.map(col => col.name);
    } catch (error) {
      throw new MastraError(
        {
          id: 'STORAGE_MONGODB_VECTOR_LIST_INDEXES_FAILED',
          domain: ErrorDomain.STORAGE,
          category: ErrorCategory.THIRD_PARTY,
        },
        error,
      );
    }
  }

  /**
   * Retrieves statistics about a vector index.
   *
   * @param indexName - The name of the index to describe.
   * @returns The recorded dimension (0 when none is recorded), the number of records
   *   excluding the bookkeeping record, and the recorded metric (`cosine` when none is recorded).
   * @throws MastraError (`STORAGE_MONGODB_VECTOR_DESCRIBE_INDEX_FAILED`) on any failure.
   */
  async describeIndex({ indexName }: DescribeIndexParams): Promise<IndexStats> {
    try {
      const collection = await this.getCollection(indexName, true);

      // Count records, excluding the bookkeeping record.
      const count = await collection.countDocuments({ _id: { $ne: this.indexMetadataId } });

      const metadataDoc = await collection.findOne({ _id: this.indexMetadataId });
      const dimension = metadataDoc?.dimension || 0;
      const metric = metadataDoc?.metric || 'cosine';

      return {
        dimension,
        count,
        metric: metric as 'cosine' | 'euclidean' | 'dotproduct',
      };
    } catch (error) {
      throw new MastraError(
        {
          id: 'STORAGE_MONGODB_VECTOR_DESCRIBE_INDEX_FAILED',
          domain: ErrorDomain.STORAGE,
          category: ErrorCategory.THIRD_PARTY,
          details: {
            indexName,
          },
        },
        error,
      );
    }
  }

  /**
   * Drops the collection behind the index and forgets its cached handle.
   *
   * @param indexName - Index (collection) to drop.
   * @throws MastraError (`STORAGE_MONGODB_VECTOR_DELETE_INDEX_FAILED`) when the lookup or the drop fails.
   */
  async deleteIndex({ indexName }: DeleteIndexParams): Promise<void> {
    try {
      // Non-throwing lookup: a handle is returned even when the collection does not exist.
      const collection = await this.getCollection(indexName, false);
      await collection.drop();
      this.collections.delete(indexName);
    } catch (error) {
      throw new MastraError(
        {
          id: 'STORAGE_MONGODB_VECTOR_DELETE_INDEX_FAILED',
          domain: ErrorDomain.STORAGE,
          category: ErrorCategory.THIRD_PARTY,
          details: {
            indexName,
          },
        },
        error,
      );
    }
  }

  /**
   * Updates a vector by its ID with the provided vector and/or metadata.
   *
   * @param indexName - The name of the index containing the vector.
   * @param id - The ID of the vector to update.
   * @param update - An object containing the vector and/or metadata to update.
   * @param update.vector - Optional new embedding; must match the index dimension.
   * @param update.metadata - Optional new metadata; replaces the stored metadata. `Date` values are stored as ISO strings.
   * @returns A promise that resolves when the update is complete.
   * @throws MastraError (`STORAGE_MONGODB_VECTOR_UPDATE_VECTOR_FAILED`) if no updates are provided or the operation fails.
   */
  async updateVector({ indexName, id, update }: UpdateVectorParams): Promise<void> {
    try {
      if (!update.vector && !update.metadata) {
        throw new Error('No updates provided');
      }

      const collection = await this.getCollection(indexName, true);
      const updateDoc: Record<string, any> = {};

      if (update.vector) {
        const stats = await this.describeIndex({ indexName });
        await this.validateVectorDimensions(collection, [update.vector], stats.dimension);
        updateDoc[this.embeddingFieldName] = update.vector;
      }

      if (update.metadata) {
        updateDoc[this.metadataFieldName] = this.normalizeMetadata(update.metadata);
      }

      await collection.findOneAndUpdate({ _id: id }, { $set: updateDoc });
    } catch (error: any) {
      throw new MastraError(
        {
          id: 'STORAGE_MONGODB_VECTOR_UPDATE_VECTOR_FAILED',
          domain: ErrorDomain.STORAGE,
          category: ErrorCategory.THIRD_PARTY,
          details: {
            indexName,
            id,
          },
        },
        error,
      );
    }
  }

  /**
   * Deletes a vector by its ID.
   *
   * @param indexName - The name of the index containing the vector.
   * @param id - The ID of the vector to delete.
   * @returns A promise that resolves when the deletion is complete.
   * @throws MastraError (`STORAGE_MONGODB_VECTOR_DELETE_VECTOR_FAILED`) if the deletion fails.
   */
  async deleteVector({ indexName, id }: DeleteVectorParams): Promise<void> {
    try {
      const collection = await this.getCollection(indexName, true);
      await collection.deleteOne({ _id: id });
    } catch (error: any) {
      throw new MastraError(
        {
          id: 'STORAGE_MONGODB_VECTOR_DELETE_VECTOR_FAILED',
          domain: ErrorDomain.STORAGE,
          category: ErrorCategory.THIRD_PARTY,
          details: {
            indexName,
            id,
          },
        },
        error,
      );
    }
  }

  // Private methods

  /**
   * Validates `createIndex` arguments and returns the similarity name for the search index definition.
   * @throws MastraError (`STORAGE_MONGODB_VECTOR_CREATE_INDEX_INVALID_ARGS`) when dimension or metric is invalid.
   */
  private resolveIndexSimilarity({ indexName, dimension, metric = 'cosine' }: CreateIndexParams): string {
    try {
      if (!Number.isInteger(dimension) || dimension <= 0) {
        throw new Error('Dimension must be a positive integer');
      }

      const similarity = this.mongoMetricMap[metric];
      if (!similarity) {
        throw new Error(`Invalid metric: "${metric}". Must be one of: cosine, euclidean, dotproduct`);
      }
      return similarity;
    } catch (error) {
      throw new MastraError(
        {
          id: 'STORAGE_MONGODB_VECTOR_CREATE_INDEX_INVALID_ARGS',
          domain: ErrorDomain.STORAGE,
          category: ErrorCategory.USER,
          details: {
            indexName,
            dimension,
            metric,
          },
        },
        error,
      );
    }
  }

  /** Creates one search index, treating an `IndexAlreadyExists` failure as success. */
  private async createSearchIndexIfMissing(
    collection: Collection<MongoDBDocument>,
    description: Parameters<Collection<MongoDBDocument>['createSearchIndex']>[0],
  ): Promise<void> {
    try {
      await collection.createSearchIndex(description);
    } catch (error: any) {
      if (error?.codeName !== 'IndexAlreadyExists') {
        throw error;
      }
    }
  }

  /** Returns a copy of `metadata` in which top-level `Date` values are replaced by ISO strings. */
  private normalizeMetadata(metadata: Record<string, any>): Record<string, any> {
    const normalized: Record<string, any> = {};
    for (const [key, value] of Object.entries(metadata)) {
      normalized[key] = value instanceof Date ? value.toISOString() : value;
    }
    return normalized;
  }

  /**
   * Returns the collection handle for an index.
   *
   * Handles are cached only for collections that exist, so a non-throwing lookup
   * of a missing collection cannot make later strict lookups skip the existence check.
   *
   * @param indexName - Index (collection) name.
   * @param throwIfNotExists - When true (default), a missing collection raises an error.
   * @throws Error when the collection does not exist and `throwIfNotExists` is true.
   */
  private async getCollection(
    indexName: string,
    throwIfNotExists: boolean = true,
  ): Promise<Collection<MongoDBDocument>> {
    const cached = this.collections.get(indexName);
    if (cached) {
      return cached;
    }

    const collectionExists = await this.db.listCollections({ name: indexName }).hasNext();
    if (!collectionExists && throwIfNotExists) {
      throw new Error(`Index (Collection) "${indexName}" does not exist`);
    }

    const collection = this.db.collection<MongoDBDocument>(indexName);
    if (collectionExists) {
      this.collections.set(indexName, collection);
    }
    return collection;
  }

  /**
   * Checks that every vector has the expected number of dimensions.
   *
   * When no dimension is recorded yet (`dimension === 0`), the length of the first
   * vector is adopted and written to the bookkeeping record of `collection`.
   *
   * @param collection - Collection of the index being validated; receives the dimension when it is first set.
   * @param vectors - Vectors to validate.
   * @param dimension - Recorded dimension of the index, or 0 when unknown.
   * @throws Error when no vectors are given or a vector has a different length.
   */
  private async validateVectorDimensions(
    collection: Collection<MongoDBDocument>,
    vectors: number[][],
    dimension: number,
  ): Promise<void> {
    if (vectors.length === 0) {
      throw new Error('No vectors provided for validation');
    }

    let expected = dimension;
    if (expected === 0) {
      expected = vectors[0] ? vectors[0].length : 0;
      await this.setIndexDimension(collection, expected);
    }

    for (let i = 0; i < vectors.length; i++) {
      const actual = vectors[i]?.length;
      if (actual !== expected) {
        throw new Error(`Vector at index ${i} has invalid dimension ${actual}. Expected ${expected} dimensions.`);
      }
    }
  }

  /** Writes the dimension to the bookkeeping record of the given collection. */
  private async setIndexDimension(collection: Collection<MongoDBDocument>, dimension: number): Promise<void> {
    await collection.updateOne({ _id: this.indexMetadataId }, { $set: { dimension } }, { upsert: true });
  }

  /** Translates a Mastra filter with `MongoDBFilterTranslator`; an absent filter becomes `{}`. */
  private transformFilter(filter?: MongoDBVectorFilter) {
    if (!filter) return {};
    return new MongoDBFilterTranslator().translate(filter);
  }

  /**
   * Transform metadata field filters to use MongoDB dot notation.
   * Fields that are stored in the metadata subdocument need to be prefixed with 'metadata.'
   * This handles filters from the Memory system which expects direct field access.
   *
   * Keys starting with `$` are treated as operators and their contents are transformed
   * recursively; keys already starting with `metadata.` are kept unchanged.
   *
   * @param filter - The filter object to transform
   * @returns Transformed filter with metadata fields properly prefixed
   */
  private transformMetadataFilter(filter: any): any {
    if (!filter || typeof filter !== 'object') return filter;

    const transformed: any = {};

    for (const [key, value] of Object.entries(filter)) {
      if (key.startsWith('$')) {
        // Logical operators such as $and / $or: recurse into their contents.
        transformed[key] = Array.isArray(value)
          ? value.map(item => this.transformMetadataFilter(item))
          : this.transformMetadataFilter(value);
      } else if (key.startsWith('metadata.')) {
        // Already prefixed, keep as is.
        transformed[key] = value;
      } else if (this.isMetadataField(key)) {
        // Field lives in the metadata sub-document.
        transformed[`metadata.${key}`] = value;
      } else {
        // Document-level field, keep as is.
        transformed[key] = value;
      }
    }

    return transformed;
  }

  /**
   * Determine if a field should be treated as a metadata field.
   *
   * Keys starting with an underscore and the document-level `embedding` and
   * `document` fields are not metadata; every other key is assumed to live in
   * the metadata sub-document.
   */
  private isMetadataField(key: string): boolean {
    // Underscore-prefixed keys (such as `_id`) are document-level.
    if (key.startsWith('_')) return false;

    // Document-level fields that are NOT in metadata.
    const documentFields: string[] = ['_id', this.embeddingFieldName, this.documentFieldName];
    return !documentFields.includes(key);
  }
}