@mastra/qdrant 1.0.0-beta.2 → 1.0.0-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,643 @@
1
+ > Guide on vector storage options in Mastra, including embedded and dedicated vector databases for similarity search.
2
+
3
+ # Storing Embeddings in a Vector Database
4
+
5
+ After generating embeddings, you need to store them in a database that supports vector similarity search. Mastra provides a consistent interface for storing and querying embeddings across various vector databases.
6
+
7
+ ## Supported Databases
8
+
9
+ **mongodb:**
10
+
11
+ ```ts title="vector-store.ts"
12
+ import { MongoDBVector } from "@mastra/mongodb";
13
+
14
+ const store = new MongoDBVector({
15
+ id: 'mongodb-vector',
16
+ uri: process.env.MONGODB_URI,
17
+ dbName: process.env.MONGODB_DATABASE,
18
+ });
19
+ await store.createIndex({
20
+ indexName: "myCollection",
21
+ dimension: 1536,
22
+ });
23
+ await store.upsert({
24
+ indexName: "myCollection",
25
+ vectors: embeddings,
26
+ metadata: chunks.map((chunk) => ({ text: chunk.text })),
27
+ });
28
+ ```
29
+
30
+ ### Using MongoDB Atlas Vector Search
31
+
32
+ For detailed setup instructions and best practices, see the [official MongoDB Atlas Vector Search documentation](https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-overview/?utm_campaign=devrel&utm_source=third-party-content&utm_medium=cta&utm_content=mastra-docs).
33
+
34
+
35
+
36
+ **pg-vector:**
37
+
38
+ ```ts title="vector-store.ts"
39
+ import { PgVector } from "@mastra/pg";
40
+
41
+ const store = new PgVector({
42
+ id: 'pg-vector',
43
+ connectionString: process.env.POSTGRES_CONNECTION_STRING,
44
+ });
45
+
46
+ await store.createIndex({
47
+ indexName: "myCollection",
48
+ dimension: 1536,
49
+ });
50
+
51
+ await store.upsert({
52
+ indexName: "myCollection",
53
+ vectors: embeddings,
54
+ metadata: chunks.map((chunk) => ({ text: chunk.text })),
55
+ });
56
+ ```
57
+
58
+ ### Using PostgreSQL with pgvector
59
+
60
+ PostgreSQL with the pgvector extension is a good solution for teams already using PostgreSQL who want to minimize infrastructure complexity.
61
+ For detailed setup instructions and best practices, see the [official pgvector repository](https://github.com/pgvector/pgvector).
62
+
63
+
64
+
65
+ **pinecone:**
66
+
67
+ ```ts title="vector-store.ts"
68
+ import { PineconeVector } from "@mastra/pinecone";
69
+
70
+ const store = new PineconeVector({
71
+ id: 'pinecone-vector',
72
+ apiKey: process.env.PINECONE_API_KEY,
73
+ });
74
+ await store.createIndex({
75
+ indexName: "myCollection",
76
+ dimension: 1536,
77
+ });
78
+ await store.upsert({
79
+ indexName: "myCollection",
80
+ vectors: embeddings,
81
+ metadata: chunks.map((chunk) => ({ text: chunk.text })),
82
+ });
83
+ ```
84
+
85
+
86
+
87
+ **qdrant:**
88
+
89
+ ```ts title="vector-store.ts"
90
+ import { QdrantVector } from "@mastra/qdrant";
91
+
92
+ const store = new QdrantVector({
93
+ id: 'qdrant-vector',
94
+ url: process.env.QDRANT_URL,
95
+ apiKey: process.env.QDRANT_API_KEY,
96
+ });
97
+
98
+ await store.createIndex({
99
+ indexName: "myCollection",
100
+ dimension: 1536,
101
+ });
102
+
103
+ await store.upsert({
104
+ indexName: "myCollection",
105
+ vectors: embeddings,
106
+ metadata: chunks.map((chunk) => ({ text: chunk.text })),
107
+ });
108
+ ```
109
+
110
+
111
+
112
+ **chroma:**
113
+
114
+ ```ts title="vector-store.ts"
115
+ import { ChromaVector } from "@mastra/chroma";
116
+
117
+ // Running Chroma locally
118
+ // const store = new ChromaVector()
119
+
120
+ // Running on Chroma Cloud
121
+ const store = new ChromaVector({
122
+ id: 'chroma-vector',
123
+ apiKey: process.env.CHROMA_API_KEY,
124
+ tenant: process.env.CHROMA_TENANT,
125
+ database: process.env.CHROMA_DATABASE,
126
+ });
127
+
128
+ await store.createIndex({
129
+ indexName: "myCollection",
130
+ dimension: 1536,
131
+ });
132
+
133
+ await store.upsert({
134
+ indexName: "myCollection",
135
+ vectors: embeddings,
136
+ metadata: chunks.map((chunk) => ({ text: chunk.text })),
137
+ });
138
+ ```
139
+
140
+
141
+
142
+ **astra:**
143
+
144
+ ```ts title="vector-store.ts"
145
+ import { AstraVector } from "@mastra/astra";
146
+
147
+ const store = new AstraVector({
148
+ id: 'astra-vector',
149
+ token: process.env.ASTRA_DB_TOKEN,
150
+ endpoint: process.env.ASTRA_DB_ENDPOINT,
151
+ keyspace: process.env.ASTRA_DB_KEYSPACE,
152
+ });
153
+
154
+ await store.createIndex({
155
+ indexName: "myCollection",
156
+ dimension: 1536,
157
+ });
158
+
159
+ await store.upsert({
160
+ indexName: "myCollection",
161
+ vectors: embeddings,
162
+ metadata: chunks.map((chunk) => ({ text: chunk.text })),
163
+ });
164
+ ```
165
+
166
+
167
+
168
+ **libsql:**
169
+
170
+ ```ts title="vector-store.ts"
171
+ import { LibSQLVector } from "@mastra/core/vector/libsql";
172
+
173
+ const store = new LibSQLVector({
174
+ id: 'libsql-vector',
175
+ url: process.env.DATABASE_URL,
176
+ authToken: process.env.DATABASE_AUTH_TOKEN, // Optional: for Turso cloud databases
177
+ });
178
+
179
+ await store.createIndex({
180
+ indexName: "myCollection",
181
+ dimension: 1536,
182
+ });
183
+
184
+ await store.upsert({
185
+ indexName: "myCollection",
186
+ vectors: embeddings,
187
+ metadata: chunks.map((chunk) => ({ text: chunk.text })),
188
+ });
189
+ ```
190
+
191
+
192
+
193
+ **upstash:**
194
+
195
+ ```ts title="vector-store.ts"
196
+ import { UpstashVector } from "@mastra/upstash";
197
+
198
+ // In Upstash, the store is referred to as an index
199
+ const store = new UpstashVector({
200
+ id: 'upstash-vector',
201
+ url: process.env.UPSTASH_URL,
202
+ token: process.env.UPSTASH_TOKEN,
203
+ });
204
+
205
+ // There is no store.createIndex call here, Upstash creates indexes (known as namespaces in Upstash) automatically
206
+ // when you upsert if that namespace does not exist yet.
207
+ await store.upsert({
208
+ indexName: "myCollection", // the namespace name in Upstash
209
+ vectors: embeddings,
210
+ metadata: chunks.map((chunk) => ({ text: chunk.text })),
211
+ });
212
+ ```
213
+
214
+
215
+
216
+ **cloudflare:**
217
+
218
+ ```ts title="vector-store.ts"
219
+ import { CloudflareVector } from "@mastra/vectorize";
220
+
221
+ const store = new CloudflareVector({
222
+ id: 'cloudflare-vector',
223
+ accountId: process.env.CF_ACCOUNT_ID,
224
+ apiToken: process.env.CF_API_TOKEN,
225
+ });
226
+ await store.createIndex({
227
+ indexName: "myCollection",
228
+ dimension: 1536,
229
+ });
230
+ await store.upsert({
231
+ indexName: "myCollection",
232
+ vectors: embeddings,
233
+ metadata: chunks.map((chunk) => ({ text: chunk.text })),
234
+ });
235
+ ```
236
+
237
+
238
+
239
+ **opensearch:**
240
+
241
+ ```ts title="vector-store.ts"
242
+ import { OpenSearchVector } from "@mastra/opensearch";
243
+
244
+ const store = new OpenSearchVector({ id: "opensearch", node: process.env.OPENSEARCH_URL });
245
+
246
+ await store.createIndex({
247
+ indexName: "my-collection",
248
+ dimension: 1536,
249
+ });
250
+
251
+ await store.upsert({
252
+ indexName: "my-collection",
253
+ vectors: embeddings,
254
+ metadata: chunks.map((chunk) => ({ text: chunk.text })),
255
+ });
256
+ ```
257
+
258
+
259
+
260
+ **elasticsearch:**
261
+
262
+ ```ts title="vector-store.ts"
263
+ import { ElasticSearchVector } from "@mastra/elasticsearch";
264
+
265
+ const store = new ElasticSearchVector({ id: 'elasticsearch-vector', url: process.env.ELASTICSEARCH_URL });
266
+
267
+ await store.createIndex({
268
+ indexName: "my-collection",
269
+ dimension: 1536,
270
+ });
271
+
272
+ await store.upsert({
273
+ indexName: "my-collection",
274
+ vectors: embeddings,
275
+ metadata: chunks.map((chunk) => ({ text: chunk.text })),
276
+ });
277
+ ```
278
+
279
+
280
+ **couchbase:**
281
+
282
+ ```ts title="vector-store.ts"
283
+ import { CouchbaseVector } from "@mastra/couchbase";
284
+
285
+ const store = new CouchbaseVector({
286
+ id: 'couchbase-vector',
287
+ connectionString: process.env.COUCHBASE_CONNECTION_STRING,
288
+ username: process.env.COUCHBASE_USERNAME,
289
+ password: process.env.COUCHBASE_PASSWORD,
290
+ bucketName: process.env.COUCHBASE_BUCKET,
291
+ scopeName: process.env.COUCHBASE_SCOPE,
292
+ collectionName: process.env.COUCHBASE_COLLECTION,
293
+ });
294
+ await store.createIndex({
295
+ indexName: "myCollection",
296
+ dimension: 1536,
297
+ });
298
+ await store.upsert({
299
+ indexName: "myCollection",
300
+ vectors: embeddings,
301
+ metadata: chunks.map((chunk) => ({ text: chunk.text })),
302
+ });
303
+ ```
304
+
305
+
306
+ **lancedb:**
307
+
308
+ ```ts title="vector-store.ts"
309
+ import { LanceVectorStore } from "@mastra/lance";
310
+
311
+ const store = await LanceVectorStore.create("/path/to/db");
312
+
313
+ await store.createIndex({
314
+ tableName: "myVectors",
315
+ indexName: "myCollection",
316
+ dimension: 1536,
317
+ });
318
+
319
+ await store.upsert({
320
+ tableName: "myVectors",
321
+ vectors: embeddings,
322
+ metadata: chunks.map((chunk) => ({ text: chunk.text })),
323
+ });
324
+ ```
325
+
326
+ ### Using LanceDB
327
+
328
+ LanceDB is an embedded vector database built on the Lance columnar format, suitable for local development or cloud deployment.
329
+ For detailed setup instructions and best practices, see the [official LanceDB documentation](https://lancedb.github.io/lancedb/).
330
+
331
+
332
+ **s3vectors:**
333
+
334
+ ```ts title="vector-store.ts"
335
+ import { S3Vectors } from "@mastra/s3vectors";
336
+
337
+ const store = new S3Vectors({
338
+ id: 's3-vectors',
339
+ vectorBucketName: "my-vector-bucket",
340
+ clientConfig: {
341
+ region: "us-east-1",
342
+ },
343
+ nonFilterableMetadataKeys: ["content"],
344
+ });
345
+
346
+ await store.createIndex({
347
+ indexName: "my-index",
348
+ dimension: 1536,
349
+ });
350
+ await store.upsert({
351
+ indexName: "my-index",
352
+ vectors: embeddings,
353
+ metadata: chunks.map((chunk) => ({ text: chunk.text })),
354
+ });
355
+ ```
356
+
357
+
358
+
359
+ ## Using Vector Storage
360
+
361
+ Once initialized, all vector stores share the same interface for creating indexes, upserting embeddings, and querying.
362
+
363
+ ### Creating Indexes
364
+
365
+ Before storing embeddings, you need to create an index with the appropriate dimension size for your embedding model:
366
+
367
+ ```ts title="store-embeddings.ts"
368
+ // Create an index with dimension 1536 (for text-embedding-3-small)
369
+ await store.createIndex({
370
+ indexName: "myCollection",
371
+ dimension: 1536,
372
+ });
373
+ ```
374
+
375
+ The dimension size must match the output dimension of your chosen embedding model. Common dimension sizes are:
376
+
377
+ - OpenAI text-embedding-3-small: 1536 dimensions (or custom, e.g., 256)
378
+ - Cohere embed-multilingual-v3: 1024 dimensions
379
+ - Google text-embedding-004: 768 dimensions (or custom)
380
+
381
+ > **Note:**
382
+ Index dimensions cannot be changed after creation. To use a different model, delete and recreate the index with the new dimension size.
383
+
384
+ ### Naming Rules for Databases
385
+
386
+ Each vector database enforces specific naming conventions for indexes and collections to ensure compatibility and prevent conflicts.
387
+
388
+ **mongodb:**
389
+
390
+ Collection (index) names must:
391
+ - Start with a letter or underscore
392
+ - Be up to 120 bytes long
393
+ - Contain only letters, numbers, underscores, or dots
394
+ - Not contain `$` or the null character
395
+ - Example: `my_collection.123` is valid
396
+ - Example: `my-index` is not valid (contains hyphen)
397
+ - Example: `My$Collection` is not valid (contains `$`)
398
+
399
+ **pg-vector:**
400
+
401
+ Index names must:
402
+ - Start with a letter or underscore
403
+ - Contain only letters, numbers, and underscores
404
+ - Example: `my_index_123` is valid
405
+ - Example: `my-index` is not valid (contains hyphen)
406
+
407
+ **pinecone:**
408
+
409
+ Index names must:
410
+ - Use only lowercase letters, numbers, and dashes
411
+ - Not contain dots (used for DNS routing)
412
+ - Not use non-Latin characters or emojis
413
+ - Have a combined length (with project ID) under 52 characters
414
+ - Example: `my-index-123` is valid
415
+ - Example: `my.index` is not valid (contains dot)
416
+
417
+ **qdrant:**
418
+
419
+ Collection names must:
420
+ - Be 1-255 characters long
421
+ - Not contain any of these special characters:
422
+ - `< > : " / \ | ? *`
423
+ - Null character (`\0`)
424
+ - Unit separator (`\u{1F}`)
425
+ - Example: `my_collection_123` is valid
426
+ - Example: `my/collection` is not valid (contains slash)
427
+
428
+ **chroma:**
429
+
430
+ Collection names must:
431
+ - Be 3-63 characters long
432
+ - Start and end with a letter or number
433
+ - Contain only letters, numbers, underscores, hyphens, or periods
434
+ - Not contain consecutive periods (..)
435
+ - Not be a valid IPv4 address
436
+ - Example: `my-collection-123` is valid
437
+ - Example: `my..collection` is not valid (consecutive periods)
438
+
439
+ **astra:**
440
+
441
+ Collection names must:
442
+ - Not be empty
443
+ - Be 48 characters or less
444
+ - Contain only letters, numbers, and underscores
445
+ - Example: `my_collection_123` is valid
446
+ - Example: `my-collection` is not valid (contains hyphen)
447
+
448
+ **libsql:**
449
+
450
+ Index names must:
451
+ - Start with a letter or underscore
452
+ - Contain only letters, numbers, and underscores
453
+ - Example: `my_index_123` is valid
454
+ - Example: `my-index` is not valid (contains hyphen)
455
+
456
+ **upstash:**
457
+
458
+ Namespace names must:
459
+ - Be 2-100 characters long
460
+ - Contain only:
461
+ - Alphanumeric characters (a-z, A-Z, 0-9)
462
+ - Underscores, hyphens, dots
463
+ - Not start or end with special characters (_, -, .)
464
+ - Can be case-sensitive
465
+ - Example: `MyNamespace123` is valid
466
+ - Example: `_namespace` is not valid (starts with underscore)
467
+
468
+ **cloudflare:**
469
+
470
+ Index names must:
471
+ - Start with a letter
472
+ - Be shorter than 32 characters
473
+ - Contain only lowercase ASCII letters, numbers, and dashes
474
+ - Use dashes instead of spaces
475
+ - Example: `my-index-123` is valid
476
+ - Example: `My_Index` is not valid (uppercase and underscore)
477
+
478
+ **opensearch:**
479
+
480
+ Index names must:
481
+ - Not contain uppercase letters
482
+ - Not begin with underscores or hyphens
483
+ - Not contain spaces or commas
484
+ - Not contain special characters (e.g. `:`, `"`, `*`, `+`, `/`, `\`, `|`, `?`, `#`, `>`, `<`)
485
+ - Example: `my-index-123` is valid
486
+ - Example: `My_Index` is not valid (contains uppercase letters)
487
+ - Example: `_myindex` is not valid (begins with underscore)
488
+
489
+ **elasticsearch:**
490
+
491
+ Index names must:
492
+ - Not contain uppercase letters
493
+ - Not exceed 255 bytes (counting multi-byte characters)
494
+ - Not begin with underscores, hyphens, or plus signs
495
+ - Not contain spaces or commas
496
+ - Not contain special characters (e.g. `:`, `"`, `*`, `+`, `/`, `\`, `|`, `?`, `#`, `>`, `<`)
497
+ - Not be "." or ".."
498
+ - Not start with "." (deprecated except for system/hidden indices)
499
+ - Example: `my-index-123` is valid
500
+ - Example: `My_Index` is not valid (contains uppercase letters)
501
+ - Example: `_myindex` is not valid (begins with underscore)
502
+ - Example: `.myindex` is not valid (begins with dot, deprecated)
503
+
504
+ **s3vectors:**
505
+
506
+ Index names must:
507
+ - Be unique within the same vector bucket
508
+ - Be 3–63 characters long
509
+ - Use only lowercase letters (`a–z`), numbers (`0–9`), hyphens (`-`), and dots (`.`)
510
+ - Begin and end with a letter or number
511
+ - Example: `my-index.123` is valid
512
+ - Example: `my_index` is not valid (contains underscore)
513
+ - Example: `-myindex` is not valid (begins with hyphen)
514
+ - Example: `myindex-` is not valid (ends with hyphen)
515
+ - Example: `MyIndex` is not valid (contains uppercase letters)
516
+
517
+
518
+ ### Upserting Embeddings
519
+
520
+ After creating an index, you can store embeddings along with their basic metadata:
521
+
522
+ ```ts title="store-embeddings.ts"
523
+ // Store embeddings with their corresponding metadata
524
+ await store.upsert({
525
+ indexName: "myCollection", // index name
526
+ vectors: embeddings, // array of embedding vectors
527
+ metadata: chunks.map((chunk) => ({
528
+ text: chunk.text, // The original text content
529
+ id: chunk.id, // Optional unique identifier
530
+ })),
531
+ });
532
+ ```
533
+
534
+ The upsert operation:
535
+
536
+ - Takes an array of embedding vectors and their corresponding metadata
537
+ - Updates existing vectors if they share the same ID
538
+ - Creates new vectors if they don't exist
539
+ - Automatically handles batching for large datasets
540
+
541
+ ## Adding Metadata
542
+
543
+ Vector stores support rich metadata (any JSON-serializable fields) for filtering and organization. Since metadata is stored with no fixed schema, use consistent field naming to avoid unexpected query results.
544
+
545
+ > **Note:**
546
+ Metadata is crucial for vector storage - without it, you'd only have numerical embeddings with no way to return the original text or filter results. Always store at least the source text as metadata.
547
+
548
+ ```ts
549
+ // Store embeddings with rich metadata for better organization and filtering
550
+ await store.upsert({
551
+ indexName: "myCollection",
552
+ vectors: embeddings,
553
+ metadata: chunks.map((chunk) => ({
554
+ // Basic content
555
+ text: chunk.text,
556
+ id: chunk.id,
557
+
558
+ // Document organization
559
+ source: chunk.source,
560
+ category: chunk.category,
561
+
562
+ // Temporal metadata
563
+ createdAt: new Date().toISOString(),
564
+ version: "1.0",
565
+
566
+ // Custom fields
567
+ language: chunk.language,
568
+ author: chunk.author,
569
+ confidenceScore: chunk.score,
570
+ })),
571
+ });
572
+ ```
573
+
574
+ Key metadata considerations:
575
+
576
+ - Be strict with field naming - inconsistencies like 'category' vs 'Category' will affect queries
577
+ - Beyond the source text, only include fields you plan to filter or sort by - extra fields add overhead
578
+ - Add timestamps (e.g., 'createdAt', 'lastUpdated') to track content freshness
579
+
580
+ ## Deleting Vectors
581
+
582
+ When building RAG applications, you often need to clean up stale vectors when documents are deleted or updated. Mastra provides the `deleteVectors` method that supports deleting vectors by metadata filters, making it easy to remove all embeddings associated with a specific document.
583
+
584
+ ### Delete by Metadata Filter
585
+
586
+ The most common use case is deleting all vectors for a specific document when a user deletes it:
587
+
588
+ ```ts title="delete-vectors.ts"
589
+ // Delete all vectors for a specific document
590
+ await store.deleteVectors({
591
+ indexName: "myCollection",
592
+ filter: { docId: "document-123" },
593
+ });
594
+ ```
595
+
596
+ This is particularly useful when:
597
+ - A user deletes a document and you need to remove all its chunks
598
+ - You're re-indexing a document and want to remove old vectors first
599
+ - You need to clean up vectors for a specific user or tenant
600
+
601
+ ### Delete Multiple Documents
602
+
603
+ You can also use complex filters to delete vectors matching multiple conditions:
604
+
605
+ ```ts title="delete-vectors-advanced.ts"
606
+ // Delete all vectors for multiple documents
607
+ await store.deleteVectors({
608
+ indexName: "myCollection",
609
+ filter: {
610
+ docId: { $in: ["doc-1", "doc-2", "doc-3"] },
611
+ },
612
+ });
613
+
614
+ // Delete archived vectors belonging to a specific user
615
+ await store.deleteVectors({
616
+ indexName: "myCollection",
617
+ filter: {
618
+ $and: [
619
+ { userId: "user-123" },
620
+ { status: "archived" },
621
+ ],
622
+ },
623
+ });
624
+ ```
625
+
626
+ ### Delete by Vector IDs
627
+
628
+ If you have specific vector IDs to delete, you can pass them directly:
629
+
630
+ ```ts title="delete-by-ids.ts"
631
+ // Delete specific vectors by their IDs
632
+ await store.deleteVectors({
633
+ indexName: "myCollection",
634
+ ids: ["vec-1", "vec-2", "vec-3"],
635
+ });
636
+ ```
637
+
638
+ ## Best Practices
639
+
640
+ - Create indexes before bulk insertions
641
+ - Use batch operations for large insertions (the upsert method handles batching automatically)
642
+ - Only store metadata you'll query against or need returned with results (such as the source text)
643
+ - Match embedding dimensions to your model (e.g., 1536 for `text-embedding-3-small`)