@mastra/pg 0.16.1-alpha.0 → 0.16.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # @mastra/pg
2
2
 
3
+ ## 0.16.1-alpha.1
4
+
5
+ ### Patch Changes
6
+
7
+ - Fix PostgreSQL vector index recreation issue and add optional index configuration ([#8020](https://github.com/mastra-ai/mastra/pull/8020))
8
+ - Fixed critical bug where memory vector indexes were unnecessarily recreated on every operation
9
+ - Added support for configuring vector index types (HNSW, IVFFlat, flat) and parameters
10
+
11
+ - fix(pg-vector): Fix vector type qualification for custom schemas on RDS ([#8070](https://github.com/mastra-ai/mastra/pull/8070))
12
+
13
+ - Updated dependencies [[`4b339b8`](https://github.com/mastra-ai/mastra/commit/4b339b8141c20d6a6d80583c7e8c5c05d8c19492), [`c591dfc`](https://github.com/mastra-ai/mastra/commit/c591dfc1e600fae1dedffe239357d250e146378f), [`1920c5c`](https://github.com/mastra-ai/mastra/commit/1920c5c6d666f687785c73021196aa551e579e0d), [`b6a3b65`](https://github.com/mastra-ai/mastra/commit/b6a3b65d830fa0ca7754ad6481661d1f2c878f21), [`af3abb6`](https://github.com/mastra-ai/mastra/commit/af3abb6f7c7585d856e22d27f4e7d2ece2186b9a)]:
14
+ - @mastra/core@0.18.0-alpha.3
15
+
3
16
  ## 0.16.1-alpha.0
4
17
 
5
18
  ### Patch Changes
package/README.md CHANGED
@@ -27,6 +27,14 @@ await vectorStore.createIndex({
27
27
  indexName: 'my_vectors',
28
28
  dimension: 1536,
29
29
  metric: 'cosine',
30
+ // Optional: Configure index type and parameters
31
+ indexConfig: {
32
+ type: 'hnsw', // 'ivfflat' (default), 'hnsw', or 'flat'
33
+ hnsw: {
34
+ m: 16, // Number of connections per layer (default: 8)
35
+ efConstruction: 64 // Size of dynamic list (default: 32)
36
+ }
37
+ }
30
38
  });
31
39
 
32
40
  // Add vectors
@@ -104,14 +112,15 @@ Connection pool settings:
104
112
 
105
113
  ### Vector Store Features
106
114
 
107
- - Vector similarity search with cosine, euclidean, and dot product metrics
115
+ - Vector similarity search with cosine, euclidean, and dot product (inner product) metrics
108
116
  - Advanced metadata filtering with MongoDB-like query syntax
109
117
  - Minimum score threshold for queries
110
118
  - Automatic UUID generation for vectors
111
119
  - Table management (create, list, describe, delete, truncate)
112
- - Uses pgvector's IVFFLAT indexing with 100 lists by default
113
- - Supports HNSW indexing with configurable parameters
114
- - Supports flat indexing
120
+ - Configurable vector index types:
121
+ - **IVFFlat** (default): Balanced speed/accuracy; the `lists` parameter is auto-calculated when not specified
122
+ - **HNSW**: Fastest queries, higher memory usage, best for large datasets
123
+ - **Flat**: No index, 100% accuracy, best for small datasets (<1000 vectors)
115
124
 
116
125
  ### Storage Features
117
126
 
@@ -139,14 +148,111 @@ Example filter:
139
148
  }
140
149
  ```
141
150
 
151
+ ## Vector Index Configuration
152
+
153
+ Three index configurations are supported — pgvector's IVFFlat and HNSW index types, plus an unindexed `flat` mode — each with different performance characteristics:
154
+
155
+ ### IVFFlat Index (Default)
156
+
157
+ IVFFlat groups vectors into clusters for efficient searching:
158
+
159
+ ```typescript
160
+ await vectorStore.createIndex({
161
+ indexName: 'my_vectors',
162
+ dimension: 1536,
163
+ metric: 'cosine',
164
+ indexConfig: {
165
+ type: 'ivfflat',
166
+ ivf: {
167
+ lists: 1000, // Number of clusters (default: auto-calculated as sqrt(rows) * 2)
168
+ },
169
+ },
170
+ });
171
+ ```
172
+
173
+ - **Best for:** Medium to large datasets (10K-1M vectors)
174
+ - **Build time:** Minutes for millions of vectors
175
+ - **Query speed:** Fast (tens of milliseconds)
176
+ - **Memory:** Moderate
177
+ - **Accuracy:** ~95-99%
178
+
179
+ ### HNSW Index
180
+
181
+ HNSW builds a graph structure for extremely fast searches:
182
+
183
+ ```typescript
184
+ await vectorStore.createIndex({
185
+ indexName: 'my_vectors',
186
+ dimension: 1536,
187
+ metric: 'dotproduct', // Recommended for normalized embeddings (OpenAI, etc.)
188
+ indexConfig: {
189
+ type: 'hnsw',
190
+ hnsw: {
191
+ m: 16, // Connections per layer (default: 8, range: 2-100)
192
+ efConstruction: 64, // Dynamic list size (default: 32, range: 4-1000)
193
+ },
194
+ },
195
+ });
196
+ ```
197
+
198
+ - **Best for:** Large datasets (100K+ vectors) requiring fastest searches
199
+ - **Build time:** Can take hours for large datasets
200
+ - **Query speed:** Very fast (milliseconds even for millions)
201
+ - **Memory:** High (can be 2-3x vector size)
202
+ - **Accuracy:** ~99%
203
+
204
+ **Tuning HNSW:**
205
+
206
+ - Higher `m`: Better accuracy, more memory (16-32 for high accuracy)
207
+ - Higher `efConstruction`: Better index quality, slower builds (64-200 for quality)
208
+
209
+ ### Flat Index (No Index)
210
+
211
+ Uses sequential scan for 100% accuracy:
212
+
213
+ ```typescript
214
+ await vectorStore.createIndex({
215
+ indexName: 'my_vectors',
216
+ dimension: 1536,
217
+ metric: 'cosine',
218
+ indexConfig: {
219
+ type: 'flat',
220
+ },
221
+ });
222
+ ```
223
+
224
+ - **Best for:** Small datasets (<1000 vectors) or when 100% accuracy is required
225
+ - **Build time:** None
226
+ - **Query speed:** Slow for large datasets (linear scan)
227
+ - **Memory:** Minimal (just vectors)
228
+ - **Accuracy:** 100%
229
+
230
+ ### Distance Metrics
231
+
232
+ Choose the appropriate metric for your embeddings:
233
+
234
+ - **`cosine`** (default): Angular similarity, good for text embeddings
235
+ - **`euclidean`**: L2 distance, for unnormalized embeddings
236
+ - **`dotproduct`**: Dot product, optimal for normalized embeddings (OpenAI, Cohere)
237
+
238
+ ### Index Recreation
239
+
240
+ The system automatically detects configuration changes and rebuilds an index only when its configuration has actually changed, avoiding the performance cost of unnecessary index recreation.
241
+
242
+ **Important behaviors:**
243
+
244
+ - If no `indexConfig` is provided, existing indexes are preserved as-is
245
+ - If `indexConfig` is provided, indexes are only rebuilt if the configuration differs
246
+ - New indexes default to IVFFlat with cosine distance when no config is specified
247
+
142
248
  ## Vector Store Methods
143
249
 
144
- - `createIndex({indexName, dimension, metric?, indexConfig?, defineIndex?})`: Create a new table with vector support
250
+ - `createIndex({indexName, dimension, metric?, indexConfig?, buildIndex?})`: Create a new table with vector support
251
+ - `buildIndex({indexName, metric?, indexConfig?})`: Build or rebuild vector index
145
252
  - `upsert({indexName, vectors, metadata?, ids?})`: Add or update vectors
146
253
  - `query({indexName, queryVector, topK?, filter?, includeVector?, minScore?})`: Search for similar vectors
147
- - `defineIndex({indexName, metric?, indexConfig?})`: Define an index
148
254
  - `listIndexes()`: List all vector-enabled tables
149
- - `describeIndex(indexName)`: Get table statistics
255
+ - `describeIndex(indexName)`: Get table statistics and index configuration
150
256
  - `deleteIndex(indexName)`: Delete a table
151
257
  - `truncateIndex(indexName)`: Remove all data from a table
152
258
  - `disconnect()`: Close all database connections
package/dist/index.cjs CHANGED
@@ -371,7 +371,9 @@ var PgVector = class extends vector.MastraVector {
371
371
  setupSchemaPromise = null;
372
372
  installVectorExtensionPromise = null;
373
373
  vectorExtensionInstalled = void 0;
374
+ vectorExtensionSchema = null;
374
375
  schemaSetupComplete = void 0;
376
+ cacheWarmupPromise = null;
375
377
  constructor({
376
378
  connectionString,
377
379
  schemaName,
@@ -402,18 +404,24 @@ var PgVector = class extends vector.MastraVector {
402
404
  "vector.type": "postgres"
403
405
  }
404
406
  }) ?? basePool;
405
- void (async () => {
406
- const existingIndexes = await this.listIndexes();
407
- void existingIndexes.map(async (indexName) => {
408
- const info = await this.getIndexInfo({ indexName });
409
- const key = await this.getIndexCacheKey({
410
- indexName,
411
- metric: info.metric,
412
- dimension: info.dimension,
413
- type: info.type
414
- });
415
- this.createdIndexes.set(indexName, key);
416
- });
407
+ this.cacheWarmupPromise = (async () => {
408
+ try {
409
+ const existingIndexes = await this.listIndexes();
410
+ await Promise.all(
411
+ existingIndexes.map(async (indexName) => {
412
+ const info = await this.getIndexInfo({ indexName });
413
+ const key = await this.getIndexCacheKey({
414
+ indexName,
415
+ metric: info.metric,
416
+ dimension: info.dimension,
417
+ type: info.type
418
+ });
419
+ this.createdIndexes.set(indexName, key);
420
+ })
421
+ );
422
+ } catch (error) {
423
+ this.logger?.debug("Cache warming skipped or failed", { error });
424
+ }
417
425
  })();
418
426
  } catch (error$1) {
419
427
  throw new error.MastraError(
@@ -433,6 +441,45 @@ var PgVector = class extends vector.MastraVector {
433
441
  if (!this.mutexesByName.has(indexName)) this.mutexesByName.set(indexName, new asyncMutex.Mutex());
434
442
  return this.mutexesByName.get(indexName);
435
443
  }
444
+ /**
445
+ * Detects which schema contains the vector extension
446
+ */
447
+ async detectVectorExtensionSchema(client) {
448
+ try {
449
+ const result = await client.query(`
450
+ SELECT n.nspname as schema_name
451
+ FROM pg_extension e
452
+ JOIN pg_namespace n ON e.extnamespace = n.oid
453
+ WHERE e.extname = 'vector'
454
+ LIMIT 1;
455
+ `);
456
+ if (result.rows.length > 0) {
457
+ this.vectorExtensionSchema = result.rows[0].schema_name;
458
+ this.logger.debug("Vector extension found in schema", { schema: this.vectorExtensionSchema });
459
+ return this.vectorExtensionSchema;
460
+ }
461
+ return null;
462
+ } catch (error) {
463
+ this.logger.debug("Could not detect vector extension schema", { error });
464
+ return null;
465
+ }
466
+ }
467
+ /**
468
+ * Gets the properly qualified vector type name
469
+ */
470
+ getVectorTypeName() {
471
+ if (this.vectorExtensionSchema) {
472
+ if (this.vectorExtensionSchema === "pg_catalog") {
473
+ return "vector";
474
+ }
475
+ if (this.vectorExtensionSchema === (this.schema || "public")) {
476
+ return "vector";
477
+ }
478
+ const validatedSchema = utils.parseSqlIdentifier(this.vectorExtensionSchema, "vector extension schema");
479
+ return `${validatedSchema}.vector`;
480
+ }
481
+ return "vector";
482
+ }
436
483
  getTableName(indexName) {
437
484
  const parsedIndexName = utils.parseSqlIdentifier(indexName, "index name");
438
485
  const quotedIndexName = `"${parsedIndexName}"`;
@@ -504,11 +551,12 @@ var PgVector = class extends vector.MastraVector {
504
551
  await client.query(`SET LOCAL ivfflat.probes = ${probes}`);
505
552
  }
506
553
  const { tableName } = this.getTableName(indexName);
554
+ const vectorType = this.getVectorTypeName();
507
555
  const query = `
508
556
  WITH vector_scores AS (
509
557
  SELECT
510
558
  vector_id as id,
511
- 1 - (embedding <=> '${vectorStr}'::vector) as score,
559
+ 1 - (embedding <=> '${vectorStr}'::${vectorType}) as score,
512
560
  metadata
513
561
  ${includeVector ? ", embedding" : ""}
514
562
  FROM ${tableName}
@@ -552,13 +600,14 @@ var PgVector = class extends vector.MastraVector {
552
600
  try {
553
601
  await client.query("BEGIN");
554
602
  const vectorIds = ids || vectors.map(() => crypto.randomUUID());
603
+ const vectorType = this.getVectorTypeName();
555
604
  for (let i = 0; i < vectors.length; i++) {
556
605
  const query = `
557
606
  INSERT INTO ${tableName} (vector_id, embedding, metadata)
558
- VALUES ($1, $2::vector, $3::jsonb)
607
+ VALUES ($1, $2::${vectorType}, $3::jsonb)
559
608
  ON CONFLICT (vector_id)
560
609
  DO UPDATE SET
561
- embedding = $2::vector,
610
+ embedding = $2::${vectorType},
562
611
  metadata = $3::jsonb
563
612
  RETURNING embedding::text
564
613
  `;
@@ -705,11 +754,15 @@ var PgVector = class extends vector.MastraVector {
705
754
  try {
706
755
  await this.setupSchema(client);
707
756
  await this.installVectorExtension(client);
757
+ if (this.schema && this.vectorExtensionSchema && this.schema !== this.vectorExtensionSchema && this.vectorExtensionSchema !== "pg_catalog") {
758
+ await client.query(`SET search_path TO ${this.getSchemaName()}, "${this.vectorExtensionSchema}"`);
759
+ }
760
+ const vectorType = this.getVectorTypeName();
708
761
  await client.query(`
709
762
  CREATE TABLE IF NOT EXISTS ${tableName} (
710
763
  id SERIAL PRIMARY KEY,
711
764
  vector_id TEXT UNIQUE NOT NULL,
712
- embedding vector(${dimension}),
765
+ embedding ${vectorType}(${dimension}),
713
766
  metadata JSONB DEFAULT '{}'::jsonb
714
767
  );
715
768
  `);
@@ -764,17 +817,63 @@ var PgVector = class extends vector.MastraVector {
764
817
  async setupIndex({ indexName, metric, indexConfig }, client) {
765
818
  const mutex = this.getMutexByName(`build-${indexName}`);
766
819
  await mutex.runExclusive(async () => {
820
+ const isConfigEmpty = !indexConfig || Object.keys(indexConfig).length === 0 || !indexConfig.type && !indexConfig.ivf && !indexConfig.hnsw;
821
+ const indexType = isConfigEmpty ? "ivfflat" : indexConfig.type || "ivfflat";
767
822
  const { tableName, vectorIndexName } = this.getTableName(indexName);
768
- if (this.createdIndexes.has(indexName)) {
823
+ let existingIndexInfo = null;
824
+ let dimension = 0;
825
+ try {
826
+ existingIndexInfo = await this.getIndexInfo({ indexName });
827
+ dimension = existingIndexInfo.dimension;
828
+ if (isConfigEmpty && existingIndexInfo.metric === metric) {
829
+ if (existingIndexInfo.type === "flat") {
830
+ this.logger?.debug(`No index exists for ${vectorIndexName}, will create default ivfflat index`);
831
+ } else {
832
+ this.logger?.debug(
833
+ `Index ${vectorIndexName} already exists (type: ${existingIndexInfo.type}, metric: ${existingIndexInfo.metric}), preserving existing configuration`
834
+ );
835
+ const cacheKey = await this.getIndexCacheKey({
836
+ indexName,
837
+ dimension,
838
+ type: existingIndexInfo.type,
839
+ metric: existingIndexInfo.metric
840
+ });
841
+ this.createdIndexes.set(indexName, cacheKey);
842
+ return;
843
+ }
844
+ }
845
+ let configMatches = existingIndexInfo.metric === metric && existingIndexInfo.type === indexType;
846
+ if (indexType === "hnsw") {
847
+ configMatches = configMatches && existingIndexInfo.config.m === (indexConfig.hnsw?.m ?? 8) && existingIndexInfo.config.efConstruction === (indexConfig.hnsw?.efConstruction ?? 32);
848
+ } else if (indexType === "flat") {
849
+ configMatches = configMatches && existingIndexInfo.type === "flat";
850
+ } else if (indexType === "ivfflat" && indexConfig.ivf?.lists) {
851
+ configMatches = configMatches && existingIndexInfo.config.lists === indexConfig.ivf?.lists;
852
+ }
853
+ if (configMatches) {
854
+ this.logger?.debug(`Index ${vectorIndexName} already exists with same configuration, skipping recreation`);
855
+ const cacheKey = await this.getIndexCacheKey({
856
+ indexName,
857
+ dimension,
858
+ type: existingIndexInfo.type,
859
+ metric: existingIndexInfo.metric
860
+ });
861
+ this.createdIndexes.set(indexName, cacheKey);
862
+ return;
863
+ }
864
+ this.logger?.info(`Index ${vectorIndexName} configuration changed, rebuilding index`);
769
865
  await client.query(`DROP INDEX IF EXISTS ${vectorIndexName}`);
866
+ this.describeIndexCache.delete(indexName);
867
+ } catch {
868
+ this.logger?.debug(`Index ${indexName} doesn't exist yet, will create it`);
770
869
  }
771
- if (indexConfig.type === "flat") {
870
+ if (indexType === "flat") {
772
871
  this.describeIndexCache.delete(indexName);
773
872
  return;
774
873
  }
775
874
  const metricOp = metric === "cosine" ? "vector_cosine_ops" : metric === "euclidean" ? "vector_l2_ops" : "vector_ip_ops";
776
875
  let indexSQL;
777
- if (indexConfig.type === "hnsw") {
876
+ if (indexType === "hnsw") {
778
877
  const m = indexConfig.hnsw?.m ?? 8;
779
878
  const efConstruction = indexConfig.hnsw?.efConstruction ?? 32;
780
879
  indexSQL = `
@@ -811,27 +910,48 @@ var PgVector = class extends vector.MastraVector {
811
910
  if (!this.installVectorExtensionPromise) {
812
911
  this.installVectorExtensionPromise = (async () => {
813
912
  try {
814
- const extensionCheck = await client.query(`
815
- SELECT EXISTS (
816
- SELECT 1 FROM pg_extension WHERE extname = 'vector'
913
+ const existingSchema = await this.detectVectorExtensionSchema(client);
914
+ if (existingSchema) {
915
+ this.vectorExtensionInstalled = true;
916
+ this.vectorExtensionSchema = existingSchema;
917
+ this.logger.info(`Vector extension already installed in schema: ${existingSchema}`);
918
+ return;
919
+ }
920
+ try {
921
+ if (this.schema && this.schema !== "public") {
922
+ try {
923
+ await client.query(`CREATE EXTENSION IF NOT EXISTS vector SCHEMA ${this.getSchemaName()}`);
924
+ this.vectorExtensionInstalled = true;
925
+ this.vectorExtensionSchema = this.schema;
926
+ this.logger.info(`Vector extension installed in schema: ${this.schema}`);
927
+ return;
928
+ } catch (schemaError) {
929
+ this.logger.debug(`Could not install vector extension in schema ${this.schema}, trying public schema`, {
930
+ error: schemaError
931
+ });
932
+ }
933
+ }
934
+ await client.query("CREATE EXTENSION IF NOT EXISTS vector");
935
+ const installedSchema = await this.detectVectorExtensionSchema(client);
936
+ if (installedSchema) {
937
+ this.vectorExtensionInstalled = true;
938
+ this.vectorExtensionSchema = installedSchema;
939
+ this.logger.info(`Vector extension installed in schema: ${installedSchema}`);
940
+ }
941
+ } catch (error) {
942
+ this.logger.warn(
943
+ "Could not install vector extension. This requires superuser privileges. If the extension is already installed, you can ignore this warning.",
944
+ { error }
817
945
  );
818
- `);
819
- this.vectorExtensionInstalled = extensionCheck.rows[0].exists;
820
- if (!this.vectorExtensionInstalled) {
821
- try {
822
- await client.query("CREATE EXTENSION IF NOT EXISTS vector");
946
+ const existingSchema2 = await this.detectVectorExtensionSchema(client);
947
+ if (existingSchema2) {
823
948
  this.vectorExtensionInstalled = true;
824
- this.logger.info("Vector extension installed successfully");
825
- } catch {
826
- this.logger.warn(
827
- "Could not install vector extension. This requires superuser privileges. If the extension is already installed globally, you can ignore this warning."
828
- );
949
+ this.vectorExtensionSchema = existingSchema2;
950
+ this.logger.info(`Vector extension found in schema: ${existingSchema2}`);
829
951
  }
830
- } else {
831
- this.logger.debug("Vector extension already installed, skipping installation");
832
952
  }
833
953
  } catch (error) {
834
- this.logger.error("Error checking vector extension status", { error });
954
+ this.logger.error("Error setting up vector extension", { error });
835
955
  this.vectorExtensionInstalled = void 0;
836
956
  this.installVectorExtensionPromise = null;
837
957
  throw error;
@@ -1033,6 +1153,12 @@ var PgVector = class extends vector.MastraVector {
1033
1153
  }
1034
1154
  }
1035
1155
  async disconnect() {
1156
+ if (this.cacheWarmupPromise) {
1157
+ try {
1158
+ await this.cacheWarmupPromise;
1159
+ } catch {
1160
+ }
1161
+ }
1036
1162
  await this.pool.end();
1037
1163
  }
1038
1164
  /**
@@ -1055,8 +1181,9 @@ var PgVector = class extends vector.MastraVector {
1055
1181
  let updateParts = [];
1056
1182
  let values = [id];
1057
1183
  let valueIndex = 2;
1184
+ const vectorType = this.getVectorTypeName();
1058
1185
  if (update.vector) {
1059
- updateParts.push(`embedding = $${valueIndex}::vector`);
1186
+ updateParts.push(`embedding = $${valueIndex}::${vectorType}`);
1060
1187
  values.push(`[${update.vector.join(",")}]`);
1061
1188
  valueIndex++;
1062
1189
  }