@mastra/pg 0.14.5 → 0.14.6-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,367 +0,0 @@
1
- import pg from 'pg';
2
- import { describe, it, beforeAll, afterAll, beforeEach, afterEach } from 'vitest';
3
-
4
- import type { TestConfig, TestResult } from './performance.helpers';
5
- import {
6
- baseTestConfigs,
7
- calculateTimeout,
8
- generateRandomVectors,
9
- findNearestBruteForce,
10
- calculateRecall,
11
- formatTable,
12
- groupBy,
13
- measureLatency,
14
- getListCount,
15
- getSearchEf,
16
- generateClusteredVectors,
17
- generateSkewedVectors,
18
- getHNSWConfig,
19
- getIndexDescription,
20
- warmupQuery,
21
- } from './performance.helpers';
22
- import type { IndexConfig, IndexType } from './types';
23
-
24
- import { PgVector } from '.';
25
-
26
/**
 * One index setup exercised by the performance suite: the regular index
 * options plus the index type and an optional rebuild switch.
 */
interface IndexTestConfig extends IndexConfig {
  /** When true, the suite calls `buildIndex` again after the vectors were upserted. */
  rebuild?: boolean;
  /** Index type under test. */
  type: IndexType;
}
30
-
31
/**
 * PgVector variant used by the performance suite. It owns a second connection
 * pool (`perfPool`) through which whole batches of vectors are written with a
 * single `unnest`-based statement.
 */
class PGPerformanceVector extends PgVector {
  private readonly perfPool: pg.Pool;
  // Tracks whether perfPool was already shut down so disconnect() can be called twice safely.
  private perfPoolClosed = false;

  constructor(connectionString: string) {
    super(connectionString);

    this.perfPool = new pg.Pool({
      connectionString,
      max: 20, // Maximum number of clients in the pool
      idleTimeoutMillis: 30000, // Close idle connections after 30 seconds
      connectionTimeoutMillis: 2000, // Fail fast if can't connect
    });
  }

  /**
   * Inserts or updates all given vectors inside one transaction.
   *
   * @param indexName - Table to write to. It is interpolated into the SQL text,
   *   so it must come from trusted code, never from user input.
   * @param vectors - Embeddings to store, one entry per row.
   * @param metadata - Optional per-vector metadata; defaults to `{}` for every vector.
   * @param ids - Optional per-vector ids; defaults to freshly generated UUIDs.
   * @returns The ids that were written, in input order.
   * @throws Re-throws whatever error made the transaction fail (after attempting a rollback).
   */
  async bulkUpsert(indexName: string, vectors: number[][], metadata?: any[], ids?: string[]) {
    const client = await this.perfPool.connect();
    try {
      await client.query('BEGIN');
      const vectorIds = ids ?? vectors.map(() => crypto.randomUUID());

      // Same query structure as upsert, just using unnest for bulk operation
      const query = `
        INSERT INTO ${indexName} (vector_id, embedding, metadata)
        SELECT * FROM unnest(
          $1::text[],
          $2::vector[],
          $3::jsonb[]
        )
        ON CONFLICT (vector_id)
        DO UPDATE SET
          embedding = EXCLUDED.embedding,
          metadata = EXCLUDED.metadata
        RETURNING embedding::text
      `;

      // Same parameter structure as upsert, just as arrays
      await client.query(query, [
        vectorIds,
        vectors.map(v => `[${v.join(',')}]`),
        (metadata ?? vectors.map(() => ({}))).map(m => JSON.stringify(m)),
      ]);
      await client.query('COMMIT');
      return vectorIds;
    } catch (error) {
      try {
        await client.query('ROLLBACK');
      } catch (rollbackError) {
        // A failing rollback must not replace the error that caused it.
        console.error('ROLLBACK failed after bulk upsert error', rollbackError);
      }
      throw error;
    } finally {
      client.release();
    }
  }

  /**
   * Shuts down the dedicated bulk-upsert pool in addition to whatever the
   * base class releases, so no connections are left open after the suite.
   */
  override async disconnect() {
    try {
      if (!this.perfPoolClosed) {
        this.perfPoolClosed = true;
        await this.perfPool.end();
      }
    } finally {
      await super.disconnect();
    }
  }
}
84
-
85
// Remembers which (dimension, k, index type) combinations were already warmed up.
const warmupCache = new Map<string, boolean>();

/**
 * Runs `warmupQuery` once per combination of dimension, k and index type;
 * later calls with the same combination return without touching the database.
 */
async function smartWarmup(
  vectorDB: PGPerformanceVector,
  testIndexName: string,
  indexType: string,
  dimension: number,
  k: number,
) {
  const cacheKey = [dimension, k, indexType].join('-');
  if (warmupCache.has(cacheKey)) {
    return;
  }

  console.log(`Warming up ${indexType} index for ${dimension}d vectors, k=${k}`);
  await warmupQuery(vectorDB, testIndexName, dimension, k);
  warmupCache.set(cacheKey, true);
}
100
-
101
// Database under test; DB_URL overrides the local default.
const connectionString = process.env.DB_URL || `postgresql://postgres:postgres@localhost:5435/mastra`;

/**
 * Recall/latency benchmark. For every index configuration, dataset shape and
 * vector distribution one test is registered that creates the index, loads it
 * in batches, queries it and appends one `TestResult` per EF setting to
 * `results`. The collected results are printed once the suite has finished.
 */
describe('PostgreSQL Index Performance', () => {
  let vectorDB: PGPerformanceVector;
  const testIndexName = 'test_index_performance';
  const results: TestResult[] = [];

  const indexConfigs: IndexTestConfig[] = [
    { type: 'flat' }, // Test flat/linear search as baseline
    { type: 'ivfflat', ivf: { lists: 100 } }, // Test IVF with fixed lists
    { type: 'ivfflat', rebuild: true }, // Test IVF with calculated lists and rebuild
    { type: 'hnsw' }, // Test HNSW with default parameters
    { type: 'hnsw', hnsw: { m: 16, efConstruction: 64 } }, // Test HNSW with custom parameters
  ];

  beforeAll(async () => {
    // Initialize PGPerformanceVector
    vectorDB = new PGPerformanceVector(connectionString);
  });

  // The index is dropped before and after every test so each run starts from scratch.
  beforeEach(async () => {
    await vectorDB.deleteIndex({ indexName: testIndexName });
  });

  afterEach(async () => {
    await vectorDB.deleteIndex({ indexName: testIndexName });
  });

  afterAll(async () => {
    await vectorDB.disconnect();
    analyzeResults(results);
  });

  // Combine all test configs
  const allConfigs: TestConfig[] = [
    ...baseTestConfigs['64'],
    ...baseTestConfigs['384'],
    ...baseTestConfigs['1024'],
    ...baseTestConfigs.smokeTests,
    ...baseTestConfigs.stressTests,
  ];

  // For each index config
  for (const indexConfig of indexConfigs) {
    const indexType = indexConfig.type;
    const rebuild = indexConfig.rebuild ?? false;
    const hnswConfig = getHNSWConfig(indexConfig);
    const indexDescription = getIndexDescription({
      type: indexType,
      hnsw: hnswConfig,
    });

    describe(`Index: ${indexDescription}`, () => {
      for (const testConfig of allConfigs) {
        const timeout = calculateTimeout(testConfig.dimension, testConfig.size, testConfig.k);
        const testDesc = `dim=${testConfig.dimension} size=${testConfig.size} k=${testConfig.k}`;

        for (const [distType, generator] of Object.entries(distributions)) {
          it(
            // The distribution is part of the title; without it every test
            // registered by this inner loop would carry the same name.
            `${testDesc} dist=${distType}`,
            async () => {
              const testVectors = generator(testConfig.size, testConfig.dimension);
              const queryVectors = generator(testConfig.queryCount, testConfig.dimension);

              // Create index and insert vectors
              const lists = getListCount(indexConfig, testConfig.size);

              await vectorDB.createIndex({
                indexName: testIndexName,
                dimension: testConfig.dimension,
                metric: 'cosine',
                indexConfig,
                buildIndex: indexType === 'ivfflat',
              });

              console.log(
                `Batched bulk upserting ${testVectors.length} ${distType} vectors into index ${testIndexName}`,
              );
              const batchSizes = splitIntoRandomBatches(testConfig.size, testConfig.dimension);
              await batchedBulkUpsert(vectorDB, testIndexName, testVectors, batchSizes);
              if (indexType === 'hnsw' || rebuild) {
                console.log('rebuilding index');
                await vectorDB.buildIndex({ indexName: testIndexName, metric: 'cosine', indexConfig });
                console.log('index rebuilt');
              }
              await smartWarmup(vectorDB, testIndexName, indexType, testConfig.dimension, testConfig.k);

              // For HNSW, test different EF values
              const efValues = indexType === 'hnsw' ? getSearchEf(testConfig.k, hnswConfig.m) : { default: undefined };

              for (const [efType, ef] of Object.entries(efValues)) {
                const recalls: number[] = [];
                const latencies: number[] = [];

                for (const queryVector of queryVectors) {
                  const expectedNeighbors = findNearestBruteForce(queryVector, testVectors, testConfig.k);

                  const [latency, actualResults] = await measureLatency(async () =>
                    vectorDB.query({
                      indexName: testIndexName,
                      queryVector,
                      topK: testConfig.k,
                      ef, // For HNSW
                    }),
                  );

                  // Each stored vector carries its position as `metadata.index`.
                  const actualNeighbors = actualResults.map(r => r.metadata?.index);
                  const recall = calculateRecall(actualNeighbors, expectedNeighbors, testConfig.k);
                  recalls.push(recall);
                  latencies.push(latency);
                }

                // With no samples, report zeros instead of +/-Infinity and undefined.
                const hasSamples = recalls.length > 0;
                const sorted = [...latencies].sort((a, b) => a - b);
                results.push({
                  distribution: distType,
                  dimension: testConfig.dimension,
                  size: testConfig.size,
                  k: testConfig.k,
                  type: indexType,
                  metrics: {
                    recall: hasSamples ? recalls.reduce((a, b) => a + b, 0) / recalls.length : 0,
                    minRecall: hasSamples ? Math.min(...recalls) : 0,
                    maxRecall: hasSamples ? Math.max(...recalls) : 0,
                    latency: {
                      p50: sorted[Math.floor(sorted.length * 0.5)] ?? 0,
                      p95: sorted[Math.floor(sorted.length * 0.95)] ?? 0,
                      ...(indexType === 'ivfflat' && {
                        lists,
                        vectorsPerList: Math.round(testConfig.size / (lists || 1)),
                      }),
                      ...(indexType === 'hnsw' && {
                        m: hnswConfig.m,
                        efConstruction: hnswConfig.efConstruction,
                        ef,
                        efType,
                      }),
                    },
                    ...(indexType === 'ivfflat' && {
                      clustering: {
                        numLists: lists,
                        avgVectorsPerList: testConfig.size / (lists || 1),
                        recommendedLists: Math.floor(Math.sqrt(testConfig.size)),
                        distribution: distType,
                      },
                    }),
                  },
                });
              }
            },
            timeout,
          );
        }
      }
    });
  }
});
254
-
255
/**
 * Prints one table per index type and vector dimension, listing recall and
 * latency figures for every collected result.
 */
function analyzeResults(results: TestResult[]) {
  // Rows inside a group are listed in this distribution order.
  const distributionOrder = ['random', 'clustered', 'skewed', 'mixed'];
  const resultsByType = groupBy(results, (r: TestResult) => r.type);

  for (const [type, typeResults] of Object.entries(resultsByType)) {
    console.log(`\n=== ${type.toUpperCase()} Index Analysis ===\n`);

    const resultsByDimension = groupBy(typeResults, (r: TestResult) => r.dimension.toString());
    for (const [dim, dimResults] of Object.entries(resultsByDimension)) {
      console.log(`\n--- Analysis for ${dim} dimensions ---\n`);

      // Combined Performance Analysis
      const columns = ['Distribution', 'Dataset Size', 'K'];
      if (type === 'hnsw') {
        columns.push('M', 'EF Construction', 'EF', 'EF Type');
      } else if (type === 'ivfflat') {
        columns.push('Lists', 'Vectors/List');
      }
      columns.push('Min Recall', 'Avg Recall', 'Max Recall', 'P50 (ms)', 'P95 (ms)');

      // Turns one result into a table row; index-specific columns only appear for their type.
      const toRow = (result: any) => ({
        Distribution: result.distribution,
        'Dataset Size': result.size,
        K: result.k,
        ...(type === 'ivfflat'
          ? {
              Lists: result.metrics.latency.lists,
              'Vectors/List': result.metrics.latency.vectorsPerList,
            }
          : {}),
        ...(type === 'hnsw'
          ? {
              M: result.metrics.latency.m,
              'EF Construction': result.metrics.latency.efConstruction,
              EF: result.metrics.latency.ef,
              'EF Type': result.metrics.latency.efType,
            }
          : {}),
        'Min Recall': result.metrics.minRecall.toFixed(3),
        'Avg Recall': result.metrics.recall.toFixed(3),
        'Max Recall': result.metrics.maxRecall.toFixed(3),
        'P50 (ms)': result.metrics.latency.p50.toFixed(2),
        'P95 (ms)': result.metrics.latency.p95.toFixed(2),
      });

      const rowsByGroup = groupBy(
        dimResults,
        (r: any) => `${r.size}-${r.k}-${type === 'ivfflat' ? r.metrics.latency.lists : r.metrics.latency.m}`,
        (group: any[]) =>
          [...group]
            .sort((a, b) => distributionOrder.indexOf(a.distribution) - distributionOrder.indexOf(b.distribution))
            .map(toRow),
      );
      const performanceData = Object.values(rowsByGroup).flat();

      console.log(formatTable(performanceData, columns));
    }
  }
}
315
-
316
/**
 * Splits `total` items into consecutive batches of randomly varying size.
 *
 * Every batch except possibly the last one holds between `batchRange`
 * (inclusive) and `2 * batchRange` (exclusive) items, where `batchRange` is
 * 5000 for 1024-dimensional vectors and 15000 otherwise. The last batch takes
 * whatever remains.
 *
 * @param total - Number of items to distribute; values <= 0 yield an empty list.
 * @param dimension - Vector dimension, used only to choose the batch range.
 * @param random - Source of numbers in [0, 1); defaults to `Math.random`.
 *   Pass a deterministic function to get a reproducible batch plan.
 * @returns Batch sizes whose sum equals `total`.
 */
function splitIntoRandomBatches(total: number, dimension: number, random: () => number = Math.random): number[] {
  const batchRange = dimension === 1024 ? 5000 : 15000;
  const batches: number[] = [];

  let remaining = total;
  while (remaining > 0) {
    // Never hand out more than what is left, so the sizes add up to `total`.
    const batchSize = Math.min(remaining, batchRange + Math.floor(random() * batchRange));
    batches.push(batchSize);
    remaining -= batchSize;
  }

  return batches;
}
330
-
331
/**
 * Writes `vectors` to the index batch by batch, following `batchSizes`.
 * The vector at position `i` is stored with id `vec_<i>` and metadata
 * `{ index: i }`; progress is logged after every batch.
 */
async function batchedBulkUpsert(
  vectorDB: PGPerformanceVector,
  testIndexName: string,
  vectors: number[][],
  batchSizes: number[],
) {
  let start = 0;

  for (const size of batchSizes) {
    const first = start;
    const end = first + size;

    const batch = vectors.slice(first, end);
    // Ids and metadata are derived from the vector's position in the full list.
    const batchIds = batch.map((_, i) => `vec_${first + i}`);
    const batchMetadata = batch.map((_, i) => ({ index: first + i }));

    await vectorDB.bulkUpsert(testIndexName, batch, batchMetadata, batchIds);

    start = end;
    console.log(`${start} of ${vectors.length} vectors upserted`);
  }
}
350
-
351
/**
 * Vector generators keyed by distribution name. `mixed` splits the requested
 * size into random batches and fills each batch with a randomly chosen one of
 * the three other generators.
 */
const distributions = {
  random: generateRandomVectors,
  clustered: generateClusteredVectors,
  skewed: generateSkewedVectors,
  mixed: (size: number, dimension: number): number[][] => {
    const generators = [generateRandomVectors, generateClusteredVectors, generateSkewedVectors];

    return splitIntoRandomBatches(size, dimension).flatMap(batchSize => {
      const pickGenerator = generators[Math.floor(Math.random() * generators.length)];
      return pickGenerator(batchSize, dimension);
    });
  },
};
@@ -1,9 +0,0 @@
1
- {
2
- "extends": ["./tsconfig.json", "../../tsconfig.build.json"],
3
- "compilerOptions": {
4
- "outDir": "./dist",
5
- "rootDir": "./src"
6
- },
7
- "include": ["src/**/*"],
8
- "exclude": ["node_modules", "**/*.test.ts", "src/**/*.mock.ts"]
9
- }
package/tsconfig.json DELETED
@@ -1,5 +0,0 @@
1
- {
2
- "extends": "../../tsconfig.node.json",
3
- "include": ["src/**/*", "tsup.config.ts"],
4
- "exclude": ["node_modules", "**/*.test.ts"]
5
- }
package/tsup.config.ts DELETED
@@ -1,17 +0,0 @@
1
- import { generateTypes } from '@internal/types-builder';
2
- import { defineConfig } from 'tsup';
3
-
4
// Bundles src/index.ts as ESM and CJS. tsup's own declaration output is
// switched off; type declarations are produced by generateTypes after a successful build.
const buildConfig = defineConfig({
  entry: ['src/index.ts'],
  format: ['esm', 'cjs'],
  clean: true,
  dts: false,
  splitting: true,
  treeshake: {
    preset: 'smallest',
  },
  sourcemap: true,
  onSuccess: async () => {
    await generateTypes(process.cwd());
  },
});

export default buildConfig;
package/vitest.config.ts DELETED
@@ -1,12 +0,0 @@
1
- import { defineConfig } from 'vitest/config';
2
-
3
// Default test run: every *.test.ts under src except the performance suites.
const unitTestConfig = defineConfig({
  test: {
    environment: 'node',
    include: ['src/**/*.test.ts'],
    exclude: ['src/**/*.performance.test.ts'],
    coverage: {
      reporter: ['text', 'json', 'html'],
    },
  },
});

export default unitTestConfig;
@@ -1,8 +0,0 @@
1
- import { defineConfig } from 'vitest/config';
2
-
3
// Performance run: only the *.performance.test.ts suites under src.
const performanceTestConfig = defineConfig({
  test: {
    environment: 'node',
    include: ['src/**/*.performance.test.ts'],
  },
});

export default performanceTestConfig;