@mastra/pg 0.0.0-commonjs-20250227130920

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,285 @@
1
+ import type {
2
+ BasicOperator,
3
+ NumericOperator,
4
+ ArrayOperator,
5
+ ElementOperator,
6
+ LogicalOperator,
7
+ RegexOperator,
8
+ Filter,
9
+ } from '@mastra/core/filter';
10
+
11
/**
 * Union of every operator name this translator accepts: the operator groups
 * re-exported by `@mastra/core/filter`, the PG-specific `$contains`, and the
 * regex operators with `$options` excluded.
 */
export type OperatorType =
  | BasicOperator
  | NumericOperator
  | ArrayOperator
  | ElementOperator
  | LogicalOperator
  | '$contains'
  | Exclude<RegexOperator, '$options'>;

/** What an operator renders to. */
interface FilterOperator {
  /** SQL fragment for the condition. */
  sql: string;
  /** When true, the caller binds a parameter value for this fragment. */
  needsValue: boolean;
  /** Optional conversion applied to the operand before it is bound. */
  transformValue?: (value: any) => any;
}

/**
 * Renders one operator for `key`, using `paramIndex` as the positional
 * placeholder number; `value` is the operand when the operator needs to
 * inspect it while building SQL.
 */
type OperatorFn = (key: string, paramIndex: number, value?: any) => FilterOperator;
27
+
28
+ // Helper functions to create operators
29
/**
 * Creates an operator that compares a metadata field, read as text, with a
 * text parameter using `symbol`.
 *
 * When the bound parameter is NULL the comparison degrades to a null test on
 * the field: `IS NULL` for `=` and `IS NOT NULL` for any other symbol.
 */
const createBasicOperator = (symbol: string) => {
  // Empty for equality so the rendered text reads "IS  NULL"; "NOT" otherwise.
  const negation = symbol === '=' ? '' : 'NOT';

  return (key: string, paramIndex: number) => {
    const field = `metadata#>>'{${handleKey(key)}}'`;
    const param = `$${paramIndex}::text`;
    const sql = [
      'CASE',
      `WHEN ${param} IS NULL THEN ${field} IS ${negation} NULL`,
      `ELSE ${field} ${symbol} ${param}`,
      'END',
    ].join('\n');

    return { sql, needsValue: true };
  };
};
38
+
39
/**
 * Creates an operator that casts a metadata field to `numeric` and compares
 * it with the bound parameter using `symbol`.
 */
const createNumericOperator = (symbol: string) => {
  return (key: string, paramIndex: number) => {
    const path = handleKey(key);
    const placeholder = `$${paramIndex}`;
    return {
      sql: `(metadata#>>'{${path}}')::numeric ${symbol} ${placeholder}`,
      needsValue: true,
    };
  };
};
45
+
46
/**
 * Translates the operand of `$elemMatch` into a SQL predicate over `elem`
 * (one array element) plus the parameter values that predicate binds.
 *
 * Each entry of `value` is read as one of:
 *  - `$op: operand`            -> operator applied to the element itself
 *  - `field: { $op: operand }` -> every listed operator applied to `field`
 *  - `field: literal`          -> equality (`$eq`) on `field`, `null` included
 *
 * All resulting conditions are joined with `AND`.
 *
 * @param value      Object of conditions supplied to `$elemMatch`.
 * @param paramIndex Placeholder number to use for the first bound value.
 * @returns The joined SQL and the values to bind, in placeholder order.
 * @throws Error when `value` is not a plain object, or when an operator is
 *         missing or not present in `FILTER_OPERATORS`.
 */
function buildElemMatchConditions(value: any, paramIndex: number): { sql: string; values: any[] } {
  // `typeof null === 'object'`, so null has to be rejected explicitly.
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    throw new Error('$elemMatch requires an object with conditions');
  }

  const conditions: string[] = [];
  const values: any[] = [];

  // Renders one operator and records its SQL and (if needed) its bound value.
  const addCondition = (operator: string | undefined, key: string, operand: any): void => {
    const operatorFn = operator === undefined ? undefined : FILTER_OPERATORS[operator];
    if (!operatorFn) {
      throw new Error(`Invalid operator: ${operator}`);
    }
    // Placeholders continue from the caller's index, offset by what we bound so far.
    const result = operatorFn(key, paramIndex + values.length, operand);

    // Operators are written against the row's metadata; retarget them at the element.
    conditions.push(result.sql.replaceAll('metadata#>>', 'elem#>>'));
    if (result.needsValue) {
      values.push(operand);
    }
  };

  for (const [field, val] of Object.entries(value)) {
    if (field.startsWith('$')) {
      addCondition(field, '', val);
    } else if (val !== null && typeof val === 'object' && !Array.isArray(val)) {
      const operatorEntries = Object.entries(val);
      if (operatorEntries.length === 0) {
        // Same message the missing-operator case has always produced.
        addCondition(undefined, field, undefined);
      }
      // Apply every operator given for the field, not just the first one.
      for (const [op, opValue] of operatorEntries) {
        addCondition(op, field, opValue);
      }
    } else {
      addCondition('$eq', field, val);
    }
  }

  return {
    sql: conditions.join(' AND '),
    values,
  };
}
94
+
95
+ // Define all filter operators
96
/**
 * Maps each supported operator name to a function that renders its SQL
 * fragment against the `metadata` JSONB column.
 *
 * Dotted keys are treated as JSON paths (via `handleKey`) by every operator
 * except `$exists`, which tests a top-level key only, and the logical
 * operators, whose `key` argument is an already-rendered SQL expression.
 */
export const FILTER_OPERATORS: Record<string, OperatorFn> = {
  // Comparison operators
  $eq: createBasicOperator('='),
  $ne: createBasicOperator('!='),
  $gt: createNumericOperator('>'),
  $gte: createNumericOperator('>='),
  $lt: createNumericOperator('<'),
  $lte: createNumericOperator('<='),

  // Array Operators
  $in: (key, paramIndex) => ({
    sql: `metadata#>>'{${handleKey(key)}}' = ANY($${paramIndex}::text[])`,
    needsValue: true,
  }),
  $nin: (key, paramIndex) => ({
    sql: `metadata#>>'{${handleKey(key)}}' != ALL($${paramIndex}::text[])`,
    needsValue: true,
  }),
  $all: (key, paramIndex) => ({
    // An empty parameter array never matches.
    sql: `CASE WHEN array_length($${paramIndex}::text[], 1) IS NULL THEN false
      ELSE (metadata#>'{${handleKey(key)}}')::jsonb ?& $${paramIndex}::text[] END`,
    needsValue: true,
  }),
  $elemMatch: (key: string, paramIndex: number, value: any): FilterOperator => {
    const { sql, values } = buildElemMatchConditions(value, paramIndex);
    // Use the path operator so nested keys ("a.b") resolve as a path instead of
    // a lookup of the single literal key "a,b".
    const arrayPath = `metadata#>'{${handleKey(key)}}'`;
    return {
      sql: `(
        CASE
          WHEN jsonb_typeof(${arrayPath}) = 'array' THEN
            EXISTS (
              SELECT 1
              FROM jsonb_array_elements(${arrayPath}) as elem
              WHERE ${sql}
            )
          ELSE FALSE
        END
      )`,
      needsValue: true,
      // The caller spreads this array so each inner condition gets its own parameter.
      transformValue: () => values,
    };
  },

  // Element Operators
  $exists: key => ({
    // The key lands inside a SQL string literal, so embedded quotes are doubled.
    // NOTE(review): only a top-level key is tested and the operand (true/false)
    // is not consulted — confirm whether that is intended.
    sql: `metadata ? '${key.replace(/'/g, "''")}'`,
    needsValue: false,
  }),

  // Logical Operators (`key` is the already-joined SQL of the child conditions)
  $and: key => ({ sql: `(${key})`, needsValue: false }),
  $or: key => ({ sql: `(${key})`, needsValue: false }),
  $not: key => ({ sql: `NOT (${key})`, needsValue: false }),
  $nor: key => ({ sql: `NOT (${key})`, needsValue: false }),

  // Regex Operators
  $regex: (key, paramIndex) => ({
    sql: `metadata#>>'{${handleKey(key)}}' ~ $${paramIndex}`,
    needsValue: true,
  }),

  $contains: (key, paramIndex) => ({
    sql: `metadata @> $${paramIndex}::jsonb`,
    needsValue: true,
    // Wrap the operand in nested objects following the dotted key, innermost
    // segment first, and bind the result as JSON text.
    transformValue: operand => {
      const segments = key.split('.');
      return JSON.stringify(segments.reduceRight((inner, segment) => ({ [segment]: inner }), operand));
    },
  }),
  $size: (key: string, paramIndex: number) => ({
    // Non-array values never match.
    sql: `(
      CASE
        WHEN jsonb_typeof(metadata#>'{${handleKey(key)}}') = 'array' THEN
          jsonb_array_length(metadata#>'{${handleKey(key)}}') = $${paramIndex}
        ELSE FALSE
      END
    )`,
    needsValue: true,
  }),
};
173
+
174
/** Output of `buildFilterQuery`. */
export interface FilterResult {
  /** Rendered `WHERE ...` clause, or an empty string when nothing was rendered. */
  sql: string;
  /** Values to bind to the positional placeholders, in order. */
  values: any[];
}
178
+
179
/**
 * Converts a dotted metadata key (`a.b.c`) into the comma-separated form
 * (`a,b,c`) that callers embed in a single-quoted Postgres path literal such
 * as `'{a,b,c}'`.
 *
 * Single quotes are doubled first so a key can never terminate the
 * surrounding SQL string literal.
 *
 * @param key Dotted field path.
 * @returns The path with `.` replaced by `,` and `'` escaped as `''`.
 */
export const handleKey = (key: string) => {
  return key.replace(/'/g, "''").replace(/\./g, ',');
};
182
+
183
/**
 * Translates a metadata filter into a SQL `WHERE` clause with positional
 * parameters.
 *
 * `minScore` always occupies `$1`; every value bound for the filter follows
 * it, so placeholders in the returned SQL start at `$2`.
 *
 * @param filter   Filter object; a falsy filter yields an empty clause.
 * @param minScore Value stored as the first bound parameter.
 * @returns The `WHERE ...` clause (or `''`) and the ordered parameter values.
 * @throws Error when a field condition names an operator that is not in
 *         `FILTER_OPERATORS`, supplies no operator at all, gives `$not` a
 *         non-object operand, or gives `$and`/`$or`/`$nor` a non-array.
 */
export function buildFilterQuery(filter: Filter, minScore: number): FilterResult {
  // Values are heterogeneous (numbers, strings, arrays, JSON text), not just numbers.
  const values: any[] = [minScore];
  const logicalOperators = ['$and', '$or', '$not', '$nor'];

  // Renders one field-level operator and binds its value when it needs one.
  function applyOperator(operator: string, key: string, operand: any, errorPrefix: string): string {
    const operatorFn = FILTER_OPERATORS[operator];
    if (!operatorFn) {
      throw new Error(`${errorPrefix}: ${operator}`);
    }
    const result = operatorFn(key, values.length + 1, operand);
    if (result.needsValue) {
      const bound = result.transformValue ? result.transformValue(operand) : operand;
      if (Array.isArray(bound) && operator === '$elemMatch') {
        // $elemMatch produces one value per inner condition.
        values.push(...bound);
      } else {
        values.push(bound);
      }
    }
    return result.sql;
  }

  // Wraps in parentheses only when several parts were AND-ed together.
  function andAll(parts: string[]): string {
    return parts.length > 1 ? `(${parts.join(' AND ')})` : (parts[0] ?? '');
  }

  function buildCondition(key: string, value: any, parentPath: string): string {
    // Handle logical operators ($and/$or/$not/$nor)
    if (logicalOperators.includes(key)) {
      return handleLogicalOperator(key as '$and' | '$or' | '$not' | '$nor', value, parentPath);
    }

    // Anything that is not an operator object is an equality check
    if (!value || typeof value !== 'object') {
      values.push(value);
      return `metadata#>>'{${handleKey(key)}}' = $${values.length}`;
    }

    const operatorEntries = Object.entries(value);
    if (operatorEntries.length === 0) {
      throw new Error(`No operator provided for field: ${key}`);
    }

    // Every operator listed for the field applies; they are AND-ed together.
    const parts = operatorEntries.map(([operator, operand]) => {
      if (operator !== '$not') {
        return applyOperator(operator, key, operand, 'Invalid operator');
      }

      // Field-level $not: negate the conjunction of the nested operators.
      if (!operand || typeof operand !== 'object' || Object.keys(operand).length === 0) {
        throw new Error('$not requires an object with at least one operator');
      }
      const negated = Object.entries(operand as Record<string, unknown>)
        .map(([nestedOp, nestedValue]) =>
          applyOperator(nestedOp, key, nestedValue, 'Invalid operator in $not condition'),
        )
        .join(' AND ');
      return `NOT (${negated})`;
    });

    return andAll(parts);
  }

  // SQL for a logical operator that has no usable child conditions.
  function emptyLogicalResult(key: '$and' | '$or' | '$nor'): string {
    // Empty $and/$nor match everything; empty $or matches nothing.
    return key === '$or' ? 'false' : 'true';
  }

  function handleLogicalOperator(key: '$and' | '$or' | '$not' | '$nor', value: Filter[], parentPath: string): string {
    if (key === '$not') {
      // Top-level $not takes an object of field conditions.
      const conditions = Object.entries(value)
        .map(([fieldKey, fieldValue]) => buildCondition(fieldKey, fieldValue, key))
        .join(' AND ');
      return `NOT (${conditions})`;
    }

    // Handle empty conditions
    if (!value || value.length === 0) {
      return emptyLogicalResult(key);
    }
    if (!Array.isArray(value)) {
      throw new Error(`${key} requires an array of filters`);
    }

    const joinOperator = key === '$or' || key === '$nor' ? 'OR' : 'AND';
    const conditions = value
      .map((f: Filter) => {
        // Fields inside one filter object always combine with AND, whatever the
        // enclosing operator is; logical keys are dispatched by buildCondition.
        const parts = Object.entries(f)
          .map(([k, v]) => buildCondition(k, v, parentPath))
          .filter(Boolean);
        return andAll(parts);
      })
      // Empty filter objects contribute nothing rather than a dangling AND/OR.
      .filter(Boolean);

    if (conditions.length === 0) {
      return emptyLogicalResult(key);
    }

    const joined = conditions.join(` ${joinOperator} `);
    const operatorFn = FILTER_OPERATORS[key]!;
    return operatorFn(joined, 0, value).sql;
  }

  if (!filter) {
    return { sql: '', values };
  }

  const conditions = Object.entries(filter)
    .map(([key, value]) => buildCondition(key, value, ''))
    .filter(Boolean)
    .join(' AND ');

  return { sql: conditions ? `WHERE ${conditions}` : '', values };
}
@@ -0,0 +1,16 @@
1
/** Index kinds that can be requested. */
export type IndexType = 'flat' | 'ivfflat' | 'hnsw';

/** Options read when `type` is `'ivfflat'`. */
interface IVFConfig {
  /** Number of lists; optional. */
  lists?: number;
}

/** Options read when `type` is `'hnsw'`. */
interface HNSWConfig {
  /** Max number of connections (default: 16) */
  m?: number;
  /** Build-time complexity (default: 64) */
  efConstruction?: number;
}

/** Index settings; every field is optional. */
export interface IndexConfig {
  type?: IndexType;
  ivf?: IVFConfig;
  hnsw?: HNSWConfig;
}
@@ -0,0 +1,370 @@
1
+ import pg from 'pg';
2
+ import { describe, it, beforeAll, afterAll, beforeEach, afterEach } from 'vitest';
3
+
4
+ import type { TestConfig, TestResult } from './performance.helpers';
5
+ import {
6
+ baseTestConfigs,
7
+ calculateTimeout,
8
+ generateRandomVectors,
9
+ findNearestBruteForce,
10
+ calculateRecall,
11
+ formatTable,
12
+ groupBy,
13
+ measureLatency,
14
+ getListCount,
15
+ getSearchEf,
16
+ generateClusteredVectors,
17
+ generateSkewedVectors,
18
+ getHNSWConfig,
19
+ getIndexDescription,
20
+ } from './performance.helpers';
21
+ import type { IndexConfig, IndexType } from './types';
22
+
23
+ import { PgVector } from '.';
24
+
25
/** An `IndexConfig` as used by the benchmark: the index type is mandatory. */
interface IndexTestConfig extends IndexConfig {
  type: IndexType;
  /** When true, the benchmark calls `buildIndex` again after the vectors are inserted. */
  rebuild?: boolean;
}
29
+
30
/**
 * `PgVector` extended with a bulk insert path for benchmarks.
 *
 * It owns a second connection pool (`perfPool`) used only by `bulkUpsert`;
 * that pool is closed together with the base class in `disconnect`.
 */
class PGPerformanceVector extends PgVector {
  private readonly perfPool: pg.Pool;

  constructor(connectionString: string) {
    super(connectionString);

    this.perfPool = new pg.Pool({
      connectionString,
      max: 20, // Maximum number of clients in the pool
      idleTimeoutMillis: 30000, // Close idle connections after 30 seconds
      connectionTimeoutMillis: 2000, // Fail fast if can't connect
    });
  }

  /**
   * Inserts or updates all `vectors` with a single statement inside one
   * transaction.
   *
   * @param indexName Table to write to (interpolated into the SQL text).
   * @param vectors   Embeddings, one per row.
   * @param metadata  Per-row metadata; defaults to `{}` for every row.
   * @param ids       Per-row ids; random UUIDs are generated when omitted.
   * @returns The ids that were written, in input order.
   */
  async bulkUpsert(indexName: string, vectors: number[][], metadata?: any[], ids?: string[]) {
    const client = await this.perfPool.connect();
    try {
      await client.query('BEGIN');
      const vectorIds = ids || vectors.map(() => crypto.randomUUID());

      // Same query structure as upsert, just using unnest for bulk operation
      const query = `
        INSERT INTO ${indexName} (vector_id, embedding, metadata)
        SELECT * FROM unnest(
          $1::text[],
          $2::vector[],
          $3::jsonb[]
        )
        ON CONFLICT (vector_id)
        DO UPDATE SET
          embedding = EXCLUDED.embedding,
          metadata = EXCLUDED.metadata
        RETURNING embedding::text
      `;

      // Same parameter structure as upsert, just as arrays
      await client.query(query, [
        vectorIds,
        vectors.map(v => `[${v.join(',')}]`),
        (metadata || vectors.map(() => ({}))).map(m => JSON.stringify(m)),
      ]);
      await client.query('COMMIT');
      return vectorIds;
    } catch (error) {
      try {
        await client.query('ROLLBACK');
      } catch {
        // Deliberately ignored: the original failure below is the one worth reporting.
      }
      throw error;
    } finally {
      client.release();
    }
  }

  /** Closes the benchmark pool, then the base class's connections. */
  override async disconnect() {
    await this.perfPool.end();
    return super.disconnect();
  }
}
83
+
84
// Remembers which (dimension, k, index type) combinations already ran a warm-up query.
const warmupCache = new Map<string, boolean>();

/**
 * Issues one throwaway query against `testIndexName` the first time a given
 * dimension / k / index type combination is seen; later calls with the same
 * combination do nothing.
 */
async function smartWarmup(
  vectorDB: PGPerformanceVector,
  testIndexName: string,
  indexType: string,
  dimension: number,
  k: number,
) {
  const cacheKey = `${dimension}-${k}-${indexType}`;
  if (warmupCache.has(cacheKey)) {
    return;
  }

  console.log(`Warming up ${indexType} index for ${dimension}d vectors, k=${k}`);
  const [warmupVector] = generateRandomVectors(1, dimension);
  await vectorDB.query(testIndexName, warmupVector as number[], k);
  warmupCache.set(cacheKey, true);
}
100
+
101
// Connection target: DB_URL when set, otherwise the local default below.
const connectionString = process.env.DB_URL || `postgresql://postgres:postgres@localhost:5435/mastra`;

/**
 * Benchmark suite: for every index configuration, test configuration and
 * vector distribution it builds an index, bulk-inserts generated vectors,
 * runs the query set and records recall and latency. The collected results
 * are printed by `analyzeResults` once the whole suite has finished.
 */
describe('PostgreSQL Index Performance', () => {
  let vectorDB: PGPerformanceVector;
  const testIndexName = 'test_index_performance';
  const results: TestResult[] = [];

  const indexConfigs: IndexTestConfig[] = [
    { type: 'flat' }, // Test flat/linear search as baseline
    { type: 'ivfflat', ivf: { lists: 100 } }, // Test IVF with fixed lists
    { type: 'ivfflat', rebuild: true }, // Test IVF with calculated lists and rebuild
    { type: 'hnsw' }, // Test HNSW with default parameters
    { type: 'hnsw', hnsw: { m: 16, efConstruction: 64 } }, // Test HNSW with custom parameters
  ];

  beforeAll(async () => {
    // Initialize PGPerformanceVector
    vectorDB = new PGPerformanceVector(connectionString);
  });

  // Every test starts from, and leaves behind, a database without the test index.
  beforeEach(async () => {
    await vectorDB.deleteIndex(testIndexName);
  });

  afterEach(async () => {
    await vectorDB.deleteIndex(testIndexName);
  });

  afterAll(async () => {
    await vectorDB.disconnect();
    analyzeResults(results);
  });

  // Combine all test configs
  const allConfigs: TestConfig[] = [
    ...baseTestConfigs['64'],
    ...baseTestConfigs['384'],
    ...baseTestConfigs['1024'],
    ...baseTestConfigs.smokeTests,
    ...baseTestConfigs.stressTests,
  ];

  // For each index config
  for (const indexConfig of indexConfigs) {
    const indexType = indexConfig.type;
    const rebuild = indexConfig.rebuild ?? false;
    const hnswConfig = getHNSWConfig(indexConfig);
    const indexDescription = getIndexDescription({
      type: indexType,
      hnsw: hnswConfig,
    });

    describe(`Index: ${indexDescription}`, () => {
      for (const testConfig of allConfigs) {
        const timeout = calculateTimeout(testConfig.dimension, testConfig.size, testConfig.k);
        const testDesc = `dim=${testConfig.dimension} size=${testConfig.size} k=${testConfig.k}`;

        for (const [distType, generator] of Object.entries(distributions)) {
          it(
            // Include the distribution so the four tests per config have distinct names.
            `${testDesc} dist=${distType}`,
            async () => {
              const testVectors = generator(testConfig.size, testConfig.dimension);
              const queryVectors = generator(testConfig.queryCount, testConfig.dimension);

              // Create index and insert vectors
              const lists = getListCount(indexConfig, testConfig.size);

              await vectorDB.createIndex(
                testIndexName,
                testConfig.dimension,
                'cosine',
                indexConfig,
                indexType === 'ivfflat',
              );

              console.log(
                `Batched bulk upserting ${testVectors.length} ${distType} vectors into index ${testIndexName}`,
              );
              const batchSizes = splitIntoRandomBatches(testConfig.size, testConfig.dimension);
              await batchedBulkUpsert(vectorDB, testIndexName, testVectors, batchSizes);
              if (indexType === 'hnsw' || rebuild) {
                console.log('rebuilding index');
                await vectorDB.buildIndex(testIndexName, 'cosine', indexConfig);
                console.log('index rebuilt');
              }
              await smartWarmup(vectorDB, testIndexName, indexType, testConfig.dimension, testConfig.k);

              // For HNSW, test different EF values
              const efValues = indexType === 'hnsw' ? getSearchEf(testConfig.k, hnswConfig.m) : { default: undefined };

              for (const [efType, ef] of Object.entries(efValues)) {
                const recalls: number[] = [];
                const latencies: number[] = [];

                for (const queryVector of queryVectors) {
                  const expectedNeighbors = findNearestBruteForce(queryVector, testVectors, testConfig.k);

                  const [latency, actualResults] = await measureLatency(async () =>
                    vectorDB.query(
                      testIndexName,
                      queryVector,
                      testConfig.k,
                      undefined,
                      false,
                      0,
                      { ef }, // For HNSW
                    ),
                  );

                  // Rows carry their insertion index in metadata (set by batchedBulkUpsert).
                  const actualNeighbors = actualResults.map(r => r.metadata?.index);
                  const recall = calculateRecall(actualNeighbors, expectedNeighbors, testConfig.k);
                  recalls.push(recall);
                  latencies.push(latency);
                }

                const hasRecalls = recalls.length > 0;
                const sorted = [...latencies].sort((a, b) => a - b);
                results.push({
                  distribution: distType,
                  dimension: testConfig.dimension,
                  size: testConfig.size,
                  k: testConfig.k,
                  type: indexType,
                  metrics: {
                    recall: hasRecalls ? recalls.reduce((a, b) => a + b, 0) / recalls.length : 0,
                    // Math.min()/Math.max() of nothing are +/-Infinity; report 0 like `recall` does.
                    minRecall: hasRecalls ? Math.min(...recalls) : 0,
                    maxRecall: hasRecalls ? Math.max(...recalls) : 0,
                    latency: {
                      p50: sorted[Math.floor(sorted.length * 0.5)],
                      p95: sorted[Math.floor(sorted.length * 0.95)],
                      ...(indexType === 'ivfflat' && {
                        lists,
                        vectorsPerList: Math.round(testConfig.size / (lists || 1)),
                      }),
                      ...(indexType === 'hnsw' && {
                        m: hnswConfig.m,
                        efConstruction: hnswConfig.efConstruction,
                        ef,
                        efType,
                      }),
                    },
                    ...(indexType === 'ivfflat' && {
                      clustering: {
                        numLists: lists,
                        avgVectorsPerList: testConfig.size / (lists || 1),
                        recommendedLists: Math.floor(Math.sqrt(testConfig.size)),
                        distribution: distType,
                      },
                    }),
                  },
                });
              }
            },
            timeout,
          );
        }
      }
    });
  }
});
257
+
258
/**
 * Prints one table per index type and vector dimension, listing recall and
 * latency for every recorded result. Rows are grouped by dataset size, k and
 * the index's main tuning value, and ordered by distribution within a group.
 */
function analyzeResults(results: TestResult[]) {
  const distributionOrder = ['random', 'clustered', 'skewed', 'mixed'];
  const resultsByType = groupBy(results, (r: TestResult) => r.type);

  for (const [type, typeResults] of Object.entries(resultsByType)) {
    console.log(`\n=== ${type.toUpperCase()} Index Analysis ===\n`);

    const isIvf = type === 'ivfflat';
    const isHnsw = type === 'hnsw';
    const resultsByDimension = groupBy(typeResults, (r: TestResult) => r.dimension.toString());

    for (const [dim, dimResults] of Object.entries(resultsByDimension)) {
      console.log(`\n--- Analysis for ${dim} dimensions ---\n`);

      // Combined Performance Analysis
      const columns = ['Distribution', 'Dataset Size', 'K'];
      if (isHnsw) {
        columns.push('M', 'EF Construction', 'EF', 'EF Type');
      } else if (isIvf) {
        columns.push('Lists', 'Vectors/List');
      }
      columns.push('Min Recall', 'Avg Recall', 'Max Recall', 'P50 (ms)', 'P95 (ms)');

      // Turns one group of results into table rows, ordered by distribution.
      const toRows = (group: any[]) => {
        const ordered = [...group].sort(
          (a, b) => distributionOrder.indexOf(a.distribution) - distributionOrder.indexOf(b.distribution),
        );
        return ordered.map(result => {
          const { metrics } = result;
          const ivfCells = isIvf
            ? {
                Lists: metrics.latency.lists,
                'Vectors/List': metrics.latency.vectorsPerList,
              }
            : {};
          const hnswCells = isHnsw
            ? {
                M: metrics.latency.m,
                'EF Construction': metrics.latency.efConstruction,
                EF: metrics.latency.ef,
                'EF Type': metrics.latency.efType,
              }
            : {};
          return {
            Distribution: result.distribution,
            'Dataset Size': result.size,
            K: result.k,
            ...ivfCells,
            ...hnswCells,
            'Min Recall': metrics.minRecall.toFixed(3),
            'Avg Recall': metrics.recall.toFixed(3),
            'Max Recall': metrics.maxRecall.toFixed(3),
            'P50 (ms)': metrics.latency.p50.toFixed(2),
            'P95 (ms)': metrics.latency.p95.toFixed(2),
          };
        });
      };

      const groupedRows = groupBy(
        dimResults,
        (r: any) => `${r.size}-${r.k}-${isIvf ? r.metrics.latency.lists : r.metrics.latency.m}`,
        toRows,
      );
      const performanceData = Object.values(groupedRows).flat();

      console.log(formatTable(performanceData, columns));
    }
  }
}
318
+
319
/**
 * Splits `total` into randomly sized batch lengths that add up to `total`.
 *
 * Each batch is drawn from `[range, 2 * range)`, where `range` is 5000 for
 * 1024-dimensional vectors and 15000 otherwise; the final batch is whatever
 * remains. A non-positive `total` yields an empty list.
 */
function splitIntoRandomBatches(total: number, dimension: number): number[] {
  const batchRange = dimension === 1024 ? 5000 : 15000;
  const batches: number[] = [];

  for (let remaining = total; remaining > 0; ) {
    const candidate = batchRange + Math.floor(Math.random() * batchRange);
    // Never hand out more than is left.
    const batchSize = Math.min(remaining, candidate);
    batches.push(batchSize);
    remaining -= batchSize;
  }

  return batches;
}
333
+
334
/**
 * Inserts `vectors` in consecutive slices whose lengths are given by
 * `batchSizes`, one `bulkUpsert` call per slice, logging progress after each.
 *
 * Row `i` gets the id `vec_<i>` and the metadata `{ index: i }`.
 */
async function batchedBulkUpsert(
  vectorDB: PGPerformanceVector,
  testIndexName: string,
  vectors: number[][],
  batchSizes: number[],
) {
  const vectorIds = vectors.map((_, idx) => `vec_${idx}`);
  const metadata = vectors.map((_, idx) => ({ index: idx }));

  let start = 0;
  for (const size of batchSizes) {
    const end = start + size;
    await vectorDB.bulkUpsert(
      testIndexName,
      vectors.slice(start, end),
      metadata.slice(start, end),
      vectorIds.slice(start, end),
    );
    start = end;
    console.log(`${start} of ${vectors.length} vectors upserted`);
  }
}
353
+
354
// Vector generators keyed by distribution name. `mixed` stitches together
// randomly sized batches, each produced by a randomly chosen generator.
const distributions = {
  random: generateRandomVectors,
  clustered: generateClusteredVectors,
  skewed: generateSkewedVectors,
  mixed: (size: number, dimension: number) => {
    const generators = [generateRandomVectors, generateClusteredVectors, generateSkewedVectors];
    const batchSizes = splitIntoRandomBatches(size, dimension);

    return batchSizes.reduce<number[][]>((vectors, batchSize) => {
      const generator = generators[Math.floor(Math.random() * generators.length)];
      return vectors.concat(generator(batchSize, dimension));
    }, []);
  },
};
package/tsconfig.json ADDED
@@ -0,0 +1,5 @@
1
+ {
2
+ "extends": "../../tsconfig.node.json",
3
+ "include": ["src/**/*"],
4
+ "exclude": ["node_modules", "**/*.test.ts"]
5
+ }
@@ -0,0 +1,12 @@
1
+ import { defineConfig } from 'vitest/config';
2
+
3
// Unit-test run: every test file under src except the performance suites.
const unitTestFiles = ['src/**/*.test.ts'];
const performanceTestFiles = ['src/**/*.performance.test.ts'];

export default defineConfig({
  test: {
    environment: 'node',
    include: unitTestFiles,
    exclude: performanceTestFiles,
    coverage: {
      reporter: ['text', 'json', 'html'],
    },
  },
});
@@ -0,0 +1,8 @@
1
+ import { defineConfig } from 'vitest/config';
2
+
3
// Performance run: only the *.performance.test.ts suites under src.
const performanceTestFiles = ['src/**/*.performance.test.ts'];

export default defineConfig({
  test: {
    environment: 'node',
    include: performanceTestFiles,
  },
});