@mastra/pg 0.0.0-commonjs-20250227130920
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +23 -0
- package/CHANGELOG.md +503 -0
- package/LICENSE +44 -0
- package/README.md +161 -0
- package/dist/_tsup-dts-rollup.d.cts +304 -0
- package/dist/_tsup-dts-rollup.d.ts +304 -0
- package/dist/index.cjs +1043 -0
- package/dist/index.d.cts +4 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +1035 -0
- package/docker-compose.perf.yaml +21 -0
- package/docker-compose.yaml +14 -0
- package/eslint.config.js +6 -0
- package/package.json +50 -0
- package/src/index.ts +2 -0
- package/src/storage/index.test.ts +380 -0
- package/src/storage/index.ts +592 -0
- package/src/vector/filter.test.ts +967 -0
- package/src/vector/filter.ts +107 -0
- package/src/vector/index.test.ts +1302 -0
- package/src/vector/index.ts +391 -0
- package/src/vector/performance.helpers.ts +286 -0
- package/src/vector/sql-builder.ts +285 -0
- package/src/vector/types.ts +16 -0
- package/src/vector/vector.performance.test.ts +370 -0
- package/tsconfig.json +5 -0
- package/vitest.config.ts +12 -0
- package/vitest.perf.config.ts +8 -0
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
BasicOperator,
|
|
3
|
+
NumericOperator,
|
|
4
|
+
ArrayOperator,
|
|
5
|
+
ElementOperator,
|
|
6
|
+
LogicalOperator,
|
|
7
|
+
RegexOperator,
|
|
8
|
+
Filter,
|
|
9
|
+
} from '@mastra/core/filter';
|
|
10
|
+
|
|
11
|
+
/**
 * Union of every operator keyword accepted by the filter SQL builder:
 * the operator families imported from `@mastra/core/filter`, the extra
 * `$contains` keyword, and the regex family without `$options`.
 */
export type OperatorType =
  | BasicOperator
  | NumericOperator
  | ArrayOperator
  | ElementOperator
  | LogicalOperator
  | '$contains'
  | Exclude<RegexOperator, '$options'>;

/** What an operator contributes to the generated query. */
type FilterOperator = {
  /** SQL text of the condition. */
  sql: string;
  /** Whether a bound parameter has to be pushed for this condition. */
  needsValue: boolean;
  /** Optional conversion applied to the raw operand before it is bound. */
  transformValue?: (value: any) => any;
};

/**
 * Factory signature shared by all operators: receives the metadata key, the
 * 1-based index of the next positional parameter and (optionally) the raw
 * operand, and returns the SQL description of the condition.
 */
type OperatorFn = (key: string, paramIndex: number, value?: any) => FilterOperator;
|
|
27
|
+
|
|
28
|
+
// Helper functions to create operators

/**
 * Creates an operator factory that compares a metadata field, read as text,
 * against a text-cast positional parameter using `symbol`.
 *
 * When the bound parameter is NULL the comparison becomes an `IS NULL` check
 * for `=` and an `IS NOT NULL` check for every other symbol.
 */
const createBasicOperator = (symbol: string) => {
  const nullKeyword = symbol === '=' ? '' : 'NOT';

  return (key: string, paramIndex: number) => {
    const field = `metadata#>>'{${handleKey(key)}}'`;
    const param = `$${paramIndex}::text`;

    return {
      sql: `CASE
      WHEN ${param} IS NULL THEN ${field} IS ${nullKeyword} NULL
      ELSE ${field} ${symbol} ${param}
    END`,
      needsValue: true,
    };
  };
};
|
|
38
|
+
|
|
39
|
+
/**
 * Creates an operator factory that casts a metadata field to `numeric` and
 * compares it with a positional parameter using `symbol`.
 */
const createNumericOperator = (symbol: string) => {
  return (key: string, paramIndex: number) => {
    const numericField = `(metadata#>>'{${handleKey(key)}}')::numeric`;
    return {
      sql: `${numericField} ${symbol} $${paramIndex}`,
      needsValue: true,
    };
  };
};
|
|
45
|
+
|
|
46
|
+
/**
 * Compiles the body of an `$elemMatch` filter into a SQL predicate over a row
 * source aliased `elem`, together with the parameters it binds.
 *
 * Each entry of `value` is one of:
 *  - `$op: operand`          -> the operator is applied to the element itself,
 *  - `field: { $op: ... }`   -> every listed operator is applied to `field`,
 *  - `field: primitive/null` -> shorthand for `$eq`.
 *
 * @param value      Object holding the element conditions.
 * @param paramIndex Index of the first positional parameter available.
 * @returns The conditions joined with `AND`, and the values to bind in order.
 * @throws Error when `value` is not a plain, non-empty condition object or an
 *         unknown operator is used.
 */
function buildElemMatchConditions(value: any, paramIndex: number): { sql: string; values: any[] } {
  // `typeof null === 'object'`, so null has to be rejected explicitly.
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    throw new Error('$elemMatch requires an object with conditions');
  }

  const conditions: string[] = [];
  const values: any[] = [];

  const addCondition = (operator: string | undefined, key: string, operand: any): void => {
    const operatorFn = operator === undefined ? undefined : FILTER_OPERATORS[operator];
    if (!operatorFn) {
      throw new Error(`Invalid operator: ${operator}`);
    }

    // Parameters are numbered consecutively after the ones already collected.
    const result = operatorFn(key, paramIndex + values.length, operand);

    // Re-target the generated SQL from the row's metadata to the current array
    // element. Matching on `metadata#>` covers both the `#>` and `#>>` forms.
    conditions.push(result.sql.replaceAll('metadata#>', 'elem#>'));

    if (!result.needsValue) return;
    const bound = result.transformValue ? result.transformValue(operand) : operand;
    if (operator === '$elemMatch' && Array.isArray(bound)) {
      // A nested $elemMatch hands back the full list of values it consumed.
      values.push(...bound);
    } else {
      values.push(bound);
    }
  };

  for (const [field, val] of Object.entries(value)) {
    if (field.startsWith('$')) {
      addCondition(field, '', val);
    } else if (val !== null && typeof val === 'object' && !Array.isArray(val)) {
      const operators = Object.entries(val);
      if (operators.length === 0) {
        // Keeps the historical "Invalid operator: undefined" failure for `{}`.
        addCondition(undefined, field, undefined);
      }
      for (const [operator, operand] of operators) {
        addCondition(operator, field, operand);
      }
    } else {
      addCondition('$eq', field, val);
    }
  }

  if (conditions.length === 0) {
    // An empty predicate would otherwise produce `WHERE ` with nothing after it.
    throw new Error('$elemMatch requires an object with conditions');
  }

  return {
    sql: conditions.join(' AND '),
    values,
  };
}
|
|
94
|
+
|
|
95
|
+
// Define all filter operators

/**
 * Registry mapping each operator keyword to the factory that renders its SQL.
 *
 * Field operators receive the metadata key (dot-separated path). Logical
 * operators receive already-rendered SQL in place of the key and only wrap it.
 */
export const FILTER_OPERATORS: Record<string, OperatorFn> = {
  $eq: createBasicOperator('='),
  $ne: createBasicOperator('!='),
  $gt: createNumericOperator('>'),
  $gte: createNumericOperator('>='),
  $lt: createNumericOperator('<'),
  $lte: createNumericOperator('<='),

  // Array Operators
  $in: (key, paramIndex) => ({
    sql: `metadata#>>'{${handleKey(key)}}' = ANY($${paramIndex}::text[])`,
    needsValue: true,
  }),
  $nin: (key, paramIndex) => ({
    sql: `metadata#>>'{${handleKey(key)}}' != ALL($${paramIndex}::text[])`,
    needsValue: true,
  }),
  $all: (key, paramIndex) => ({
    // An empty parameter array never matches.
    sql: `CASE WHEN array_length($${paramIndex}::text[], 1) IS NULL THEN false
      ELSE (metadata#>'{${handleKey(key)}}')::jsonb ?& $${paramIndex}::text[] END`,
    needsValue: true,
  }),
  $elemMatch: (key: string, paramIndex: number, value: any): FilterOperator => {
    const { sql, values } = buildElemMatchConditions(value, paramIndex);
    // Path extraction (`#>`) is used instead of `->` so that dotted keys such as
    // "a.b" address the nested array rather than a top-level key named "a,b".
    const arrayExpr = `metadata#>'{${handleKey(key)}}'`;
    return {
      sql: `(
      CASE
        WHEN jsonb_typeof(${arrayExpr}) = 'array' THEN
          EXISTS (
            SELECT 1
            FROM jsonb_array_elements(${arrayExpr}) as elem
            WHERE ${sql}
          )
        ELSE FALSE
      END
    )`,
      needsValue: true,
      // The caller binds every value consumed by the nested conditions.
      transformValue: () => values,
    };
  },
  // Element Operators
  $exists: (key, _paramIndex, value) => ({
    // `#>` yields SQL NULL only when the path is absent, so this also works for
    // nested paths. An explicit `false` operand asks for the field to be absent.
    sql: `metadata#>'{${handleKey(key)}}' IS ${value === false ? '' : 'NOT '}NULL`,
    needsValue: false,
  }),

  // Logical Operators
  $and: key => ({ sql: `(${key})`, needsValue: false }),
  $or: key => ({ sql: `(${key})`, needsValue: false }),
  $not: key => ({ sql: `NOT (${key})`, needsValue: false }),
  $nor: key => ({ sql: `NOT (${key})`, needsValue: false }),

  // Regex Operators
  $regex: (key, paramIndex) => ({
    sql: `metadata#>>'{${handleKey(key)}}' ~ $${paramIndex}`,
    needsValue: true,
  }),

  $contains: (key, paramIndex) => ({
    sql: `metadata @> $${paramIndex}::jsonb`,
    needsValue: true,
    // Wraps the operand in nested objects following the dotted key, e.g.
    // key "a.b" and value 1 become the JSON text {"a":{"b":1}}.
    transformValue: value => {
      const parts = key.split('.');
      return JSON.stringify(parts.reduceRight((inner, part) => ({ [part]: inner }), value));
    },
  }),
  $size: (key: string, paramIndex: number) => ({
    // Non-array values never match.
    sql: `(
      CASE
        WHEN jsonb_typeof(metadata#>'{${handleKey(key)}}') = 'array' THEN
          jsonb_array_length(metadata#>'{${handleKey(key)}}') = $${paramIndex}
        ELSE FALSE
      END
    )`,
    needsValue: true,
  }),
};
|
|
173
|
+
|
|
174
|
+
/** Output of the filter compiler. */
export interface FilterResult {
  /** Generated SQL text; an empty string when there is nothing to filter on. */
  sql: string;
  /** Values for the positional parameters referenced by `sql`, in order. */
  values: any[];
}
|
|
178
|
+
|
|
179
|
+
/**
 * Converts a dot-separated metadata key into the comma-separated element list
 * that callers embed in a quoted PostgreSQL path literal (`'{a,b,c}'`).
 *
 * The result is interpolated into a single-quoted SQL string, so single quotes
 * in the key are doubled to keep the key from terminating that literal.
 *
 * @param key Dot-separated field path, e.g. `"user.address.city"`.
 * @returns The path with dots replaced by commas and single quotes escaped.
 */
export const handleKey = (key: string) => {
  return key.replace(/'/g, "''").replace(/\./g, ',');
};
|
|
182
|
+
|
|
183
|
+
/**
 * Compiles a metadata filter into a SQL `WHERE` clause with positional
 * parameters.
 *
 * `minScore` always occupies parameter `$1`; filter values are numbered from
 * `$2` onwards in the order their conditions appear in the generated SQL.
 *
 * @param filter   Filter object; a falsy filter yields an empty clause.
 * @param minScore Value bound as the first parameter.
 * @returns The clause (or `''` when there are no conditions) and the values to
 *          bind.
 * @throws Error for unknown operators, empty operator objects, and malformed
 *         `$not` / logical operands.
 */
export function buildFilterQuery(filter: Filter, minScore: number): FilterResult {
  type LogicalKey = '$and' | '$or' | '$not' | '$nor';

  const values: any[] = [minScore];

  if (!filter) {
    return { sql: '', values };
  }

  const logicalKeys = ['$and', '$or', '$not', '$nor'];

  /** Renders one operator for `key` and binds whatever value it needs. */
  function applyOperator(key: string, operator: string, operatorValue: unknown): string {
    const operatorFn = FILTER_OPERATORS[operator];
    if (!operatorFn) {
      throw new Error(`Invalid operator: ${operator}`);
    }

    const result = operatorFn(key, values.length + 1, operatorValue);
    if (result.needsValue) {
      const bound = result.transformValue ? result.transformValue(operatorValue) : operatorValue;
      if (operator === '$elemMatch' && Array.isArray(bound)) {
        // $elemMatch consumes one parameter per nested condition.
        values.push(...bound);
      } else {
        values.push(bound);
      }
    }
    return result.sql;
  }

  /** Renders a field-level `$not: { $op: value, ... }` condition. */
  function negateFieldOperators(key: string, operand: unknown): string {
    if (operand === null || typeof operand !== 'object') {
      throw new Error('$not requires an object with operator conditions');
    }

    const negated = Object.entries(operand).map(([nestedOp, nestedValue]) => {
      if (!FILTER_OPERATORS[nestedOp]) {
        throw new Error(`Invalid operator in $not condition: ${nestedOp}`);
      }
      // The operand is forwarded so operators that depend on it (for example
      // $elemMatch and $contains) work inside $not as well.
      return applyOperator(key, nestedOp, nestedValue);
    });

    if (negated.length === 0) {
      throw new Error('$not requires an object with operator conditions');
    }
    return `NOT (${negated.join(' AND ')})`;
  }

  /** Renders the condition(s) attached to a single filter key. */
  function buildCondition(key: string, value: any): string {
    // Handle logical operators ($and/$or/$not/$nor)
    if (logicalKeys.includes(key)) {
      return handleLogicalOperator(key as LogicalKey, value);
    }

    // A bare null goes through $eq, whose SQL turns a NULL parameter into an
    // IS NULL check; `field = NULL` would never match.
    if (value === null || value === undefined) {
      return applyOperator(key, '$eq', null);
    }

    // Any other non-object value is a plain equality check.
    if (typeof value !== 'object') {
      values.push(value);
      return `metadata#>>'{${handleKey(key)}}' = $${values.length}`;
    }

    const entries = Object.entries(value);
    if (entries.length === 0) {
      throw new Error(`No operator given for field: ${key}`);
    }

    // Every operator on the field must hold, e.g. { $gt: 1, $lt: 10 }.
    const parts = entries.map(([operator, operatorValue]) =>
      operator === '$not' ? negateFieldOperators(key, operatorValue) : applyOperator(key, operator, operatorValue),
    );
    const joined = parts.join(' AND ');
    return parts.length > 1 ? `(${joined})` : joined;
  }

  /** Renders `$and` / `$or` / `$nor` over a list of filters, or a top-level `$not`. */
  function handleLogicalOperator(key: LogicalKey, value: any): string {
    if (key === '$not') {
      // For top-level $not
      const negated = Object.entries(value ?? {}).map(([fieldKey, fieldValue]) => buildCondition(fieldKey, fieldValue));
      if (negated.length === 0) {
        throw new Error('$not requires at least one condition');
      }
      return `NOT (${negated.join(' AND ')})`;
    }

    // Empty $and/$nor match everything, empty $or matches nothing.
    const emptyResult = key === '$or' ? 'false' : 'true';
    if (!value || value.length === 0) {
      return emptyResult;
    }
    if (!Array.isArray(value)) {
      throw new Error(`${key} requires an array of conditions`);
    }

    const joinOperator = key === '$or' || key === '$nor' ? 'OR' : 'AND';
    const conditions = value
      .map((subFilter: Filter) => {
        // Keys inside one filter object are implicitly AND-ed, whatever the
        // enclosing logical operator is.
        const parts = Object.entries(subFilter ?? {}).map(([k, v]) => buildCondition(k, v));
        const joined = parts.join(' AND ');
        return parts.length > 1 ? `(${joined})` : joined;
      })
      .filter(Boolean); // drop empty sub-filters so no dangling joiner is emitted

    if (conditions.length === 0) {
      return emptyResult;
    }

    // The registry entry adds the surrounding parentheses / NOT.
    return applyOperator(conditions.join(` ${joinOperator} `), key, value);
  }

  const conditions = Object.entries(filter)
    .map(([key, value]) => buildCondition(key, value))
    .filter(Boolean)
    .join(' AND ');

  return { sql: conditions ? `WHERE ${conditions}` : '', values };
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/** Names of the index strategies that can be requested. */
export type IndexType = 'ivfflat' | 'hnsw' | 'flat';

/** Options specific to `ivfflat` indexes. */
interface IVFConfig {
  lists?: number;
}

/** Options specific to `hnsw` indexes. */
interface HNSWConfig {
  /** Max number of connections (default: 16) */
  m?: number;
  /** Build-time complexity (default: 64) */
  efConstruction?: number;
}

/** Index selection plus the optional per-strategy tuning blocks. */
export interface IndexConfig {
  type?: IndexType;
  ivf?: IVFConfig;
  hnsw?: HNSWConfig;
}
|
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
import pg from 'pg';
|
|
2
|
+
import { describe, it, beforeAll, afterAll, beforeEach, afterEach } from 'vitest';
|
|
3
|
+
|
|
4
|
+
import type { TestConfig, TestResult } from './performance.helpers';
|
|
5
|
+
import {
|
|
6
|
+
baseTestConfigs,
|
|
7
|
+
calculateTimeout,
|
|
8
|
+
generateRandomVectors,
|
|
9
|
+
findNearestBruteForce,
|
|
10
|
+
calculateRecall,
|
|
11
|
+
formatTable,
|
|
12
|
+
groupBy,
|
|
13
|
+
measureLatency,
|
|
14
|
+
getListCount,
|
|
15
|
+
getSearchEf,
|
|
16
|
+
generateClusteredVectors,
|
|
17
|
+
generateSkewedVectors,
|
|
18
|
+
getHNSWConfig,
|
|
19
|
+
getIndexDescription,
|
|
20
|
+
} from './performance.helpers';
|
|
21
|
+
import type { IndexConfig, IndexType } from './types';
|
|
22
|
+
|
|
23
|
+
import { PgVector } from '.';
|
|
24
|
+
|
|
25
|
+
/**
 * Index configuration used by the performance suite: the index type is
 * mandatory, and `rebuild` requests an explicit index build after the data has
 * been loaded.
 */
interface IndexTestConfig extends IndexConfig {
  type: IndexType;
  rebuild?: boolean;
}
|
|
29
|
+
|
|
30
|
+
/**
 * `PgVector` variant for the performance suite that adds a bulk upsert running
 * on its own connection pool.
 */
class PGPerformanceVector extends PgVector {
  private readonly perfPool: pg.Pool;

  constructor(connectionString: string) {
    super(connectionString);

    this.perfPool = new pg.Pool({
      connectionString,
      max: 20, // Maximum number of clients in the pool
      idleTimeoutMillis: 30000, // Close idle connections after 30 seconds
      connectionTimeoutMillis: 2000, // Fail fast if can't connect
    });
  }

  /**
   * Inserts or updates many vectors with a single statement inside one
   * transaction.
   *
   * @param indexName Table to write to (interpolated into the SQL text).
   * @param vectors   Embeddings to store.
   * @param metadata  Optional metadata per vector; defaults to `{}` for each.
   * @param ids       Optional ids per vector; random UUIDs are generated otherwise.
   * @returns The ids that were written.
   */
  async bulkUpsert(indexName: string, vectors: number[][], metadata?: any[], ids?: string[]) {
    const client = await this.perfPool.connect();
    try {
      await client.query('BEGIN');
      const vectorIds = ids || vectors.map(() => crypto.randomUUID());

      // Same query structure as upsert, just using unnest for bulk operation
      const query = `
        INSERT INTO ${indexName} (vector_id, embedding, metadata)
        SELECT * FROM unnest(
          $1::text[],
          $2::vector[],
          $3::jsonb[]
        )
        ON CONFLICT (vector_id)
        DO UPDATE SET
          embedding = EXCLUDED.embedding,
          metadata = EXCLUDED.metadata
        RETURNING embedding::text
      `;

      // Same parameter structure as upsert, just as arrays
      await client.query(query, [
        vectorIds,
        vectors.map(v => `[${v.join(',')}]`),
        (metadata || vectors.map(() => ({}))).map(m => JSON.stringify(m)),
      ]);
      await client.query('COMMIT');
      return vectorIds;
    } catch (error) {
      // A failing ROLLBACK must not replace the error that caused it.
      await client.query('ROLLBACK').catch(() => undefined);
      throw error;
    } finally {
      client.release();
    }
  }

  /**
   * Closes the extra bulk-upsert pool before delegating to the base class, so
   * its connections do not outlive the suite.
   */
  override async disconnect() {
    try {
      await this.perfPool.end();
    } finally {
      await super.disconnect();
    }
  }
}
|
|
83
|
+
|
|
84
|
+
// Remembers which dimension / k / index-type combinations were already warmed up.
const warmupCache = new Map<string, boolean>();

/**
 * Issues a single throw-away query against the index the first time a given
 * dimension / k / index-type combination is seen; later calls with the same
 * combination do nothing.
 */
async function smartWarmup(
  vectorDB: PGPerformanceVector,
  testIndexName: string,
  indexType: string,
  dimension: number,
  k: number,
) {
  const cacheKey = [dimension, k, indexType].join('-');
  if (warmupCache.has(cacheKey)) {
    return;
  }

  console.log(`Warming up ${indexType} index for ${dimension}d vectors, k=${k}`);
  const [warmupVector] = generateRandomVectors(1, dimension);
  await vectorDB.query(testIndexName, warmupVector as number[], k);
  warmupCache.set(cacheKey, true);
}
|
|
100
|
+
|
|
101
|
+
// Database used by the suite; override with the DB_URL environment variable.
const connectionString = process.env.DB_URL || `postgresql://postgres:postgres@localhost:5435/mastra`;

/**
 * Benchmarks every index configuration against every dataset configuration and
 * vector distribution, recording recall (against a brute-force search) and
 * query latency. The collected results are printed once the suite finishes.
 */
describe('PostgreSQL Index Performance', () => {
  let vectorDB: PGPerformanceVector;
  const testIndexName = 'test_index_performance';
  const results: TestResult[] = [];

  const indexConfigs: IndexTestConfig[] = [
    { type: 'flat' }, // Test flat/linear search as baseline
    { type: 'ivfflat', ivf: { lists: 100 } }, // Test IVF with fixed lists
    { type: 'ivfflat', rebuild: true }, // Test IVF with calculated lists and rebuild
    { type: 'hnsw' }, // Test HNSW with default parameters
    { type: 'hnsw', hnsw: { m: 16, efConstruction: 64 } }, // Test HNSW with custom parameters
  ];

  beforeAll(async () => {
    // Initialize PGPerformanceVector
    vectorDB = new PGPerformanceVector(connectionString);
  });

  // Every test starts from, and leaves behind, a database without the test index.
  beforeEach(async () => {
    await vectorDB.deleteIndex(testIndexName);
  });

  afterEach(async () => {
    await vectorDB.deleteIndex(testIndexName);
  });

  afterAll(async () => {
    await vectorDB.disconnect();
    analyzeResults(results);
  });

  // Combine all test configs
  const allConfigs: TestConfig[] = [
    ...baseTestConfigs['64'],
    ...baseTestConfigs['384'],
    ...baseTestConfigs['1024'],
    ...baseTestConfigs.smokeTests,
    ...baseTestConfigs.stressTests,
  ];

  // For each index config
  for (const indexConfig of indexConfigs) {
    const indexType = indexConfig.type;
    const rebuild = indexConfig.rebuild ?? false;
    const hnswConfig = getHNSWConfig(indexConfig);
    const indexDescription = getIndexDescription({
      type: indexType,
      hnsw: hnswConfig,
    });

    describe(`Index: ${indexDescription}`, () => {
      for (const testConfig of allConfigs) {
        const timeout = calculateTimeout(testConfig.dimension, testConfig.size, testConfig.k);
        const testDesc = `dim=${testConfig.dimension} size=${testConfig.size} k=${testConfig.k}`;

        for (const [distType, generator] of Object.entries(distributions)) {
          it(
            // The distribution is part of the name so each test is uniquely identifiable.
            `${testDesc} dist=${distType}`,
            async () => {
              const testVectors = generator(testConfig.size, testConfig.dimension);
              const queryVectors = generator(testConfig.queryCount, testConfig.dimension);

              // Create index and insert vectors
              const lists = getListCount(indexConfig, testConfig.size);

              await vectorDB.createIndex(
                testIndexName,
                testConfig.dimension,
                'cosine',
                indexConfig,
                indexType === 'ivfflat',
              );

              console.log(
                `Batched bulk upserting ${testVectors.length} ${distType} vectors into index ${testIndexName}`,
              );
              const batchSizes = splitIntoRandomBatches(testConfig.size, testConfig.dimension);
              await batchedBulkUpsert(vectorDB, testIndexName, testVectors, batchSizes);
              if (indexType === 'hnsw' || rebuild) {
                console.log('rebuilding index');
                await vectorDB.buildIndex(testIndexName, 'cosine', indexConfig);
                console.log('index rebuilt');
              }
              await smartWarmup(vectorDB, testIndexName, indexType, testConfig.dimension, testConfig.k);

              // For HNSW, test different EF values
              const efValues = indexType === 'hnsw' ? getSearchEf(testConfig.k, hnswConfig.m) : { default: undefined };

              for (const [efType, ef] of Object.entries(efValues)) {
                const recalls: number[] = [];
                const latencies: number[] = [];

                for (const queryVector of queryVectors) {
                  const expectedNeighbors = findNearestBruteForce(queryVector, testVectors, testConfig.k);

                  const [latency, actualResults] = await measureLatency(async () =>
                    vectorDB.query(
                      testIndexName,
                      queryVector,
                      testConfig.k,
                      undefined,
                      false,
                      0,
                      { ef }, // For HNSW
                    ),
                  );

                  const actualNeighbors = actualResults.map(r => r.metadata?.index);
                  const recall = calculateRecall(actualNeighbors, expectedNeighbors, testConfig.k);
                  recalls.push(recall);
                  latencies.push(latency);
                }

                const sorted = [...latencies].sort((a, b) => a - b);
                // Without any query there is nothing to aggregate; report zeros
                // rather than Infinity / undefined.
                const hasSamples = recalls.length > 0;
                results.push({
                  distribution: distType,
                  dimension: testConfig.dimension,
                  size: testConfig.size,
                  k: testConfig.k,
                  type: indexType,
                  metrics: {
                    recall: hasSamples ? recalls.reduce((a, b) => a + b, 0) / recalls.length : 0,
                    minRecall: hasSamples ? Math.min(...recalls) : 0,
                    maxRecall: hasSamples ? Math.max(...recalls) : 0,
                    latency: {
                      p50: sorted[Math.floor(sorted.length * 0.5)] ?? 0,
                      p95: sorted[Math.floor(sorted.length * 0.95)] ?? 0,
                      ...(indexType === 'ivfflat' && {
                        lists,
                        vectorsPerList: Math.round(testConfig.size / (lists || 1)),
                      }),
                      ...(indexType === 'hnsw' && {
                        m: hnswConfig.m,
                        efConstruction: hnswConfig.efConstruction,
                        ef,
                        efType,
                      }),
                    },
                    ...(indexType === 'ivfflat' && {
                      clustering: {
                        numLists: lists,
                        avgVectorsPerList: testConfig.size / (lists || 1),
                        recommendedLists: Math.floor(Math.sqrt(testConfig.size)),
                        distribution: distType,
                      },
                    }),
                  },
                });
              }
            },
            timeout,
          );
        }
      }
    });
  }
});
|
|
257
|
+
|
|
258
|
+
/**
 * Prints one table per index type and vector dimension summarising recall and
 * latency for every recorded run.
 */
function analyzeResults(results: TestResult[]) {
  // Order in which distributions are listed within each group.
  const distributionOrder = ['random', 'clustered', 'skewed', 'mixed'];

  const byType = groupBy(results, (r: TestResult) => r.type);
  for (const [type, typeResults] of Object.entries(byType)) {
    console.log(`\n=== ${type.toUpperCase()} Index Analysis ===\n`);

    const isIvf = type === 'ivfflat';
    const isHnsw = type === 'hnsw';

    const byDimension = groupBy(typeResults, (r: TestResult) => r.dimension.toString());
    for (const [dim, dimResults] of Object.entries(byDimension)) {
      console.log(`\n--- Analysis for ${dim} dimensions ---\n`);

      // Combined Performance Analysis
      const columns = ['Distribution', 'Dataset Size', 'K'];
      if (isHnsw) {
        columns.push('M', 'EF Construction', 'EF', 'EF Type');
      } else if (isIvf) {
        columns.push('Lists', 'Vectors/List');
      }
      columns.push('Min Recall', 'Avg Recall', 'Max Recall', 'P50 (ms)', 'P95 (ms)');

      // Runs are grouped by size, k and the index-specific tuning value, then
      // flattened into table rows.
      const grouped = groupBy(
        dimResults,
        (r: any) => `${r.size}-${r.k}-${type === 'ivfflat' ? r.metrics.latency.lists : r.metrics.latency.m}`,
        (groupRows: any[]) => {
          const ordered = [...groupRows].sort(
            (a, b) => distributionOrder.indexOf(a.distribution) - distributionOrder.indexOf(b.distribution),
          );
          return ordered.map(row => {
            const latency = row.metrics.latency;
            return {
              Distribution: row.distribution,
              'Dataset Size': row.size,
              K: row.k,
              ...(isIvf
                ? {
                    Lists: latency.lists,
                    'Vectors/List': latency.vectorsPerList,
                  }
                : {}),
              ...(isHnsw
                ? {
                    M: latency.m,
                    'EF Construction': latency.efConstruction,
                    EF: latency.ef,
                    'EF Type': latency.efType,
                  }
                : {}),
              'Min Recall': row.metrics.minRecall.toFixed(3),
              'Avg Recall': row.metrics.recall.toFixed(3),
              'Max Recall': row.metrics.maxRecall.toFixed(3),
              'P50 (ms)': latency.p50.toFixed(2),
              'P95 (ms)': latency.p95.toFixed(2),
            };
          });
        },
      );
      const performanceData = Object.values(grouped).flat();

      console.log(formatTable(performanceData, columns));
    }
  }
}
|
|
318
|
+
|
|
319
|
+
/**
 * Splits `total` items into consecutive batch sizes that add up to `total`.
 *
 * Every batch except possibly the last holds between `base` (inclusive) and
 * `2 * base` (exclusive) items, where `base` is 5000 for 1024-dimensional
 * vectors and 15000 otherwise; the last batch takes whatever remains.
 */
function splitIntoRandomBatches(total: number, dimension: number): number[] {
  const base = dimension === 1024 ? 5000 : 15000;
  const sizes: number[] = [];

  for (let left = total; left > 0; ) {
    const jitter = Math.floor(Math.random() * base);
    const take = Math.min(left, base + jitter);
    sizes.push(take);
    left -= take;
  }

  return sizes;
}
|
|
333
|
+
|
|
334
|
+
/**
 * Uploads `vectors` in consecutive slices whose lengths are given by
 * `batchSizes`, one bulk upsert per slice. The vector at position `i` is
 * stored with id `vec_<i>` and metadata `{ index: i }`.
 */
async function batchedBulkUpsert(
  vectorDB: PGPerformanceVector,
  testIndexName: string,
  vectors: number[][],
  batchSizes: number[],
) {
  const allIds = Array.from(vectors, (_, position) => `vec_${position}`);
  const allMetadata = Array.from(vectors, (_, position) => ({ index: position }));

  let offset = 0;
  for (const size of batchSizes) {
    const end = offset + size;
    await vectorDB.bulkUpsert(
      testIndexName,
      vectors.slice(offset, end),
      allMetadata.slice(offset, end),
      allIds.slice(offset, end),
    );
    offset = end;
    console.log(`${offset} of ${vectors.length} vectors upserted`);
  }
}
|
|
353
|
+
|
|
354
|
+
/**
 * Vector generators keyed by distribution name. `mixed` builds its dataset out
 * of randomly sized batches, each produced by a randomly chosen generator from
 * the other three.
 */
const distributions = {
  random: generateRandomVectors,
  clustered: generateClusteredVectors,
  skewed: generateSkewedVectors,
  mixed: (size: number, dimension: number) => {
    const generators = [generateRandomVectors, generateClusteredVectors, generateSkewedVectors];

    const mixedVectors: number[][] = splitIntoRandomBatches(size, dimension).flatMap(batchSize => {
      const generator = generators[Math.floor(Math.random() * generators.length)];
      return generator(batchSize, dimension);
    });

    return mixedVectors;
  },
};
|
package/tsconfig.json
ADDED
package/vitest.config.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { defineConfig } from 'vitest/config';
|
|
2
|
+
|
|
3
|
+
// Regular test run: every *.test.ts file under src except the performance
// tests, executed in a Node environment.
const testFiles = ['src/**/*.test.ts'];
const performanceTestFiles = ['src/**/*.performance.test.ts'];

export default defineConfig({
  test: {
    environment: 'node',
    include: testFiles,
    exclude: performanceTestFiles,
    coverage: {
      reporter: ['text', 'json', 'html'],
    },
  },
});
|