@mastra/pg 0.14.6-alpha.0 → 0.14.6-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,286 +0,0 @@
1
- import type { IndexConfig, IndexType } from './types';
2
-
3
- import type { PgVector } from '.';
4
-
5
/**
 * One row of benchmark output.
 *
 * NOTE(review): this file only declares the shape; the meaning of each field
 * is inferred from its name and should be confirmed against the code that
 * fills it in.
 */
export interface TestResult {
  /** Free-form label for the data set the run used. */
  distribution: string;
  dimension: number;
  type: IndexType;
  size: number;
  k?: number;
  metrics: {
    recall?: number;
    minRecall?: number;
    maxRecall?: number;
    latency?: {
      p50: number;
      p95: number;
      // NOTE(review): the index parameters below are declared inside
      // `latency`, not next to it — presumably so they travel with the
      // timing figures; confirm with the producer of these rows.
      lists?: number;
      vectorsPerList?: number;
      m?: number;
      ef?: number;
    };
    clustering?: {
      numLists?: number;
      avgVectorsPerList?: number;
      recommendedLists?: number;
      distribution?: string;
    };
  };
}
31
-
32
/**
 * Builds `count` vectors of length `dim`.
 *
 * Every component is an independent `Math.random() * 2 - 1` draw, i.e. a
 * value in the half-open range [-1, 1).
 *
 * @param count Number of vectors to produce.
 * @param dim Number of components per vector.
 * @returns An array of `count` arrays, each holding `dim` numbers.
 */
export const generateRandomVectors = (count: number, dim: number) => {
  const randomComponent = () => Math.random() * 2 - 1;
  const randomVector = () => Array.from({ length: dim }, randomComponent);
  return Array.from({ length: count }, randomVector);
};
37
-
38
/**
 * Builds `count` vectors of length `dim` scattered around randomly placed
 * cluster centers.
 *
 * @param count Number of vectors to produce.
 * @param dim Number of components per vector.
 * @param numClusters Number of cluster centers to draw (defaults to 10).
 * @returns An array of `count` vectors.
 */
export const generateClusteredVectors = (count: number, dim: number, numClusters: number = 10) => {
  const uniformComponent = () => Math.random() * 2 - 1;

  // Each center is a uniform random point with components in [-1, 1).
  const centers = Array.from({ length: numClusters }, () => Array.from({ length: dim }, uniformComponent));

  const sampleAroundCenter = () => {
    // Squaring a uniform draw biases the pick toward low indices, so the
    // first clusters receive more vectors than the last ones.
    const pickedIndex = Math.floor(Math.pow(Math.random(), 2) * numClusters);
    const anchor = centers[pickedIndex] as number[];

    // 80% of vectors get a narrow noise band, the other 20% a wide one.
    let spread: number;
    if (Math.random() < 0.8) {
      spread = 0.1;
    } else {
      spread = 0.5;
    }
    const halfSpread = spread / 2;

    return anchor.map(component => component + (Math.random() * spread - halfSpread));
  };

  return Array.from({ length: count }, sampleAroundCenter);
};
53
-
54
/**
 * Builds a heavily skewed data set: one tight cluster plus uniformly
 * scattered vectors, returned in random order.
 *
 * - `Math.floor(count * 0.6)` vectors sit within ±0.05 (per component) of a
 *   single center whose components are drawn from [0, 0.2).
 * - The remaining vectors have components drawn uniformly from [-1, 1).
 *
 * @param count Total number of vectors to produce.
 * @param dim Number of components per vector.
 * @returns The `count` vectors, shuffled.
 */
export const generateSkewedVectors = (count: number, dim: number) => {
  const vectors: number[][] = [];

  const denseCount = Math.floor(count * 0.6);
  const sparseCount = count - denseCount;

  // Dense cluster (60% of vectors)
  const denseCenter = Array.from({ length: dim }, () => Math.random() * 0.2);
  for (let i = 0; i < denseCount; i++) {
    vectors.push(denseCenter.map(c => c + (Math.random() * 0.1 - 0.05)));
  }

  // Scattered vectors (40%)
  for (let i = 0; i < sparseCount; i++) {
    vectors.push(Array.from({ length: dim }, () => Math.random() * 2 - 1));
  }

  // Fisher–Yates shuffle. Sorting with a random comparator gives the sort an
  // inconsistent ordering, so the result is engine-dependent and not a
  // uniform permutation; swapping from the end is uniform and O(n).
  for (let i = vectors.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    const held = vectors[i] as number[];
    vectors[i] = vectors[j] as number[];
    vectors[j] = held;
  }

  return vectors;
};
75
-
76
/**
 * Exhaustive nearest-neighbour search: scores every vector against `query`
 * with `cosineSimilarity` and returns the positions of the best `k`.
 *
 * @param query Vector to compare against.
 * @param vectors Candidate vectors; a candidate's position in this array is its id.
 * @param k Maximum number of ids to return.
 * @returns Indices into `vectors`, ordered from most to least similar.
 */
export const findNearestBruteForce = (query: number[], vectors: number[][], k: number) => {
  // `dist` holds a similarity score, so higher means closer.
  const ranked = vectors
    .map((vector, idx) => ({ idx, dist: cosineSimilarity(query, vector) }))
    .sort((left, right) => right.dist - left.dist);

  const best = ranked.slice(0, k);
  return best.map(({ idx }) => idx);
};
85
-
86
/**
 * Position-aware recall score over the first `k` results.
 *
 * For each of the first `k` slots in `actual`:
 * - 1 point when the id matches `expected` at the same position;
 * - 0.5 points when the id appears anywhere in `expected`;
 * - 0 points otherwise, including when `actual` has no entry at that slot.
 *
 * Missing slots are skipped on purpose. Substituting a default id for them
 * would credit a result that was never returned, and two missing slots would
 * compare equal and count as a perfect match.
 *
 * @param actual Ids returned by the system under test, best first.
 * @param expected Ground-truth ids, best first.
 * @param k Number of leading slots to score.
 * @returns The total score divided by `k`, or 0 when `k` is not positive.
 */
export const calculateRecall = (actual: number[], expected: number[], k: number): number => {
  // Avoid 0 / 0 = NaN when there is nothing to score.
  if (!(k > 0)) return 0;

  const expectedIds = new Set(expected);
  let score = 0;
  for (let i = 0; i < k; i++) {
    const candidate = actual[i];
    if (candidate === undefined) continue;

    if (candidate === expected[i]) {
      score += 1;
    } else if (expectedIds.has(candidate)) {
      score += 0.5;
    }
  }
  return score / k;
};
97
-
98
/**
 * Cosine similarity of two vectors: dot product divided by the product of
 * their Euclidean norms.
 *
 * The dot product walks `a`; components missing from `b` count as 0.
 *
 * @param a First vector.
 * @param b Second vector.
 * @returns The similarity, or 0 when either vector has zero magnitude. The
 *   ratio is undefined in that case, and returning NaN would corrupt any
 *   sort that compares these scores.
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  const dotProduct = a.reduce((sum, val, i) => sum + val * (b[i] ?? 0), 0);
  const normA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
  const normB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));

  const denominator = normA * normB;
  if (denominator === 0) return 0;

  return dotProduct / denominator;
}
104
-
105
/**
 * Renders rows as a box-drawn text table.
 *
 * Each column is as wide as its longest cell or its header, whichever is
 * longer. `null` and `undefined` cells are shown as `-`; everything else is
 * rendered through its own `toString()`.
 *
 * @param data Row objects, indexed by column name.
 * @param columns Column names, in display order; also used as header text.
 * @returns The table as one newline-joined string.
 */
export const formatTable = (data: any[], columns: string[]) => {
  const cellText = (cell: any) => (cell === undefined || cell === null ? '-' : cell.toString());

  const colWidths = columns.map(col =>
    data.reduce((widest: number, row) => Math.max(widest, cellText(row[col]).length), col.length),
  );

  // Horizontal rule with the given corner and junction characters.
  const rule = (left: string, joint: string, right: string) =>
    left + colWidths.map(width => '─'.repeat(width)).join(joint) + right;

  const headerLine = '│' + columns.map((col, i) => col.padEnd(colWidths[i] ?? 0)).join('│') + '│';

  const bodyLines = data.map(row => {
    const cells = columns.map((col, i) => cellText(row[col]).padEnd(colWidths[i]));
    return '│' + cells.join('│') + '│';
  });

  return [rule('┌', '┬', '┐'), headerLine, rule('├', '┼', '┤'), ...bodyLines, rule('└', '┴', '┘')].join('\n');
};
136
-
137
/**
 * Groups `array` items by a property value or by a computed string key.
 *
 * Buckets are collected in a `Map` rather than a plain object, so group
 * names that match inherited object members (`constructor`, `toString`,
 * `__proto__`, …) get an ordinary bucket. Looking such a name up on `{}`
 * finds the inherited member instead, which made the push throw.
 *
 * @param array Items to group.
 * @param key Property name to group on, or a function that returns the
 *   group name for an item. Property values are turned into strings.
 * @param reducer Optional function applied to each finished group; when
 *   given, the result maps group name to the reducer's return value.
 * @returns An object keyed by group name, holding either the grouped items
 *   or the reduced value for each group.
 */
export const groupBy = <T, K extends keyof T>(
  array: T[],
  key: K | ((item: T) => string),
  reducer?: (group: T[]) => any,
): Record<string, any> => {
  const buckets = new Map<string, T[]>();
  for (const item of array) {
    const groupName = String(typeof key === 'function' ? key(item) : item[key]);
    const bucket = buckets.get(groupName);
    if (bucket) {
      bucket.push(item);
    } else {
      buckets.set(groupName, [item]);
    }
  }

  // Object.fromEntries defines own properties, so even "__proto__" becomes a
  // regular key instead of replacing the result's prototype.
  const grouped: Record<string, T[]> = Object.fromEntries(buckets);

  if (reducer) {
    return Object.fromEntries(Object.entries(grouped).map(([name, group]) => [name, reducer(group)]));
  }

  return grouped;
};
158
-
159
/**
 * Derives a timeout for one benchmark case from a base of 600000.
 *
 * The base is scaled by independent factors and then by a fixed 5:
 * - dimension: ×3 at 1024 and above, ×1.5 at 384 and above, otherwise ×1;
 * - size: ×2 at 10000 and above;
 * - k: ×1.5 at 75 and above.
 *
 * NOTE(review): the unit is presumably milliseconds — confirm with the caller.
 *
 * @param dimension Vector dimensionality of the case.
 * @param size Number of vectors in the case.
 * @param k Number of neighbours requested per query.
 * @returns The scaled timeout value.
 */
export const calculateTimeout = (dimension: number, size: number, k: number) => {
  const baseTimeout = 600000;
  const dimensionFactor = dimension >= 1024 ? 3 : dimension >= 384 ? 1.5 : 1;
  const sizeFactor = size >= 10000 ? 2 : 1;
  const kFactor = k >= 75 ? 1.5 : 1;

  return baseTimeout * dimensionFactor * sizeFactor * kFactor * 5;
};
167
-
168
/**
 * Benchmark case matrix, grouped by suite.
 *
 * `smokeTests` and `stressTests` are named suites; the numeric keys hold the
 * cases for that vector dimension. Every case is
 * `{ dimension, size, k, queryCount }`.
 */
export const baseTestConfigs = (() => {
  // Expands one (dimension, size) pair into one case per requested k.
  const sweep = (dimension: number, size: number, ks: number[], queryCount: number = 30) =>
    ks.map(k => ({ dimension, size, k, queryCount }));

  const fullKRange = [10, 25, 50, 100];
  const smallestKOnly = [10];

  return {
    smokeTests: sweep(384, 1_000, smallestKOnly, 10),
    '64': [
      ...sweep(64, 100, fullKRange),
      ...sweep(64, 1_000, fullKRange),
      ...sweep(64, 10_000, smallestKOnly),
      ...sweep(64, 100_000, fullKRange),
      ...sweep(64, 500_000, smallestKOnly),
      ...sweep(64, 1_000_000, smallestKOnly),
    ],
    '384': [
      ...sweep(384, 100, fullKRange),
      ...sweep(384, 1_000, fullKRange),
      ...sweep(384, 10_000, smallestKOnly),
      ...sweep(384, 100_000, fullKRange),
      ...sweep(384, 500_000, smallestKOnly),
    ],
    '1024': [
      ...sweep(1024, 100, fullKRange),
      ...sweep(1024, 1_000, fullKRange),
      ...sweep(1024, 10_000, fullKRange),
      ...sweep(1024, 50_000, [10, 25]),
    ],
    stressTests: [
      // Maximum load
      ...sweep(512, 1_000_000, [50], 5),

      // Dense search
      ...sweep(256, 1_000_000, [100], 5),

      ...sweep(1024, 500_000, [50], 5),
    ],
  };
})();
229
-
230
/**
 * One benchmark case: four required numbers.
 *
 * NOTE(review): field meanings are inferred from their names — confirm
 * against the benchmark runner that consumes this shape.
 */
export interface TestConfig {
  /** Presumably the number of components per vector. */
  dimension: number;
  /** Presumably the number of vectors in the data set. */
  size: number;
  /** Presumably the number of neighbours requested per query. */
  k: number;
  /** Presumably the number of queries to run for this case. */
  queryCount: number;
}
236
-
237
/**
 * Issues one throwaway query against `indexName` using a freshly generated
 * random vector, and waits for it to finish. The query result is discarded.
 *
 * @param vectorDB Store to query.
 * @param indexName Index to query.
 * @param dimension Length of the random query vector.
 * @param k Passed to the store as `topK`.
 */
export async function warmupQuery(vectorDB: PgVector, indexName: string, dimension: number, k: number) {
  const [probe] = generateRandomVectors(1, dimension) as [number[]];
  await vectorDB.query({ indexName, queryVector: probe, topK: k });
}
241
-
242
/**
 * Awaits `fn` and reports how long it took according to `performance.now()`.
 *
 * @param fn Async operation to time.
 * @returns A tuple of `[elapsed, result]`, where `elapsed` is the difference
 *   between the two `performance.now()` readings and `result` is whatever
 *   `fn` resolved to. If `fn` rejects, the rejection propagates.
 */
export async function measureLatency<T>(fn: () => Promise<T>): Promise<[number, T]> {
  const startedAt = performance.now();
  const outcome = await fn();
  const elapsed = performance.now() - startedAt;
  return [elapsed, outcome];
}
248
-
249
/**
 * Resolves the IVF list count for an index configuration.
 *
 * @param indexConfig Index configuration to inspect.
 * @param size Number of vectors, used only when no list count is configured.
 * @returns `undefined` unless the index type is `'ivfflat'`; otherwise the
 *   configured `ivf.lists` when it is truthy, else `floor(sqrt(size) * 2)`
 *   clamped to the range 100–4000.
 */
export const getListCount = (indexConfig: IndexConfig, size: number): number | undefined => {
  if (indexConfig.type === 'ivfflat') {
    const configuredLists = indexConfig.ivf?.lists;
    if (configuredLists) {
      return configuredLists;
    }

    const derivedLists = Math.floor(Math.sqrt(size) * 2);
    const cappedLists = Math.min(4000, derivedLists);
    return Math.max(100, cappedLists);
  }

  return undefined;
};
254
-
255
/**
 * Reads the HNSW settings from an index configuration, filling in defaults.
 *
 * @param indexConfig Index configuration to inspect.
 * @returns `m` (default 8) and `efConstruction` (default 32); a default is
 *   used whenever the configured value is `null` or `undefined`.
 */
export const getHNSWConfig = (indexConfig: IndexConfig): { m: number; efConstruction: number } => {
  const configured = indexConfig.hnsw;
  const m = configured?.m ?? 8;
  const efConstruction = configured?.efConstruction ?? 32;
  return { m, efConstruction };
};
261
-
262
/**
 * Computes three candidate search `ef` values from `k` and `m`.
 *
 * Each candidate is based on `m * k` and is never allowed to drop below `k`.
 *
 * @param k Number of neighbours requested.
 * @param m HNSW `m` parameter.
 * @returns `default` (`m * k`), `lower` (half of that: lower quality,
 *   faster) and `higher` (double: higher quality, slower).
 */
export function getSearchEf(k: number, m: number) {
  const scaled = m * k;
  const atLeastK = (candidate: number) => Math.max(k, candidate);

  return {
    default: atLeastK(scaled),
    lower: atLeastK(scaled / 2),
    higher: atLeastK(scaled * 2),
  };
}
269
-
270
/**
 * Produces a short label for an index configuration.
 *
 * @param type Index type.
 * @param hnsw HNSW settings; only read when `type` is `'hnsw'`.
 * @returns `HNSW(m=…,ef=…)` for `'hnsw'`, `IVF` for `'ivfflat'`, and `Flat`
 *   for any other type.
 */
export function getIndexDescription({
  type,
  hnsw,
}: {
  type: IndexType;
  hnsw: { m: number; efConstruction: number };
}): string {
  switch (type) {
    case 'hnsw':
      return `HNSW(m=${hnsw.m},ef=${hnsw.efConstruction})`;
    case 'ivfflat':
      return `IVF`;
    default:
      return 'Flat';
  }
}
@@ -1,101 +0,0 @@
1
- /**
2
- * Prompt text describing the filter operators PG Vector queries accept, with restrictions and examples.
3
- * Exported as one template-literal string; its wording instructs the reader to reject unsupported operators.
4
- */
5
- export const PGVECTOR_PROMPT = `When querying PG Vector, you can ONLY use the operators listed below. Any other operators will be rejected.
6
- Important: Don't explain how to construct the filter - use the specified operators and fields to search the content and return relevant results.
7
- If a user tries to give an explicit operator that is not supported, reject the filter entirely and let them know that the operator is not supported.
8
-
9
- Basic Comparison Operators:
10
- - $eq: Exact match (default when using field: value)
11
- Example: { "category": "electronics" }
12
- - $ne: Not equal
13
- Example: { "category": { "$ne": "electronics" } }
14
- - $gt: Greater than
15
- Example: { "price": { "$gt": 100 } }
16
- - $gte: Greater than or equal
17
- Example: { "price": { "$gte": 100 } }
18
- - $lt: Less than
19
- Example: { "price": { "$lt": 100 } }
20
- - $lte: Less than or equal
21
- Example: { "price": { "$lte": 100 } }
22
-
23
- Array Operators:
24
- - $in: Match any value in array
25
- Example: { "category": { "$in": ["electronics", "books"] } }
26
- - $nin: Does not match any value in array
27
- Example: { "category": { "$nin": ["electronics", "books"] } }
28
- - $all: Match all values in array
29
- Example: { "tags": { "$all": ["premium", "sale"] } }
30
- - $elemMatch: Match array elements that meet all specified conditions
31
- Example: { "items": { "$elemMatch": { "price": { "$gt": 100 } } } }
32
- - $contains: Check if array contains value
33
- Example: { "tags": { "$contains": "premium" } }
34
-
35
- Logical Operators:
36
- - $and: Logical AND (implicit when using multiple conditions)
37
- Example: { "$and": [{ "price": { "$gt": 100 } }, { "category": "electronics" }] }
38
- - $or: Logical OR
39
- Example: { "$or": [{ "price": { "$lt": 50 } }, { "category": "books" }] }
40
- - $not: Logical NOT
41
- Example: { "$not": { "category": "electronics" } }
42
- - $nor: Logical NOR
43
- Example: { "$nor": [{ "price": { "$lt": 50 } }, { "category": "books" }] }
44
-
45
- Element Operators:
46
- - $exists: Check if field exists
47
- Example: { "rating": { "$exists": true } }
48
-
49
- Special Operators:
50
- - $size: Array length check
51
- Example: { "tags": { "$size": 2 } }
52
-
53
- Restrictions:
54
- - Regex patterns are not supported
55
- - Direct RegExp patterns will throw an error
56
- - Nested fields are supported using dot notation
57
- - Multiple conditions on the same field are supported with both implicit and explicit $and
58
- - Array operations work on array fields only
59
- - Basic operators handle array values as JSON strings
60
- - Empty arrays in conditions are handled gracefully
61
- - Only logical operators ($and, $or, $not, $nor) can be used at the top level
62
- - All other operators must be used within a field condition
63
- Valid: { "field": { "$gt": 100 } }
64
- Valid: { "$and": [...] }
65
- Invalid: { "$gt": 100 }
66
- Invalid: { "$contains": "value" }
67
- - Logical operators must contain field conditions, not direct operators
68
- Valid: { "$and": [{ "field": { "$gt": 100 } }] }
69
- Invalid: { "$and": [{ "$gt": 100 }] }
70
- - $not operator:
71
- - Must be an object
72
- - Cannot be empty
73
- - Can be used at field level or top level
74
- - Valid: { "$not": { "field": "value" } }
75
- - Valid: { "field": { "$not": { "$eq": "value" } } }
76
- - Other logical operators ($and, $or, $nor):
77
- - Can only be used at top level or nested within other logical operators
78
- - Can not be used on a field level, or be nested inside a field
79
- - Can not be used inside an operator
80
- - Valid: { "$and": [{ "field": { "$gt": 100 } }] }
81
- - Valid: { "$or": [{ "$and": [{ "field": { "$gt": 100 } }] }] }
82
- - Invalid: { "field": { "$and": [{ "$gt": 100 }] } }
83
- - Invalid: { "field": { "$or": [{ "$gt": 100 }] } }
84
- - Invalid: { "field": { "$gt": { "$and": [{...}] } } }
85
- - $elemMatch requires an object with conditions
86
- Valid: { "array": { "$elemMatch": { "field": "value" } } }
87
- Invalid: { "array": { "$elemMatch": "value" } }
88
-
89
- Example Complex Query:
90
- {
91
- "$and": [
92
- { "category": { "$in": ["electronics", "computers"] } },
93
- { "price": { "$gte": 100, "$lte": 1000 } },
94
- { "tags": { "$all": ["premium"] } },
95
- { "rating": { "$exists": true, "$gt": 4 } },
96
- { "$or": [
97
- { "stock": { "$gt": 0 } },
98
- { "preorder": true }
99
- ]}
100
- ]
101
- }`;