pgserve 1.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,368 @@
1
+ #!/usr/bin/env bun
2
+
3
+ /**
4
+ * Vector Embedding Generator for Benchmarks
5
+ *
6
+ * Generates pre-computed random unit vectors for consistent benchmarking.
7
+ * Excludes embedding API latency from database performance measurements.
8
+ *
9
+ * Usage:
10
+ * bun tests/benchmarks/vector-generator.js [--count=10000] [--dim=1536]
11
+ * bun tests/benchmarks/vector-generator.js --all # Generate all standard fixtures
12
+ */
13
+
14
+ import fs from 'fs';
15
+ import path from 'path';
16
+
17
+ const FIXTURES_DIR = path.join(import.meta.dirname, 'fixtures');
18
+
19
/**
 * Generate a random unit vector (L2 norm = 1).
 *
 * Each component is drawn with the Box-Muller transform from two uniform
 * samples, then the whole vector is scaled to unit length. All randomness is
 * read from `Math.random` at call time.
 *
 * @param {number} dimension - Vector dimension
 * @returns {number[]} Normalized vector with `dimension` components
 */
function generateUnitVector(dimension) {
  // Math.random() may return exactly 0 and Math.log(0) is -Infinity, which
  // would poison the vector with Infinity/NaN once it is normalized. Clamp the
  // sample that feeds Math.log to a tiny positive value instead.
  const MIN_UNIFORM = 2 ** -53;

  const vector = [];
  for (let i = 0; i < dimension; i++) {
    const u1 = Math.max(Math.random(), MIN_UNIFORM);
    const u2 = Math.random();
    const z = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
    vector.push(z);
  }

  // Normalize to unit length (L2 norm = 1)
  const norm = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
  return vector.map(v => v / norm);
}
39
+
40
/**
 * Generate embeddings with metadata.
 *
 * `Math.random` is temporarily replaced by a seeded linear congruential
 * generator so that the vectors (and the category / tenant / score fields)
 * are reproducible for a given seed. The `timestamp` field is derived from
 * `Date.now()` and therefore still varies between runs.
 *
 * @param {number} count - Number of embeddings
 * @param {number} dimension - Vector dimension
 * @param {number} seed - Random seed for reproducibility; coerced to an
 *   unsigned 32-bit integer
 * @returns {{ vectors: number[][], metadata: object[] }}
 */
function generateEmbeddings(count, dimension, seed = 42) {
  // Simple seeded random (LCG). The seed is forced into [0, 2^32) so the state
  // can never go negative, which would otherwise yield negative "random"
  // numbers and undefined category / tenant lookups.
  let state = seed >>> 0;
  const seededRandom = () => {
    state = (state * 1664525 + 1013904223) % 4294967296;
    return state / 4294967296;
  };

  const vectors = [];
  const metadata = [];

  const categories = ['technology', 'science', 'business', 'health', 'sports', 'entertainment'];
  const tenants = ['tenant_a', 'tenant_b', 'tenant_c', 'tenant_d', 'tenant_e'];

  console.log(`Generating ${count.toLocaleString()} vectors of dimension ${dimension}...`);
  const startTime = performance.now();

  // Override Math.random temporarily. The finally block guarantees the global
  // is restored even if generation throws part-way through.
  const originalRandom = Math.random;
  Math.random = seededRandom;
  try {
    for (let i = 0; i < count; i++) {
      vectors.push(generateUnitVector(dimension));
      metadata.push({
        id: i + 1,
        category: categories[Math.floor(Math.random() * categories.length)],
        tenant_id: tenants[Math.floor(Math.random() * tenants.length)],
        timestamp: new Date(Date.now() - Math.floor(Math.random() * 30 * 24 * 60 * 60 * 1000)).toISOString(),
        score: Math.random()
      });

      // Progress indicator
      if ((i + 1) % 1000 === 0) {
        process.stdout.write(`\r Generated ${(i + 1).toLocaleString()} / ${count.toLocaleString()} vectors`);
      }
    }
  } finally {
    Math.random = originalRandom;
  }

  const elapsed = ((performance.now() - startTime) / 1000).toFixed(2);
  console.log(`\n Completed in ${elapsed}s`);

  return { vectors, metadata };
}
92
+
93
/**
 * Save embeddings to a JSON file under the fixtures directory.
 *
 * @param {string} filename - Output filename, resolved relative to FIXTURES_DIR
 *   (may contain subdirectories)
 * @param {{ vectors: number[][], metadata: object[] }} data - Embeddings data
 * @returns {string} Full path of the written file
 */
function saveEmbeddings(filename, data) {
  const filepath = path.join(FIXTURES_DIR, filename);

  // Ensure the target directory exists. With `recursive: true` mkdirSync is a
  // no-op for an existing directory, so no separate (and racy) existsSync
  // check is needed. Using the file's own parent also covers nested filenames.
  fs.mkdirSync(path.dirname(filepath), { recursive: true });

  // Save with minimal formatting for smaller files
  const json = JSON.stringify(data);
  fs.writeFileSync(filepath, json);

  const sizeMB = (Buffer.byteLength(json) / 1024 / 1024).toFixed(2);
  console.log(` Saved to ${filepath} (${sizeMB} MB)`);

  return filepath;
}
115
+
116
/**
 * Read a previously generated embeddings fixture from the fixtures directory.
 *
 * @param {string} filename - Fixture filename, resolved relative to FIXTURES_DIR
 * @returns {{ vectors: number[][], metadata: object[] }} Parsed file contents
 * @throws {Error} When the fixture file does not exist
 */
export function loadEmbeddings(filename) {
  const source = path.join(FIXTURES_DIR, filename);

  const isMissing = !fs.existsSync(source);
  if (isMissing) {
    throw new Error(`Embeddings file not found: ${source}. Run: bun tests/benchmarks/vector-generator.js --all`);
  }

  const raw = fs.readFileSync(source, 'utf-8');
  const embeddings = JSON.parse(raw);
  const loadedCount = embeddings.vectors.length.toLocaleString();
  console.log(`Loaded ${loadedCount} vectors from ${filename}`);
  return embeddings;
}
132
+
133
/**
 * Resolve the fixture path for a corpus of the given size, creating the
 * fixture first when it is not on disk yet.
 *
 * @param {number} count - Number of embeddings
 * @param {number} dimension - Vector dimension
 * @returns {string} Path to embeddings file
 */
export function getEmbeddingsPath(count, dimension) {
  const filename = `embeddings-${count}-${dimension}.json`;
  const target = path.join(FIXTURES_DIR, filename);

  // Fast path: fixture already generated.
  if (fs.existsSync(target)) {
    return target;
  }

  console.log(`\nGenerating missing embeddings file: ${filename}`);
  const generated = generateEmbeddings(count, dimension);
  saveEmbeddings(filename, generated);
  return target;
}
151
+
152
/**
 * Produce query vectors by running the embedding generator with its own seed
 * and discarding the metadata.
 *
 * @param {number} count - Number of query vectors
 * @param {number} dimension - Vector dimension
 * @param {number} seed - Seed forwarded to generateEmbeddings (default 12345)
 * @returns {number[][]} Query vectors
 */
export function generateQueryVectors(count, dimension, seed = 12345) {
  const { vectors } = generateEmbeddings(count, dimension, seed);
  return vectors;
}
162
+
163
/**
 * Render a numeric array as a bracketed, comma-separated string, the literal
 * form used for pgvector values.
 *
 * @param {number[]} vector - Vector array
 * @returns {string} PostgreSQL vector literal
 */
export function formatPgVector(vector) {
  const components = vector.join(',');
  return '[' + components + ']';
}
171
+
172
+ // ============================================================================
173
+ // RECALL MEASUREMENT (Industry-standard methodology)
174
+ // Based on: ANN-Benchmarks, Qdrant, VectorDBBench
175
+ // ============================================================================
176
+
177
/**
 * Squared Euclidean (L2) distance between two vectors.
 * The square root is skipped on purpose: it is cheaper and leaves the
 * nearest-neighbor ordering unchanged.
 *
 * @param {number[]} a - First vector (its length drives the iteration)
 * @param {number[]} b - Second vector
 * @returns {number} Squared L2 distance
 */
export function l2DistanceSquared(a, b) {
  let total = 0;
  let idx = 0;
  while (idx < a.length) {
    const delta = a[idx] - b[idx];
    total += delta * delta;
    idx += 1;
  }
  return total;
}
192
+
193
/**
 * Exact k-nearest-neighbor search by brute force.
 * For every query, the squared L2 distance to every corpus vector is
 * computed, the candidates are sorted ascending, and the first k IDs are
 * kept. Approximate results are compared against this output.
 *
 * @param {number[][]} corpus - All vectors in the database
 * @param {number[][]} queries - Query vectors
 * @param {number} k - Number of nearest neighbors
 * @returns {number[][]} Ground truth: array of k neighbor IDs for each query
 */
export function computeGroundTruth(corpus, queries, k) {
  console.log(` Computing ground truth (brute-force k=${k} for ${queries.length} queries)...`);
  const startedAt = performance.now();

  const byDistance = (left, right) => left.distance - right.distance;

  const groundTruth = Array.from(queries, (query, q) => {
    // IDs are 1-indexed to match database IDs.
    const ranked = Array.from(corpus, (candidate, i) => ({
      id: i + 1,
      distance: l2DistanceSquared(query, candidate)
    }));
    ranked.sort(byDistance);
    const nearestIds = ranked.slice(0, k).map(entry => entry.id);

    // Report progress every 10 queries and once more on the final query.
    const isLastQuery = q === queries.length - 1;
    if ((q + 1) % 10 === 0 || isLastQuery) {
      process.stdout.write(`\r Ground truth: ${q + 1}/${queries.length} queries`);
    }

    return nearestIds;
  });

  const elapsed = ((performance.now() - startedAt) / 1000).toFixed(2);
  console.log(`\n Ground truth computed in ${elapsed}s`);

  return groundTruth;
}
235
+
236
/**
 * Calculate Recall@k
 * Recall = (# of ground truth neighbors found) / k
 *
 * @param {number[][]} approximateResults - IDs returned by approximate search
 * @param {number[][]} groundTruth - IDs from exact brute-force search
 * @param {number} k - Number of neighbors (for normalization); must be > 0
 * @returns {{ recall: number, perQuery: number[] }} Average recall and
 *   per-query recalls. With zero queries the average is reported as 0.
 * @throws {Error} When the two result lists have different lengths
 * @throws {RangeError} When k is not a positive finite number
 */
export function calculateRecall(approximateResults, groundTruth, k) {
  if (approximateResults.length !== groundTruth.length) {
    throw new Error(`Result count mismatch: ${approximateResults.length} vs ${groundTruth.length}`);
  }

  // Dividing by a zero / negative / NaN k would silently put NaN or Infinity
  // into the reported recall, so reject it up front.
  if (!Number.isFinite(k) || k <= 0) {
    throw new RangeError(`k must be a positive number, got ${k}`);
  }

  const perQuery = [];
  let totalRecall = 0;

  for (let i = 0; i < approximateResults.length; i++) {
    const approxSet = new Set(approximateResults[i]);

    // Count how many ground truth neighbors were found
    let found = 0;
    for (const truthId of groundTruth[i]) {
      if (approxSet.has(truthId)) {
        found++;
      }
    }

    const queryRecall = found / k;
    perQuery.push(queryRecall);
    totalRecall += queryRecall;
  }

  // Avoid 0 / 0 = NaN when there were no queries at all.
  const recall = perQuery.length > 0 ? totalRecall / perQuery.length : 0;

  return { recall, perQuery };
}
275
+
276
// In-memory store of computed ground truth, keyed by `${cacheKey}-k${k}`.
const groundTruthCache = new Map();

/**
 * Return the ground truth for a dataset, computing it on first request and
 * serving the stored result afterwards.
 *
 * The cache entry is identified only by `cacheKey` and `k`; the corpus and
 * queries themselves are not part of the key.
 *
 * @param {number[][]} corpus - Corpus vectors
 * @param {number[][]} queries - Query vectors
 * @param {number} k - Number of neighbors
 * @param {string} cacheKey - Unique key for caching
 * @returns {number[][]} Ground truth neighbor IDs
 */
export function getGroundTruth(corpus, queries, k, cacheKey) {
  const fullKey = `${cacheKey}-k${k}`;

  if (!groundTruthCache.has(fullKey)) {
    const computed = computeGroundTruth(corpus, queries, k);
    groundTruthCache.set(fullKey, computed);
    return computed;
  }

  console.log(` Using cached ground truth for ${fullKey}`);
  return groundTruthCache.get(fullKey);
}
300
+
301
// Standard fixture configurations generated by `--all`.
const STANDARD_FIXTURES = [
  { count: 1000, dimension: 384, desc: 'Small corpus, small model (all-MiniLM)' },
  { count: 1000, dimension: 1536, desc: 'Small corpus, OpenAI embeddings' },
  { count: 10000, dimension: 384, desc: 'Medium corpus, small model' },
  { count: 10000, dimension: 1536, desc: 'Medium corpus, OpenAI embeddings' },
];

// CLI interface: only runs when this file is the entry point, not when it is
// imported for its helpers.
if (import.meta.main) {
  const args = process.argv.slice(2);

  if (args.includes('--help') || args.includes('-h')) {
    console.log(`
Vector Embedding Generator for Benchmarks

Usage:
bun tests/benchmarks/vector-generator.js [options]

Options:
--all Generate all standard fixtures
--count=N Number of embeddings (default: 10000)
--dim=N Vector dimension (default: 1536)
--help, -h Show this help

Standard Fixtures (--all):
${STANDARD_FIXTURES.map(f => ` - ${f.count} x ${f.dimension}-dim: ${f.desc}`).join('\n')}

Examples:
bun tests/benchmarks/vector-generator.js --all
bun tests/benchmarks/vector-generator.js --count=5000 --dim=768
`);
    process.exit(0);
  }

  if (args.includes('--all')) {
    console.log('\n=== Generating All Standard Fixtures ===\n');

    for (const { count, dimension, desc } of STANDARD_FIXTURES) {
      console.log(`\n[${count} x ${dimension}-dim] ${desc}`);
      const data = generateEmbeddings(count, dimension);
      saveEmbeddings(`embeddings-${count}-${dimension}.json`, data);
    }

    console.log('\n✓ All fixtures generated successfully\n');
  } else {
    /**
     * Read the value of a `--flag=N` argument as a positive integer.
     * Exits with status 1 on anything else, so a typo such as `--count=abc`
     * cannot silently produce an empty `embeddings-NaN-*.json` fixture.
     */
    const readPositiveInt = (arg) => {
      const [flag, raw = ''] = arg.split('=');
      const value = /^\d+$/.test(raw) ? Number.parseInt(raw, 10) : Number.NaN;
      if (!Number.isSafeInteger(value) || value <= 0) {
        console.error(`Invalid value for ${flag}: "${raw}" (expected a positive integer)`);
        process.exit(1);
      }
      return value;
    };

    // Parse individual options
    let count = 10000;
    let dimension = 1536;

    for (const arg of args) {
      if (arg.startsWith('--count=')) {
        count = readPositiveInt(arg);
      } else if (arg.startsWith('--dim=')) {
        dimension = readPositiveInt(arg);
      }
    }

    console.log(`\n=== Generating Embeddings ===\n`);
    console.log(`Count: ${count.toLocaleString()}`);
    console.log(`Dimension: ${dimension}`);

    const data = generateEmbeddings(count, dimension);
    saveEmbeddings(`embeddings-${count}-${dimension}.json`, data);

    console.log('\n✓ Done\n');
  }
}
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env bun
2
+
3
+ /**
4
+ * Quick Benchmark - Run against external pgserve instance
5
+ * Usage: bun tests/quick-bench.js [port] [duration_seconds]
6
+ */
7
+
8
+ import pg from 'pg';
9
+ const { Pool } = pg;
10
+
11
/**
 * Parse a command-line value as a positive base-10 integer.
 *
 * @param {string|undefined} raw - Raw argument text (may be absent)
 * @param {number} fallback - Value used when `raw` is missing, not numeric,
 *   zero or negative
 * @returns {number} Parsed value or the fallback
 */
function parsePositiveIntArg(raw, fallback) {
  // Explicit radix: without it, input such as "0x10" would be read as hex.
  const value = Number.parseInt(raw, 10);
  return Number.isInteger(value) && value > 0 ? value : fallback;
}

const PORT = parsePositiveIntArg(process.argv[2], 8433);
const DURATION_SEC = parsePositiveIntArg(process.argv[3], 30); // Run for 30 seconds by default
const CONNECTIONS = 20;
14
+
15
// Print the benchmark configuration before any connection is attempted.
const banner = [
  '',
  'Quick Benchmark',
  '===============',
  `Target: postgresql://127.0.0.1:${PORT}/bench`,
  `Connections: ${CONNECTIONS} concurrent`,
  `Duration: ${DURATION_SEC} seconds`,
  '',
].join('\n');
console.log(banner);

// Connection pool shared by setup and by every worker; its size is capped at
// the number of concurrent workers.
const poolConfig = {
  host: '127.0.0.1',
  port: PORT,
  database: 'bench',
  user: 'postgres',
  password: 'postgres',
  max: CONNECTIONS
};
const pool = new Pool(poolConfig);

// Shared benchmark state, mutated by the workers and read by run().
let running = true; // workers loop while this is true
let totalQueries = 0; // successfully completed queries
let errors = 0; // failed queries
const latencies = []; // per-query duration in milliseconds
36
+
37
/**
 * Prepare the benchmark table: create `bench_test` when it does not exist
 * and empty it. Uses a single pooled connection that is always released.
 */
async function setup() {
  const createTableSql = `
CREATE TABLE IF NOT EXISTS bench_test (
id SERIAL PRIMARY KEY,
data TEXT,
created_at TIMESTAMP DEFAULT NOW()
)
`;
  const statements = [createTableSql, 'TRUNCATE bench_test'];

  const conn = await pool.connect();
  try {
    // Statements must run in order: create first, then truncate.
    for (const statement of statements) {
      await conn.query(statement);
    }
  } finally {
    conn.release();
  }
}
52
+
53
/**
 * Worker loop: issues queries through the shared pool until `running` is
 * cleared, cycling insert -> recent rows -> row count. Successful queries add
 * a latency sample and bump `totalQueries`; failures bump `errors`, pause for
 * 10 ms, and retry the same step.
 *
 * @param {number} workerId - Identifier embedded in the inserted row data
 */
async function runWorker(workerId) {
  let iteration = 0;
  while (running) {
    const startedAt = performance.now();
    try {
      // Mix of operations, selected by position in a 3-step cycle
      switch (iteration % 3) {
        case 0:
          await pool.query(
            'INSERT INTO bench_test (data) VALUES ($1)',
            [`worker-${workerId}-item-${iteration}-${Date.now()}`]
          );
          break;
        case 1:
          await pool.query('SELECT * FROM bench_test ORDER BY id DESC LIMIT 10');
          break;
        default:
          await pool.query('SELECT COUNT(*) FROM bench_test');
          break;
      }
      latencies.push(performance.now() - startedAt);
      totalQueries += 1;
      iteration += 1;
    } catch (err) {
      errors += 1;
      // Small delay on error to avoid tight loop
      await new Promise(resolve => setTimeout(resolve, 10));
    }
  }
}
80
+
81
/**
 * Run the benchmark end to end: prepare the table, start CONNECTIONS workers,
 * let them run for DURATION_SEC seconds, then print throughput and latency
 * statistics.
 *
 * Whatever happens, the workers are told to stop, the progress timer is
 * cleared and the pool is closed, so a failure (for example an unreachable
 * server during setup) cannot leave open handles behind.
 */
async function run() {
  let progressInterval;

  try {
    console.log('Setting up...');
    await setup();

    console.log(`Running for ${DURATION_SEC} seconds...\n`);
    const start = performance.now();

    // Start workers
    const workers = Array.from({ length: CONNECTIONS }, (_, i) => runWorker(i));

    // Progress updates every second. Elapsed time is kept numeric: dividing by
    // a toFixed() string made the `|| 1` guard useless because "0" is truthy.
    progressInterval = setInterval(() => {
      const elapsedSec = (performance.now() - start) / 1000;
      const qps = elapsedSec > 0 ? totalQueries / elapsedSec : 0;
      process.stdout.write(`\r ${elapsedSec.toFixed(0)}s elapsed | ${totalQueries} queries | ${qps.toFixed(0)} QPS | ${errors} errors `);
    }, 1000);

    // Wait for duration
    await new Promise(r => setTimeout(r, DURATION_SEC * 1000));
    running = false;

    // Wait for workers to finish current query
    await Promise.all(workers.map(w => w.catch(() => {})));
    clearInterval(progressInterval);

    const totalTime = performance.now() - start;

    // Calculate stats
    latencies.sort((a, b) => a - b);
    const percentile = (fraction) => latencies[Math.floor(latencies.length * fraction)] || 0;
    const sum = latencies.reduce((a, b) => a + b, 0);
    const avg = latencies.length > 0 ? sum / latencies.length : 0;
    const p50 = percentile(0.5);
    const p95 = percentile(0.95);
    const p99 = percentile(0.99);
    const qps = (totalQueries / totalTime) * 1000;

    console.log(`\n
Results
=======
Total time: ${(totalTime / 1000).toFixed(2)}s
Queries: ${totalQueries}
Errors: ${errors}
QPS: ${qps.toFixed(0)} queries/sec

Latency:
avg: ${avg.toFixed(2)}ms
p50: ${p50.toFixed(2)}ms
p95: ${p95.toFixed(2)}ms
p99: ${p99.toFixed(2)}ms
`);
  } finally {
    // Cleanup that must happen on success and on failure alike.
    running = false;
    clearInterval(progressInterval);
    await pool.end();
  }
}
134
+
135
// Entry point. Report a failure and make the process exit non-zero so shells
// and CI can detect it; exitCode is used instead of process.exit() so pending
// output is not cut off.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});