singulio-postgres 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/vector.ts ADDED
@@ -0,0 +1,312 @@
1
+ /**
2
+ * @singulio/postgres - pgvector Extension Support
3
+ * Vector operations for AI/ML embeddings
4
+ */
5
+
6
+ import { sql } from 'bun';
7
+ import type {
8
+ VectorDistanceOperator,
9
+ VectorIndexType,
10
+ VectorSearchOptions,
11
+ QueryResult,
12
+ Logger,
13
+ } from './types.js';
14
+
15
+ /** Vector input accepted by the helpers in this module: a plain number array, a Float32Array, or a Float64Array. */
16
+ export type Vector = number[] | Float32Array | Float64Array;
17
+
/**
 * Serialize a vector into the bracketed text literal used for pgvector values.
 *
 * @param vector - Plain number array or Float32Array/Float64Array.
 * @returns The components joined by commas and wrapped in square brackets,
 *   e.g. `[1,2,3]`; an empty input yields `[]`.
 */
export function formatVector(vector: Vector): string {
  // Arrays and typed arrays both expose `join`, so it can be called directly.
  const components = vector.join(',');
  return '[' + components + ']';
}
26
+
/**
 * Parse a vector value coming back from PostgreSQL.
 *
 * Accepts the textual form `'[1,2,3]'` and converts it to a number array.
 * Surrounding whitespace (around the literal, inside the brackets, or around
 * individual components) is tolerated.
 *
 * @param value - Text literal, an already-parsed array, or `null`.
 * @returns `null` for `null` (or any non-string, non-array value), the same
 *   array instance when `value` is already an array, `[]` for an empty
 *   literal, otherwise the components converted with `Number` (tokens that are
 *   not numeric become `NaN`).
 */
export function parseVector(value: string | number[] | null): number[] | null {
  if (value === null) return null;

  // Already an array (some drivers return it parsed)
  if (Array.isArray(value)) return value;

  // Defensive: untyped callers may hand over something else entirely.
  if (typeof value !== 'string') return null;

  // Trim first so the anchored bracket pattern still matches padded input,
  // then trim again so '[ ]' is recognised as empty rather than parsed as [0].
  const inner = value
    .trim()
    .replace(/^\[|\]$/g, '')
    .trim();
  if (inner === '') return [];

  return inner.split(',').map(Number);
}
46
+
/**
 * Report how many components a vector has.
 *
 * @param vector - Plain number array or Float32Array/Float64Array.
 * @returns The vector's `length`.
 */
export function vectorDimension(vector: Vector): number {
  // Every member of the Vector union exposes `length`; no branching is needed.
  return vector.length;
}
53
+
/**
 * Scale a vector to unit length (useful before cosine-similarity comparisons).
 *
 * The input is never mutated and the result is always a newly allocated array,
 * including the zero-vector case, so callers can safely modify the result.
 *
 * @param vector - Plain number array or Float32Array/Float64Array.
 * @returns A new array with each component divided by the Euclidean norm; a
 *   vector whose norm is 0 is returned as an unchanged copy.
 */
export function normalizeVector(vector: Vector): number[] {
  // Copy up front so no code path can hand the caller's own array back.
  const components = Array.from(vector);

  let sumOfSquares = 0;
  for (const component of components) {
    sumOfSquares += component * component;
  }

  const magnitude = Math.sqrt(sumOfSquares);
  // Dividing by zero would fill the result with NaN; keep the zero vector as is.
  if (magnitude === 0) return components;

  return components.map(component => component / magnitude);
}
63
+
/**
 * Return the SQL text for a distance operator.
 *
 * The operator value already is its SQL spelling, so it is handed back as-is.
 *
 * @param op - Distance operator.
 * @returns The same value, typed as a plain string.
 */
export function getDistanceOperator(op: VectorDistanceOperator): string {
  const operatorSql: string = op;
  return operatorSql;
}
70
+
/** Display labels keyed by distance operator. */
const DISTANCE_OPERATOR_LABELS: ReadonlyMap<string, string> = new Map([
  ['<->', 'L2 (Euclidean)'],
  ['<=>', 'Cosine'],
  ['<#>', 'Inner Product'],
  ['<+>', 'L1 (Manhattan)'],
]);

/**
 * Translate a distance operator into a human-readable label.
 *
 * @param op - Distance operator.
 * @returns The label for the operator, or `'Unknown'` when it is not one of
 *   the four operators listed in the lookup table.
 */
export function getDistanceOperatorName(op: VectorDistanceOperator): string {
  return DISTANCE_OPERATOR_LABELS.get(op) ?? 'Unknown';
}
88
+
/**
 * Make sure the pgvector extension exists in the connected database.
 *
 * Issues `CREATE EXTENSION IF NOT EXISTS vector` through the `sql` tag and
 * resolves once that statement has completed.
 */
export async function ensureVectorExtension(): Promise<void> {
  const createExtension = sql`CREATE EXTENSION IF NOT EXISTS vector`;
  await createExtension;
}
95
+
/**
 * Add a pgvector column to a table if it is not already present.
 *
 * Runs `ALTER TABLE … ADD COLUMN IF NOT EXISTS … vector(n)`.
 *
 * NOTE: `table` and `column` are spliced into the statement verbatim (they are
 * identifiers and cannot be bound as parameters), so they must come from
 * trusted code, never from user input.
 *
 * @param table - Target table name, used as-is in the SQL text.
 * @param column - Name of the vector column, used as-is in the SQL text.
 * @param dimensions - Number of vector components; must be a positive integer.
 * @throws RangeError if `dimensions` is not a positive integer. Because the
 *   function is async this surfaces as a rejected promise, and no statement is
 *   sent.
 */
export async function createVectorColumn(
  table: string,
  column: string,
  dimensions: number
): Promise<void> {
  // `dimensions` ends up inside raw SQL text, so reject anything that is not a
  // plain positive integer (this also blocks strings from untyped callers).
  if (!Number.isInteger(dimensions) || dimensions <= 0) {
    throw new RangeError(`dimensions must be a positive integer, received ${String(dimensions)}`);
  }

  await sql.unsafe(`ALTER TABLE ${table} ADD COLUMN IF NOT EXISTS ${column} vector(${dimensions})`);
}
106
+
/**
 * Create a vector index for similarity search, if it does not exist yet.
 *
 * The index is named `idx_<table>_<column>_<indexType>`; any character that is
 * not a letter, digit, `_` or `$` (for example the dot in a schema-qualified
 * table name) is replaced by `_` so the generated name is a single valid
 * identifier.
 *
 * NOTE: `table` and `column` are spliced into the statement verbatim, so they
 * must come from trusted code, never from user input.
 *
 * @param table - Table to index, used as-is in the SQL text.
 * @param column - Vector column to index, used as-is in the SQL text.
 * @param indexType - `'hnsw'` (default) builds an HNSW index; any other value
 *   builds an IVFFlat index.
 * @param operator - Distance operator the index should serve. `<=>` selects
 *   `vector_cosine_ops`, `<#>` selects `vector_ip_ops`, `<+>` selects
 *   `vector_l1_ops`; everything else selects `vector_l2_ops`.
 * @param options - Build parameters; each must be a positive integer.
 * @throws RangeError if a build parameter that applies to the chosen index
 *   type is not a positive integer (surfaces as a rejected promise; no
 *   statement is sent).
 */
export async function createVectorIndex(
  table: string,
  column: string,
  indexType: VectorIndexType = 'hnsw',
  operator: VectorDistanceOperator = '<->',
  options?: {
    /** HNSW: max connections per layer (default: 16) */
    m?: number;
    /** HNSW: size of dynamic candidate list (default: 64) */
    efConstruction?: number;
    /** IVFFlat: number of lists (default: 100) */
    lists?: number;
  }
): Promise<void> {
  // Build parameters are embedded in raw SQL text, so only plain positive
  // integers are let through.
  const positiveInteger = (name: string, value: number): number => {
    if (!Number.isInteger(value) || value <= 0) {
      throw new RangeError(`${name} must be a positive integer, received ${String(value)}`);
    }
    return value;
  };

  // An index name cannot carry a schema qualifier or other punctuation, so
  // flatten anything that is not valid inside an unquoted identifier.
  const indexName = `idx_${table}_${column}_${indexType}`.replace(/[^\p{L}\p{N}_$]/gu, '_');

  // Determine operator class
  // NOTE(review): pgvector documents vector_l1_ops for HNSW; confirm whether
  // IVFFlat accepts it before combining '<+>' with an IVFFlat index.
  let opClass: string;
  switch (operator) {
    case '<=>':
      opClass = 'vector_cosine_ops';
      break;
    case '<#>':
      opClass = 'vector_ip_ops';
      break;
    case '<+>':
      opClass = 'vector_l1_ops';
      break;
    default:
      opClass = 'vector_l2_ops';
  }

  const isHnsw = indexType === 'hnsw';
  const method = isHnsw ? 'hnsw' : 'ivfflat';
  const storageParams = isHnsw
    ? `m = ${positiveInteger('m', options?.m ?? 16)}, ` +
      `ef_construction = ${positiveInteger('efConstruction', options?.efConstruction ?? 64)}`
    : `lists = ${positiveInteger('lists', options?.lists ?? 100)}`;

  await sql.unsafe(`
    CREATE INDEX IF NOT EXISTS ${indexName}
    ON ${table} USING ${method} (${column} ${opClass})
    WITH (${storageParams})
  `);
}
159
+
/**
 * Search a table for the rows whose vector column is closest to a query vector.
 *
 * Selects every column plus a computed `distance`, orders by that distance and
 * applies a row limit.
 *
 * Bound parameters are laid out as `$1` = query vector, `$2…` = `filterParams`
 * in order, last = `limit`; a `filter` fragment must number its placeholders
 * accordingly.
 *
 * NOTE: `table`, `column` and `options.filter` are spliced into the statement
 * verbatim and must come from trusted code, never from user input.
 *
 * @param table - Table to search, used as-is in the SQL text.
 * @param column - Vector column to compare against, used as-is in the SQL text.
 * @param queryVector - Vector to measure distances from.
 * @param options - `operator` (default `'<->'`), `limit` (default 10),
 *   optional `threshold` (only rows with distance strictly below it are
 *   returned), optional raw SQL `filter` and its `filterParams`.
 * @param logger - Optional logger; receives one debug entry per search with
 *   the table, column, operator name, row count and latency.
 * @returns The matching rows and their count.
 * @throws TypeError if `operator` is not of the form `<x>` with a single
 *   operator character, or if `threshold` is given but is not a finite number
 *   (both surface as a rejected promise; no statement is sent).
 */
export async function vectorSearch<T = Record<string, unknown>>(
  table: string,
  column: string,
  queryVector: Vector,
  options: VectorSearchOptions = {},
  logger?: Logger
): Promise<QueryResult<T & { distance: number }>> {
  const { operator = '<->', limit = 10, threshold, filter, filterParams = [] } = options;

  // The operator and threshold are embedded in raw SQL text; make sure they
  // are really an operator token and a number before building the statement.
  if (!/^<[-=#+~%]>$/.test(operator)) {
    throw new TypeError(`Unsupported vector distance operator: ${String(operator)}`);
  }
  if (threshold !== undefined && !Number.isFinite(threshold)) {
    throw new TypeError(`threshold must be a finite number, received ${String(threshold)}`);
  }

  const params: unknown[] = [formatVector(queryVector), ...filterParams, limit];
  const distanceExpr = `${column} ${operator} $1`;

  const conditions: string[] = [];
  if (filter) {
    // Parenthesised so a top-level OR inside the filter cannot capture the
    // threshold condition that is AND-ed after it.
    conditions.push(`(${filter})`);
  }
  if (threshold !== undefined) {
    conditions.push(`${distanceExpr} < ${threshold}`);
  }
  const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';

  const start = performance.now();
  const result = await sql.unsafe(
    `
    SELECT *, ${distanceExpr} AS distance
    FROM ${table}
    ${whereClause}
    ORDER BY ${distanceExpr}
    LIMIT $${params.length}
  `,
    params
  );

  logger?.debug('Vector search completed', {
    table,
    column,
    operator: getDistanceOperatorName(operator),
    results: result.length,
    latencyMs: (performance.now() - start).toFixed(2),
  });

  return {
    rows: result as (T & { distance: number })[],
    rowCount: result.length,
  };
}
211
+
/**
 * Insert a single row that carries a vector value.
 *
 * The keys of `data` become the column list and its values the bound
 * parameters; the vector column and its formatted literal are appended last.
 * The statement ends with `RETURNING *`.
 *
 * @param table - Target table, used as-is in the SQL text.
 * @param data - Column name → value pairs for the non-vector columns.
 * @param vectorColumn - Name of the vector column, used as-is in the SQL text.
 * @param vector - Vector to store.
 * @returns The rows returned by the statement and their count.
 */
export async function insertWithVector(
  table: string,
  data: Record<string, unknown>,
  vectorColumn: string,
  vector: Vector
): Promise<QueryResult> {
  const columnList = [...Object.keys(data), vectorColumn];
  const boundValues: unknown[] = [...Object.values(data), formatVector(vector)];

  // One positional placeholder ($1, $2, …) per bound value.
  const placeholderList = Array.from(boundValues, (_, index) => `$${index + 1}`);

  const inserted = await sql.unsafe(
    `
    INSERT INTO ${table} (${columnList.join(', ')})
    VALUES (${placeholderList.join(', ')})
    RETURNING *
  `,
    boundValues
  );

  return { rows: inserted, rowCount: inserted.length };
}
243
+
/**
 * Replace the vector stored on the rows matching an id.
 *
 * The new vector is bound as `$1` and the id as `$2`; the statement ends with
 * `RETURNING *`.
 *
 * @param table - Target table, used as-is in the SQL text.
 * @param idColumn - Column compared against `idValue`, used as-is in the SQL text.
 * @param idValue - Value identifying the row(s) to update.
 * @param vectorColumn - Vector column to overwrite, used as-is in the SQL text.
 * @param vector - New vector value.
 * @returns The rows returned by the statement and their count.
 */
export async function updateVector(
  table: string,
  idColumn: string,
  idValue: unknown,
  vectorColumn: string,
  vector: Vector
): Promise<QueryResult> {
  const statement = `
    UPDATE ${table}
    SET ${vectorColumn} = $1
    WHERE ${idColumn} = $2
    RETURNING *
  `;
  const boundValues = [formatVector(vector), idValue];

  const updated = await sql.unsafe(statement, boundValues);

  return { rows: updated, rowCount: updated.length };
}
269
+
/**
 * Insert several rows, each with a vector, in one multi-row INSERT.
 *
 * Every row contributes one parenthesised group of positional placeholders:
 * its `data` values in order followed by its formatted vector. Placeholders
 * are numbered consecutively across all rows. The statement ends with
 * `RETURNING *`.
 *
 * @param table - Target table, used as-is in the SQL text.
 * @param columns - Names of the non-vector columns, in the same order as each
 *   row's `data` values.
 * @param vectorColumn - Name of the vector column; listed after `columns`.
 * @param rows - Rows to insert.
 * @returns The rows returned by the statement and their count; an empty
 *   `rows` input short-circuits to an empty result without running a query.
 */
export async function batchInsertWithVectors(
  table: string,
  columns: string[],
  vectorColumn: string,
  rows: Array<{ data: unknown[]; vector: Vector }>
): Promise<QueryResult> {
  if (rows.length === 0) {
    return { rows: [], rowCount: 0 };
  }

  const targetColumns = columns.concat(vectorColumn);
  const boundValues: unknown[] = [];

  const tuples = rows.map(({ data, vector }) => {
    const slots = [...data, formatVector(vector)].map(value => {
      boundValues.push(value);
      // After the push, the array length is exactly this value's 1-based index.
      return `$${boundValues.length}`;
    });
    return `(${slots.join(', ')})`;
  });

  const inserted = await sql.unsafe(
    `
    INSERT INTO ${table} (${targetColumns.join(', ')})
    VALUES ${tuples.join(', ')}
    RETURNING *
  `,
    boundValues
  );

  return { rows: inserted, rowCount: inserted.length };
}