@sparkleideas/plugins 3.0.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +401 -0
- package/__tests__/collection-manager.test.ts +332 -0
- package/__tests__/dependency-graph.test.ts +434 -0
- package/__tests__/enhanced-plugin-registry.test.ts +488 -0
- package/__tests__/plugin-registry.test.ts +368 -0
- package/__tests__/ruvector-bridge.test.ts +2429 -0
- package/__tests__/ruvector-integration.test.ts +1602 -0
- package/__tests__/ruvector-migrations.test.ts +1099 -0
- package/__tests__/ruvector-quantization.test.ts +846 -0
- package/__tests__/ruvector-streaming.test.ts +1088 -0
- package/__tests__/sdk.test.ts +325 -0
- package/__tests__/security.test.ts +348 -0
- package/__tests__/utils/ruvector-test-utils.ts +860 -0
- package/examples/plugin-creator/index.ts +636 -0
- package/examples/plugin-creator/plugin-creator.test.ts +312 -0
- package/examples/ruvector/README.md +288 -0
- package/examples/ruvector/attention-patterns.ts +394 -0
- package/examples/ruvector/basic-usage.ts +288 -0
- package/examples/ruvector/docker-compose.yml +75 -0
- package/examples/ruvector/gnn-analysis.ts +501 -0
- package/examples/ruvector/hyperbolic-hierarchies.ts +557 -0
- package/examples/ruvector/init-db.sql +119 -0
- package/examples/ruvector/quantization.ts +680 -0
- package/examples/ruvector/self-learning.ts +447 -0
- package/examples/ruvector/semantic-search.ts +576 -0
- package/examples/ruvector/streaming-large-data.ts +507 -0
- package/examples/ruvector/transactions.ts +594 -0
- package/examples/ruvector-plugins/hook-pattern-library.ts +486 -0
- package/examples/ruvector-plugins/index.ts +79 -0
- package/examples/ruvector-plugins/intent-router.ts +354 -0
- package/examples/ruvector-plugins/mcp-tool-optimizer.ts +424 -0
- package/examples/ruvector-plugins/reasoning-bank.ts +657 -0
- package/examples/ruvector-plugins/ruvector-plugins.test.ts +518 -0
- package/examples/ruvector-plugins/semantic-code-search.ts +498 -0
- package/examples/ruvector-plugins/shared/index.ts +20 -0
- package/examples/ruvector-plugins/shared/vector-utils.ts +257 -0
- package/examples/ruvector-plugins/sona-learning.ts +445 -0
- package/package.json +97 -0
- package/src/collections/collection-manager.ts +661 -0
- package/src/collections/index.ts +56 -0
- package/src/collections/official/index.ts +1040 -0
- package/src/core/base-plugin.ts +416 -0
- package/src/core/plugin-interface.ts +215 -0
- package/src/hooks/index.ts +685 -0
- package/src/index.ts +378 -0
- package/src/integrations/agentic-flow.ts +743 -0
- package/src/integrations/index.ts +88 -0
- package/src/integrations/ruvector/ARCHITECTURE.md +1245 -0
- package/src/integrations/ruvector/attention-advanced.ts +1040 -0
- package/src/integrations/ruvector/attention-executor.ts +782 -0
- package/src/integrations/ruvector/attention-mechanisms.ts +757 -0
- package/src/integrations/ruvector/attention.ts +1063 -0
- package/src/integrations/ruvector/gnn.ts +3050 -0
- package/src/integrations/ruvector/hyperbolic.ts +1948 -0
- package/src/integrations/ruvector/index.ts +394 -0
- package/src/integrations/ruvector/migrations/001_create_extension.sql +135 -0
- package/src/integrations/ruvector/migrations/002_create_vector_tables.sql +259 -0
- package/src/integrations/ruvector/migrations/003_create_indices.sql +328 -0
- package/src/integrations/ruvector/migrations/004_create_functions.sql +598 -0
- package/src/integrations/ruvector/migrations/005_create_attention_functions.sql +654 -0
- package/src/integrations/ruvector/migrations/006_create_gnn_functions.sql +728 -0
- package/src/integrations/ruvector/migrations/007_create_hyperbolic_functions.sql +762 -0
- package/src/integrations/ruvector/migrations/index.ts +35 -0
- package/src/integrations/ruvector/migrations/migrations.ts +647 -0
- package/src/integrations/ruvector/quantization.ts +2036 -0
- package/src/integrations/ruvector/ruvector-bridge.ts +2000 -0
- package/src/integrations/ruvector/self-learning.ts +2376 -0
- package/src/integrations/ruvector/streaming.ts +1737 -0
- package/src/integrations/ruvector/types.ts +1945 -0
- package/src/providers/index.ts +643 -0
- package/src/registry/dependency-graph.ts +568 -0
- package/src/registry/enhanced-plugin-registry.ts +994 -0
- package/src/registry/plugin-registry.ts +604 -0
- package/src/sdk/index.ts +563 -0
- package/src/security/index.ts +594 -0
- package/src/types/index.ts +446 -0
- package/src/workers/index.ts +700 -0
- package/tmp.json +0 -0
- package/tsconfig.json +25 -0
- package/vitest.config.ts +23 -0
|
@@ -0,0 +1,1737 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RuVector PostgreSQL Bridge - Streaming and Transaction Support
|
|
3
|
+
*
|
|
4
|
+
* Provides streaming capabilities for large result sets and batch operations,
|
|
5
|
+
* enhanced transaction handling with savepoints and isolation levels,
|
|
6
|
+
* and efficient batch processing with backpressure handling.
|
|
7
|
+
*
|
|
8
|
+
* @module @sparkleideas/plugins/integrations/ruvector/streaming
|
|
9
|
+
* @version 1.0.0
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { EventEmitter } from 'events';
|
|
13
|
+
import type {
|
|
14
|
+
VectorSearchOptions,
|
|
15
|
+
VectorSearchResult,
|
|
16
|
+
VectorInsertOptions,
|
|
17
|
+
VectorUpdateOptions,
|
|
18
|
+
BatchResult,
|
|
19
|
+
DistanceMetric,
|
|
20
|
+
QueryResult,
|
|
21
|
+
} from './types.js';
|
|
22
|
+
|
|
23
|
+
// ============================================================================
|
|
24
|
+
// Type Definitions
|
|
25
|
+
// ============================================================================
|
|
26
|
+
|
|
27
|
+
/**
 * Structural subset of a checked-out client from the `pg` package's pool.
 *
 * Only the two members this module calls are declared: running a query and
 * handing the client back to the pool.
 */
export interface PoolClient {
  /** Return the client to the pool; an optional error may be supplied. */
  release(err?: Error): void;
  /** Run a SQL statement with optional positional parameter values. */
  query<T = unknown>(text: string, values?: unknown[]): Promise<PgQueryResult<T>>;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Shape of the value resolved by a PostgreSQL query call.
 */
interface PgQueryResult<T> {
  /** Command tag reported for the statement. */
  command: string;
  /** Row count reported for the statement, or `null` when none is reported. */
  rowCount: number | null;
  /** Rows returned by the statement. */
  rows: T[];
  /** Optional column descriptors (name and data type OID). */
  fields?: { name: string; dataTypeID: number }[];
}
|
|
44
|
+
|
|
45
|
+
/**
 * Structural subset of a connection pool used for connection management.
 */
interface Pool {
  /** Total number of clients counted by the pool. */
  totalCount: number;
  /** Number of clients counted as idle. */
  idleCount: number;
  /** Number of requests counted as waiting. */
  waitingCount: number;

  /** Check out a client; the caller is responsible for releasing it. */
  connect(): Promise<PoolClient>;
  /** Run a single statement on the pool. */
  query<T = unknown>(text: string, values?: unknown[]): Promise<PgQueryResult<T>>;
  /** Shut the pool down. */
  end(): Promise<void>;
  /** Subscribe to a pool event. */
  on(event: string, callback: (...args: unknown[]) => void): this;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Search options accepted by the streaming search, layered on top of the
 * regular vector search options.
 */
export interface StreamSearchOptions extends VectorSearchOptions {
  /** When true, results are read through a server-side cursor. */
  useServerCursor?: boolean;
  /** Name to give the server-side cursor. */
  cursorName?: string;
  /** Direction in which the cursor is fetched. */
  fetchDirection?: 'forward' | 'backward';
  /** How many results to fetch per batch (default: 1000). */
  batchSize?: number;
  /** Query timeout, expressed in milliseconds. */
  timeout?: number;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Outcome of inserting one vector during a streaming insert.
 */
export interface InsertResult {
  /** Identifier of the vector this result refers to. */
  id: string | number;
  /** Index of the batch the vector belonged to. */
  batchIndex: number;
  /** Position of the vector inside its batch. */
  itemIndex: number;
  /** `true` when the insert succeeded. */
  success: boolean;
  /** Failure message; present only when the insert did not succeed. */
  error?: string;
}
|
|
89
|
+
|
|
90
|
+
/**
 * One vector to be written by a streaming insert.
 */
export interface VectorEntry {
  /** The vector values. */
  vector: number[] | Float32Array;
  /** Identifier to store; when omitted one is generated. */
  id?: string | number;
  /** Arbitrary metadata to store alongside the vector. */
  metadata?: Record<string, unknown>;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Names of the supported transaction isolation levels.
 */
export type IsolationLevel =
  | 'read_committed'
  | 'repeatable_read'
  | 'serializable';
|
|
106
|
+
|
|
107
|
+
/**
 * Tuning knobs for batch processing. Every field is optional.
 */
export interface BatchOptions {
  /** Number of items handled per batch. */
  batchSize?: number;
  /** Upper bound on batches processed concurrently. */
  concurrency?: number;
  /** Run the work in transaction mode. */
  useTransaction?: boolean;
  /** Retry operations that fail. */
  retryOnFailure?: boolean;
  /** Upper bound on retry attempts. */
  maxRetries?: number;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Listener signatures for the pool lifecycle events, keyed by event name.
 */
export interface PoolEvents {
  /** Listener for `pool:connect`; receives the client. */
  'pool:connect': (client: PoolClient) => void;
  /** Listener for `pool:acquire`; receives the client. */
  'pool:acquire': (client: PoolClient) => void;
  /** Listener for `pool:release`; receives the client. */
  'pool:release': (client: PoolClient) => void;
  /** Listener for `pool:remove`; receives the client. */
  'pool:remove': (client: PoolClient) => void;
  /** Listener for `pool:error`; receives the error and, optionally, the client. */
  'pool:error': (error: Error, client?: PoolClient) => void;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Mutable bookkeeping a stream keeps for backpressure handling.
 */
interface StreamState {
  /** Whether the stream is currently paused. */
  paused: boolean;
  /** Promise that paused producers await, or `null` when nobody is waiting. */
  drainPromise: Promise<void> | null;
  /** Resolver paired with `drainPromise`, or `null` when nobody is waiting. */
  drainResolve: (() => void) | null;
  /** Buffered items. */
  buffer: unknown[];
  /** Current buffer size counter. */
  bufferSize: number;
  /** Configured high-water mark. */
  highWaterMark: number;
}
|
|
145
|
+
|
|
146
|
+
// ============================================================================
|
|
147
|
+
// Constants
|
|
148
|
+
// ============================================================================
|
|
149
|
+
|
|
150
|
+
/** Batch size used when the caller does not supply one. */
const DEFAULT_BATCH_SIZE = 1_000;
/** Default number of concurrent batches. */
const DEFAULT_CONCURRENCY = 4;
/** Default high-water mark for stream state. */
const DEFAULT_HIGH_WATER_MARK = 16_384;
/** Default query timeout, in milliseconds. */
const DEFAULT_TIMEOUT_MS = 30_000;
/** Prefix for generated cursor names. */
const DEFAULT_CURSOR_PREFIX = 'ruvector_cursor_';
|
|
155
|
+
|
|
156
|
+
/**
 * SQL distance operator used for each distance metric.
 *
 * NOTE(review): several metrics share one operator symbol (e.g. `manhattan`
 * and `chebyshev`, `hamming` and `jaccard`). Confirm against the RuVector
 * extension's operator list that these aliases are intended.
 */
const DISTANCE_OPERATORS: Record<DistanceMetric, string> = {
  // '<=>'
  cosine: '<=>',
  correlation: '<=>',
  // '<->'
  euclidean: '<->',
  minkowski: '<->',
  bray_curtis: '<->',
  canberra: '<->',
  mahalanobis: '<->',
  // '<#>'
  dot: '<#>',
  // '<~>'
  hamming: '<~>',
  jaccard: '<~>',
  // '<+>'
  manhattan: '<+>',
  chebyshev: '<+>',
};
|
|
171
|
+
|
|
172
|
+
// ============================================================================
|
|
173
|
+
// RuVectorStream Class
|
|
174
|
+
// ============================================================================
|
|
175
|
+
|
|
176
|
+
/**
|
|
177
|
+
* Streaming support for RuVector operations.
|
|
178
|
+
*
|
|
179
|
+
* Provides async generators for streaming large result sets and batch inserts
|
|
180
|
+
* with backpressure handling.
|
|
181
|
+
*
|
|
182
|
+
* @example
|
|
183
|
+
* ```typescript
|
|
184
|
+
* const stream = new RuVectorStream(pool, config);
|
|
185
|
+
*
|
|
186
|
+
* // Stream search results
|
|
187
|
+
* for await (const result of stream.streamSearch({ query: vector, k: 10000 })) {
|
|
188
|
+
* console.log(result);
|
|
189
|
+
* }
|
|
190
|
+
*
|
|
191
|
+
* // Stream inserts
|
|
192
|
+
* async function* vectorGenerator() {
|
|
193
|
+
* for (let i = 0; i < 100000; i++) {
|
|
194
|
+
* yield { vector: generateVector(), metadata: { index: i } };
|
|
195
|
+
* }
|
|
196
|
+
* }
|
|
197
|
+
*
|
|
198
|
+
* for await (const result of stream.streamInsert(vectorGenerator())) {
|
|
199
|
+
* console.log(`Inserted: ${result.id}`);
|
|
200
|
+
* }
|
|
201
|
+
* ```
|
|
202
|
+
*/
|
|
203
|
+
export class RuVectorStream extends EventEmitter {
|
|
204
|
+
private readonly pool: Pool;
|
|
205
|
+
private readonly schema?: string;
|
|
206
|
+
private readonly defaultTableName: string;
|
|
207
|
+
private readonly state: StreamState;
|
|
208
|
+
private activeClient: PoolClient | null = null;
|
|
209
|
+
private activeCursors: Set<string> = new Set();
|
|
210
|
+
|
|
211
|
+
constructor(
|
|
212
|
+
pool: Pool,
|
|
213
|
+
options: {
|
|
214
|
+
schema?: string;
|
|
215
|
+
defaultTableName?: string;
|
|
216
|
+
highWaterMark?: number;
|
|
217
|
+
} = {}
|
|
218
|
+
) {
|
|
219
|
+
super();
|
|
220
|
+
this.pool = pool;
|
|
221
|
+
this.schema = options.schema;
|
|
222
|
+
this.defaultTableName = options.defaultTableName ?? 'vectors';
|
|
223
|
+
this.state = {
|
|
224
|
+
paused: false,
|
|
225
|
+
buffer: [],
|
|
226
|
+
bufferSize: 0,
|
|
227
|
+
highWaterMark: options.highWaterMark ?? DEFAULT_HIGH_WATER_MARK,
|
|
228
|
+
drainPromise: null,
|
|
229
|
+
drainResolve: null,
|
|
230
|
+
};
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
// ===========================================================================
|
|
234
|
+
// Stream Search
|
|
235
|
+
// ===========================================================================
|
|
236
|
+
|
|
237
|
+
/**
|
|
238
|
+
* Stream large result sets using server-side cursors.
|
|
239
|
+
*
|
|
240
|
+
* @param options - Search options with streaming configuration
|
|
241
|
+
* @yields {VectorSearchResult} Individual search results
|
|
242
|
+
*/
|
|
243
|
+
async *streamSearch(options: StreamSearchOptions): AsyncGenerator<VectorSearchResult, void, undefined> {
|
|
244
|
+
const batchSize = options.batchSize ?? DEFAULT_BATCH_SIZE;
|
|
245
|
+
const cursorName = options.cursorName ?? `${DEFAULT_CURSOR_PREFIX}${Date.now()}_${Math.random().toString(36).slice(2)}`;
|
|
246
|
+
const timeout = options.timeout ?? DEFAULT_TIMEOUT_MS;
|
|
247
|
+
const useServerCursor = options.useServerCursor ?? true;
|
|
248
|
+
|
|
249
|
+
const client = await this.pool.connect();
|
|
250
|
+
this.activeClient = client;
|
|
251
|
+
this.activeCursors.add(cursorName);
|
|
252
|
+
|
|
253
|
+
try {
|
|
254
|
+
// Set statement timeout
|
|
255
|
+
await client.query(`SET LOCAL statement_timeout = ${timeout}`);
|
|
256
|
+
|
|
257
|
+
if (useServerCursor) {
|
|
258
|
+
// Use server-side cursor for memory efficiency
|
|
259
|
+
yield* this.streamWithCursor(client, options, cursorName, batchSize);
|
|
260
|
+
} else {
|
|
261
|
+
// Use OFFSET/LIMIT pagination (less efficient but simpler)
|
|
262
|
+
yield* this.streamWithPagination(client, options, batchSize);
|
|
263
|
+
}
|
|
264
|
+
} finally {
|
|
265
|
+
// Cleanup
|
|
266
|
+
if (this.activeCursors.has(cursorName)) {
|
|
267
|
+
try {
|
|
268
|
+
await client.query(`CLOSE ${this.escapeIdentifier(cursorName)}`);
|
|
269
|
+
} catch {
|
|
270
|
+
// Cursor may already be closed
|
|
271
|
+
}
|
|
272
|
+
this.activeCursors.delete(cursorName);
|
|
273
|
+
}
|
|
274
|
+
client.release();
|
|
275
|
+
this.activeClient = null;
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
/**
|
|
280
|
+
* Stream results using a server-side cursor.
|
|
281
|
+
*/
|
|
282
|
+
private async *streamWithCursor(
|
|
283
|
+
client: PoolClient,
|
|
284
|
+
options: StreamSearchOptions,
|
|
285
|
+
cursorName: string,
|
|
286
|
+
batchSize: number
|
|
287
|
+
): AsyncGenerator<VectorSearchResult, void, undefined> {
|
|
288
|
+
const { sql, params } = this.buildSearchQuery(options);
|
|
289
|
+
const escapedCursor = this.escapeIdentifier(cursorName);
|
|
290
|
+
|
|
291
|
+
// Begin transaction for cursor
|
|
292
|
+
await client.query('BEGIN');
|
|
293
|
+
|
|
294
|
+
try {
|
|
295
|
+
// Declare cursor
|
|
296
|
+
await client.query(
|
|
297
|
+
`DECLARE ${escapedCursor} CURSOR WITH HOLD FOR ${sql}`,
|
|
298
|
+
params
|
|
299
|
+
);
|
|
300
|
+
|
|
301
|
+
let rank = 0;
|
|
302
|
+
let hasMore = true;
|
|
303
|
+
|
|
304
|
+
while (hasMore) {
|
|
305
|
+
// Wait if paused (backpressure)
|
|
306
|
+
await this.waitIfPaused();
|
|
307
|
+
|
|
308
|
+
// Fetch batch
|
|
309
|
+
const fetchResult = await client.query<{
|
|
310
|
+
id: string | number;
|
|
311
|
+
distance: number;
|
|
312
|
+
[key: string]: unknown;
|
|
313
|
+
}>(
|
|
314
|
+
`FETCH ${batchSize} FROM ${escapedCursor}`
|
|
315
|
+
);
|
|
316
|
+
|
|
317
|
+
if (fetchResult.rows.length === 0) {
|
|
318
|
+
hasMore = false;
|
|
319
|
+
break;
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
// Yield individual results
|
|
323
|
+
for (const row of fetchResult.rows) {
|
|
324
|
+
rank++;
|
|
325
|
+
const result = this.transformSearchResult(row, options, rank);
|
|
326
|
+
yield result;
|
|
327
|
+
|
|
328
|
+
this.emit('result', result);
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
// Check if we've received less than batch size (end of results)
|
|
332
|
+
if (fetchResult.rows.length < batchSize) {
|
|
333
|
+
hasMore = false;
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
await client.query('COMMIT');
|
|
338
|
+
} catch (error) {
|
|
339
|
+
await client.query('ROLLBACK');
|
|
340
|
+
throw error;
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
/**
|
|
345
|
+
* Stream results using OFFSET/LIMIT pagination.
|
|
346
|
+
*/
|
|
347
|
+
private async *streamWithPagination(
|
|
348
|
+
client: PoolClient,
|
|
349
|
+
options: StreamSearchOptions,
|
|
350
|
+
batchSize: number
|
|
351
|
+
): AsyncGenerator<VectorSearchResult, void, undefined> {
|
|
352
|
+
const { sql: baseSql, params } = this.buildSearchQuery(options, true);
|
|
353
|
+
|
|
354
|
+
let offset = 0;
|
|
355
|
+
let rank = 0;
|
|
356
|
+
let hasMore = true;
|
|
357
|
+
|
|
358
|
+
while (hasMore) {
|
|
359
|
+
// Wait if paused (backpressure)
|
|
360
|
+
await this.waitIfPaused();
|
|
361
|
+
|
|
362
|
+
const sql = `${baseSql} LIMIT ${batchSize} OFFSET ${offset}`;
|
|
363
|
+
const result = await client.query<{
|
|
364
|
+
id: string | number;
|
|
365
|
+
distance: number;
|
|
366
|
+
[key: string]: unknown;
|
|
367
|
+
}>(sql, params);
|
|
368
|
+
|
|
369
|
+
if (result.rows.length === 0) {
|
|
370
|
+
hasMore = false;
|
|
371
|
+
break;
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
for (const row of result.rows) {
|
|
375
|
+
rank++;
|
|
376
|
+
const searchResult = this.transformSearchResult(row, options, rank);
|
|
377
|
+
yield searchResult;
|
|
378
|
+
|
|
379
|
+
this.emit('result', searchResult);
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
offset += batchSize;
|
|
383
|
+
|
|
384
|
+
if (result.rows.length < batchSize) {
|
|
385
|
+
hasMore = false;
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
/**
|
|
391
|
+
* Build the search query SQL.
|
|
392
|
+
*/
|
|
393
|
+
private buildSearchQuery(
|
|
394
|
+
options: StreamSearchOptions,
|
|
395
|
+
forPagination = false
|
|
396
|
+
): { sql: string; params: unknown[] } {
|
|
397
|
+
const tableName = options.tableName ?? this.defaultTableName;
|
|
398
|
+
const vectorColumn = options.vectorColumn ?? 'embedding';
|
|
399
|
+
const metric = options.metric ?? 'cosine';
|
|
400
|
+
const operator = DISTANCE_OPERATORS[metric] ?? '<=>';
|
|
401
|
+
|
|
402
|
+
const queryVector = this.formatVector(options.query);
|
|
403
|
+
const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';
|
|
404
|
+
|
|
405
|
+
// Build SELECT columns
|
|
406
|
+
const selectColumns = options.selectColumns ?? ['id'];
|
|
407
|
+
const columnList = [...selectColumns];
|
|
408
|
+
|
|
409
|
+
if (options.includeVector) {
|
|
410
|
+
columnList.push(vectorColumn);
|
|
411
|
+
}
|
|
412
|
+
if (options.includeMetadata) {
|
|
413
|
+
columnList.push('metadata');
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
const distanceExpr = `${this.escapeIdentifier(vectorColumn)} ${operator} '${queryVector}'::vector`;
|
|
417
|
+
columnList.push(`(${distanceExpr}) as distance`);
|
|
418
|
+
|
|
419
|
+
// Build WHERE clause
|
|
420
|
+
const whereClauses: string[] = [];
|
|
421
|
+
const params: unknown[] = [];
|
|
422
|
+
let paramIndex = 1;
|
|
423
|
+
|
|
424
|
+
if (options.threshold !== undefined) {
|
|
425
|
+
if (metric === 'cosine' || metric === 'dot') {
|
|
426
|
+
whereClauses.push(`(1 - (${distanceExpr})) >= $${paramIndex++}`);
|
|
427
|
+
params.push(options.threshold);
|
|
428
|
+
} else {
|
|
429
|
+
whereClauses.push(`(${distanceExpr}) <= $${paramIndex++}`);
|
|
430
|
+
params.push(options.threshold);
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
if (options.maxDistance !== undefined) {
|
|
435
|
+
whereClauses.push(`(${distanceExpr}) <= $${paramIndex++}`);
|
|
436
|
+
params.push(options.maxDistance);
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
if (options.filter) {
|
|
440
|
+
for (const [key, value] of Object.entries(options.filter)) {
|
|
441
|
+
if (key === 'metadata') {
|
|
442
|
+
whereClauses.push(`metadata @> $${paramIndex++}::jsonb`);
|
|
443
|
+
params.push(JSON.stringify(value));
|
|
444
|
+
} else {
|
|
445
|
+
whereClauses.push(`${this.escapeIdentifier(key)} = $${paramIndex++}`);
|
|
446
|
+
params.push(value);
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
// Build query
|
|
452
|
+
let sql = `SELECT ${columnList.join(', ')} FROM ${schemaPrefix}${this.escapeIdentifier(tableName)}`;
|
|
453
|
+
|
|
454
|
+
if (whereClauses.length > 0) {
|
|
455
|
+
sql += ` WHERE ${whereClauses.join(' AND ')}`;
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
sql += ` ORDER BY ${distanceExpr} ASC`;
|
|
459
|
+
|
|
460
|
+
// For cursor-based streaming, don't add LIMIT (cursor handles it)
|
|
461
|
+
// For pagination, LIMIT/OFFSET will be added by the caller
|
|
462
|
+
if (!forPagination && options.k) {
|
|
463
|
+
sql += ` LIMIT ${options.k}`;
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
return { sql, params };
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
/**
|
|
470
|
+
* Transform a database row into a VectorSearchResult.
|
|
471
|
+
*/
|
|
472
|
+
private transformSearchResult(
|
|
473
|
+
row: { id: string | number; distance: number; [key: string]: unknown },
|
|
474
|
+
options: StreamSearchOptions,
|
|
475
|
+
rank: number
|
|
476
|
+
): VectorSearchResult {
|
|
477
|
+
const metric = options.metric ?? 'cosine';
|
|
478
|
+
const score = metric === 'cosine' || metric === 'dot'
|
|
479
|
+
? 1 - row.distance
|
|
480
|
+
: 1 / (1 + row.distance);
|
|
481
|
+
|
|
482
|
+
const result: VectorSearchResult = {
|
|
483
|
+
id: row.id,
|
|
484
|
+
score,
|
|
485
|
+
distance: row.distance,
|
|
486
|
+
rank,
|
|
487
|
+
retrievedAt: new Date(),
|
|
488
|
+
};
|
|
489
|
+
|
|
490
|
+
if (options.includeVector && row[options.vectorColumn ?? 'embedding']) {
|
|
491
|
+
(result as { vector?: number[] }).vector = this.parseVector(
|
|
492
|
+
row[options.vectorColumn ?? 'embedding'] as string
|
|
493
|
+
);
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
if (options.includeMetadata && row.metadata) {
|
|
497
|
+
(result as { metadata?: Record<string, unknown> }).metadata =
|
|
498
|
+
row.metadata as Record<string, unknown>;
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
return result;
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
// ===========================================================================
|
|
505
|
+
// Stream Insert
|
|
506
|
+
// ===========================================================================
|
|
507
|
+
|
|
508
|
+
/**
|
|
509
|
+
* Stream batch inserts for large datasets.
|
|
510
|
+
*
|
|
511
|
+
* @param vectors - Async iterable of vector entries
|
|
512
|
+
* @param options - Insert configuration options
|
|
513
|
+
* @yields {InsertResult} Individual insert results
|
|
514
|
+
*/
|
|
515
|
+
async *streamInsert(
|
|
516
|
+
vectors: AsyncIterable<VectorEntry>,
|
|
517
|
+
options: {
|
|
518
|
+
tableName?: string;
|
|
519
|
+
vectorColumn?: string;
|
|
520
|
+
batchSize?: number;
|
|
521
|
+
upsert?: boolean;
|
|
522
|
+
conflictColumns?: string[];
|
|
523
|
+
} = {}
|
|
524
|
+
): AsyncGenerator<InsertResult, void, undefined> {
|
|
525
|
+
const tableName = options.tableName ?? this.defaultTableName;
|
|
526
|
+
const vectorColumn = options.vectorColumn ?? 'embedding';
|
|
527
|
+
const batchSize = options.batchSize ?? DEFAULT_BATCH_SIZE;
|
|
528
|
+
const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';
|
|
529
|
+
|
|
530
|
+
let batch: VectorEntry[] = [];
|
|
531
|
+
let batchIndex = 0;
|
|
532
|
+
let totalProcessed = 0;
|
|
533
|
+
|
|
534
|
+
const client = await this.pool.connect();
|
|
535
|
+
this.activeClient = client;
|
|
536
|
+
|
|
537
|
+
try {
|
|
538
|
+
// Process vectors in batches
|
|
539
|
+
for await (const entry of vectors) {
|
|
540
|
+
// Wait if paused (backpressure)
|
|
541
|
+
await this.waitIfPaused();
|
|
542
|
+
|
|
543
|
+
batch.push(entry);
|
|
544
|
+
|
|
545
|
+
if (batch.length >= batchSize) {
|
|
546
|
+
// Process batch
|
|
547
|
+
const results = await this.insertBatch(
|
|
548
|
+
client,
|
|
549
|
+
batch,
|
|
550
|
+
tableName,
|
|
551
|
+
vectorColumn,
|
|
552
|
+
schemaPrefix,
|
|
553
|
+
batchIndex,
|
|
554
|
+
options.upsert,
|
|
555
|
+
options.conflictColumns
|
|
556
|
+
);
|
|
557
|
+
|
|
558
|
+
for (const result of results) {
|
|
559
|
+
yield result;
|
|
560
|
+
totalProcessed++;
|
|
561
|
+
this.emit('insert', result);
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
batch = [];
|
|
565
|
+
batchIndex++;
|
|
566
|
+
}
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
// Process remaining items
|
|
570
|
+
if (batch.length > 0) {
|
|
571
|
+
const results = await this.insertBatch(
|
|
572
|
+
client,
|
|
573
|
+
batch,
|
|
574
|
+
tableName,
|
|
575
|
+
vectorColumn,
|
|
576
|
+
schemaPrefix,
|
|
577
|
+
batchIndex,
|
|
578
|
+
options.upsert,
|
|
579
|
+
options.conflictColumns
|
|
580
|
+
);
|
|
581
|
+
|
|
582
|
+
for (const result of results) {
|
|
583
|
+
yield result;
|
|
584
|
+
totalProcessed++;
|
|
585
|
+
this.emit('insert', result);
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
this.emit('complete', { totalProcessed, batches: batchIndex + 1 });
|
|
590
|
+
} finally {
|
|
591
|
+
client.release();
|
|
592
|
+
this.activeClient = null;
|
|
593
|
+
}
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
/**
|
|
597
|
+
* Insert a batch of vectors.
|
|
598
|
+
*/
|
|
599
|
+
private async insertBatch(
|
|
600
|
+
client: PoolClient,
|
|
601
|
+
batch: VectorEntry[],
|
|
602
|
+
tableName: string,
|
|
603
|
+
vectorColumn: string,
|
|
604
|
+
schemaPrefix: string,
|
|
605
|
+
batchIndex: number,
|
|
606
|
+
upsert?: boolean,
|
|
607
|
+
conflictColumns?: string[]
|
|
608
|
+
): Promise<InsertResult[]> {
|
|
609
|
+
const results: InsertResult[] = [];
|
|
610
|
+
|
|
611
|
+
// Build multi-row INSERT
|
|
612
|
+
const values: string[] = [];
|
|
613
|
+
const params: unknown[] = [];
|
|
614
|
+
let paramIndex = 1;
|
|
615
|
+
|
|
616
|
+
for (const item of batch) {
|
|
617
|
+
const vector = this.formatVector(item.vector);
|
|
618
|
+
const metadata = item.metadata ? JSON.stringify(item.metadata) : null;
|
|
619
|
+
|
|
620
|
+
if (item.id !== undefined) {
|
|
621
|
+
values.push(`($${paramIndex++}, '${vector}'::vector, $${paramIndex++}::jsonb)`);
|
|
622
|
+
params.push(item.id, metadata);
|
|
623
|
+
} else {
|
|
624
|
+
values.push(`(gen_random_uuid(), '${vector}'::vector, $${paramIndex++}::jsonb)`);
|
|
625
|
+
params.push(metadata);
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
let sql = `INSERT INTO ${schemaPrefix}${this.escapeIdentifier(tableName)} `;
|
|
630
|
+
sql += `(id, ${this.escapeIdentifier(vectorColumn)}, metadata) VALUES ${values.join(', ')}`;
|
|
631
|
+
|
|
632
|
+
if (upsert) {
|
|
633
|
+
const conflictCols = conflictColumns ?? ['id'];
|
|
634
|
+
sql += ` ON CONFLICT (${conflictCols.join(', ')}) DO UPDATE SET `;
|
|
635
|
+
sql += `${this.escapeIdentifier(vectorColumn)} = EXCLUDED.${this.escapeIdentifier(vectorColumn)}, `;
|
|
636
|
+
sql += `metadata = EXCLUDED.metadata`;
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
sql += ' RETURNING id';
|
|
640
|
+
|
|
641
|
+
try {
|
|
642
|
+
const result = await client.query<{ id: string | number }>(sql, params);
|
|
643
|
+
|
|
644
|
+
for (let i = 0; i < result.rows.length; i++) {
|
|
645
|
+
results.push({
|
|
646
|
+
id: result.rows[i].id,
|
|
647
|
+
success: true,
|
|
648
|
+
batchIndex,
|
|
649
|
+
itemIndex: i,
|
|
650
|
+
});
|
|
651
|
+
}
|
|
652
|
+
} catch (error) {
|
|
653
|
+
// On batch failure, try individual inserts
|
|
654
|
+
for (let i = 0; i < batch.length; i++) {
|
|
655
|
+
const item = batch[i];
|
|
656
|
+
try {
|
|
657
|
+
const vector = this.formatVector(item.vector);
|
|
658
|
+
const metadata = item.metadata ? JSON.stringify(item.metadata) : null;
|
|
659
|
+
|
|
660
|
+
const singleSql = `INSERT INTO ${schemaPrefix}${this.escapeIdentifier(tableName)} ` +
|
|
661
|
+
`(id, ${this.escapeIdentifier(vectorColumn)}, metadata) VALUES ` +
|
|
662
|
+
`($1, '${vector}'::vector, $2::jsonb) RETURNING id`;
|
|
663
|
+
|
|
664
|
+
const singleResult = await client.query<{ id: string | number }>(
|
|
665
|
+
singleSql,
|
|
666
|
+
[item.id ?? null, metadata]
|
|
667
|
+
);
|
|
668
|
+
|
|
669
|
+
results.push({
|
|
670
|
+
id: singleResult.rows[0]?.id ?? item.id ?? 'unknown',
|
|
671
|
+
success: true,
|
|
672
|
+
batchIndex,
|
|
673
|
+
itemIndex: i,
|
|
674
|
+
});
|
|
675
|
+
} catch (itemError) {
|
|
676
|
+
results.push({
|
|
677
|
+
id: item.id ?? 'unknown',
|
|
678
|
+
success: false,
|
|
679
|
+
error: (itemError as Error).message,
|
|
680
|
+
batchIndex,
|
|
681
|
+
itemIndex: i,
|
|
682
|
+
});
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
return results;
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
// ===========================================================================
|
|
691
|
+
// Backpressure Handling
|
|
692
|
+
// ===========================================================================
|
|
693
|
+
|
|
694
|
+
/**
|
|
695
|
+
* Pause the stream (backpressure).
|
|
696
|
+
*/
|
|
697
|
+
pause(): void {
|
|
698
|
+
this.state.paused = true;
|
|
699
|
+
this.emit('pause');
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
/**
|
|
703
|
+
* Resume the stream.
|
|
704
|
+
*/
|
|
705
|
+
resume(): void {
|
|
706
|
+
this.state.paused = false;
|
|
707
|
+
if (this.state.drainResolve) {
|
|
708
|
+
this.state.drainResolve();
|
|
709
|
+
this.state.drainResolve = null;
|
|
710
|
+
this.state.drainPromise = null;
|
|
711
|
+
}
|
|
712
|
+
this.emit('resume');
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
/**
|
|
716
|
+
* Check if stream is paused.
|
|
717
|
+
*/
|
|
718
|
+
isPaused(): boolean {
|
|
719
|
+
return this.state.paused;
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
/**
|
|
723
|
+
* Wait if the stream is paused.
|
|
724
|
+
*/
|
|
725
|
+
private async waitIfPaused(): Promise<void> {
|
|
726
|
+
if (!this.state.paused) {
|
|
727
|
+
return;
|
|
728
|
+
}
|
|
729
|
+
|
|
730
|
+
if (!this.state.drainPromise) {
|
|
731
|
+
this.state.drainPromise = new Promise<void>(resolve => {
|
|
732
|
+
this.state.drainResolve = resolve;
|
|
733
|
+
});
|
|
734
|
+
}
|
|
735
|
+
|
|
736
|
+
await this.state.drainPromise;
|
|
737
|
+
}
|
|
738
|
+
|
|
739
|
+
// ===========================================================================
|
|
740
|
+
// Cleanup
|
|
741
|
+
// ===========================================================================
|
|
742
|
+
|
|
743
|
+
/**
|
|
744
|
+
* Abort all active operations.
|
|
745
|
+
*/
|
|
746
|
+
async abort(): Promise<void> {
|
|
747
|
+
// Close all active cursors
|
|
748
|
+
if (this.activeClient) {
|
|
749
|
+
const cursors = Array.from(this.activeCursors);
|
|
750
|
+
for (let i = 0; i < cursors.length; i++) {
|
|
751
|
+
const cursorName = cursors[i];
|
|
752
|
+
try {
|
|
753
|
+
await this.activeClient.query(`CLOSE ${this.escapeIdentifier(cursorName)}`);
|
|
754
|
+
} catch {
|
|
755
|
+
// Ignore errors
|
|
756
|
+
}
|
|
757
|
+
}
|
|
758
|
+
this.activeCursors.clear();
|
|
759
|
+
}
|
|
760
|
+
|
|
761
|
+
this.emit('abort');
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
// ===========================================================================
|
|
765
|
+
// Utility Methods
|
|
766
|
+
// ===========================================================================
|
|
767
|
+
|
|
768
|
+
/**
 * Render a vector as a bracketed, comma-separated literal for use in SQL,
 * e.g. `[1,2,3]`.
 *
 * @param vector - Components as a plain array or a Float32Array
 * @returns The bracketed literal
 */
private formatVector(vector: number[] | Float32Array): string {
  const components = Array.isArray(vector) ? vector : Array.from(vector);
  return '[' + components.join(',') + ']';
}
|
|
775
|
+
|
|
776
|
+
/**
 * Parse a vector from its textual SQL representation.
 *
 * Brackets and braces are stripped and the remainder is split on commas,
 * with every component converted via `Number`.
 *
 * @param vectorStr - Text such as `[1,2,3]` or `{1,2,3}`
 * @returns The parsed components; an empty array when the text contains no
 *          components (e.g. `[]` or an empty string)
 */
private parseVector(vectorStr: string): number[] {
  const cleaned = vectorStr.replace(/[\[\]{}]/g, '').trim();
  // ''.split(',') yields [''] and Number('') is 0, so an empty literal would
  // otherwise come back as [0] instead of [].
  if (cleaned === '') {
    return [];
  }
  return cleaned.split(',').map(Number);
}
|
|
783
|
+
|
|
784
|
+
/**
 * Quote a SQL identifier: wraps it in double quotes and doubles any double
 * quote it contains.
 *
 * @param identifier - Raw identifier (table, column, cursor name, ...)
 * @returns The quoted identifier
 */
private escapeIdentifier(identifier: string): string {
  const doubled = identifier.split('"').join('""');
  return '"' + doubled + '"';
}
|
|
790
|
+
}
|
|
791
|
+
|
|
792
|
+
// ============================================================================
|
|
793
|
+
// RuVectorTransaction Class
|
|
794
|
+
// ============================================================================
|
|
795
|
+
|
|
796
|
+
/**
 * Enhanced transaction support for RuVector operations.
 *
 * Provides transaction management with:
 * - Isolation levels (read_committed, repeatable_read, serializable)
 * - Savepoints for partial rollback
 * - Vector operations within transaction context
 *
 * Emits `begin`, `savepoint`, `rollback_to_savepoint`, `release_savepoint`,
 * `commit` and `rollback` events as the corresponding statements succeed.
 *
 * @example
 * ```typescript
 * const tx = new RuVectorTransaction(client);
 * await tx.begin('serializable');
 *
 * try {
 *   await tx.savepoint('before_insert');
 *   await tx.insert({ tableName: 'vectors', vectors: [...] });
 *
 *   const results = await tx.search({ query: vector, k: 10 });
 *
 *   if (results.length === 0) {
 *     await tx.rollbackToSavepoint('before_insert');
 *   }
 *
 *   await tx.commit();
 * } catch (error) {
 *   await tx.rollback();
 *   throw error;
 * }
 * ```
 */
export class RuVectorTransaction extends EventEmitter {
  private readonly client: PoolClient;
  private readonly schema?: string;
  private readonly defaultTableName: string;
  private transactionId: string | null = null;
  private isActive = false;
  // Insertion order mirrors creation order, most recent savepoint last.
  private savepoints: Set<string> = new Set();
  private queryCount = 0;
  private startTime: number | null = null;

  /**
   * @param client - Client on which every statement of the transaction runs
   * @param options - Optional schema name and default table name
   *                  (the table defaults to `vectors`)
   */
  constructor(
    client: PoolClient,
    options: {
      schema?: string;
      defaultTableName?: string;
    } = {}
  ) {
    super();
    this.client = client;
    this.schema = options.schema;
    this.defaultTableName = options.defaultTableName ?? 'vectors';
  }

  // ===========================================================================
  // Transaction Control
  // ===========================================================================

  /**
   * Begin a transaction with optional isolation level.
   *
   * Transaction state is only recorded once the BEGIN statement succeeded, so
   * a failed BEGIN leaves the object in its pristine, inactive state.
   *
   * @param isolation - Transaction isolation level
   * @throws Error if a transaction is already active
   */
  async begin(isolation?: IsolationLevel): Promise<void> {
    if (this.isActive) {
      throw new Error('Transaction already active');
    }

    const transactionId = `tx_${Date.now()}_${Math.random().toString(36).slice(2)}`;
    const startTime = Date.now();

    let sql = 'BEGIN';
    if (isolation) {
      // 'repeatable_read' -> 'REPEATABLE READ'
      sql += ` ISOLATION LEVEL ${isolation.replace(/_/g, ' ').toUpperCase()}`;
    }

    await this.client.query(sql);

    this.transactionId = transactionId;
    this.startTime = startTime;
    this.isActive = true;
    this.queryCount = 1;

    this.emit('begin', { transactionId: this.transactionId, isolation });
  }

  /**
   * Create a savepoint within the transaction.
   *
   * @param name - Savepoint name
   * @throws Error if the transaction is not active
   */
  async savepoint(name: string): Promise<void> {
    this.ensureActive();

    const escapedName = this.escapeIdentifier(name);
    await this.client.query(`SAVEPOINT ${escapedName}`);

    // Re-insert so a reused name moves to the "most recent" position.
    this.savepoints.delete(name);
    this.savepoints.add(name);
    this.queryCount++;

    this.emit('savepoint', { transactionId: this.transactionId, name });
  }

  /**
   * Rollback to a savepoint.
   *
   * The named savepoint stays usable; savepoints created after it are
   * destroyed by the database and are therefore dropped from tracking.
   *
   * @param name - Savepoint name
   * @throws Error if the transaction is not active or the savepoint is unknown
   */
  async rollbackToSavepoint(name: string): Promise<void> {
    this.ensureActive();

    if (!this.savepoints.has(name)) {
      throw new Error(`Savepoint '${name}' does not exist`);
    }

    const escapedName = this.escapeIdentifier(name);
    await this.client.query(`ROLLBACK TO SAVEPOINT ${escapedName}`);
    this.forgetSavepointsFrom(name, false);
    this.queryCount++;

    this.emit('rollback_to_savepoint', { transactionId: this.transactionId, name });
  }

  /**
   * Release a savepoint.
   *
   * Releasing a savepoint also destroys every savepoint created after it, so
   * those are dropped from tracking as well.
   *
   * @param name - Savepoint name
   * @throws Error if the transaction is not active or the savepoint is unknown
   */
  async releaseSavepoint(name: string): Promise<void> {
    this.ensureActive();

    if (!this.savepoints.has(name)) {
      throw new Error(`Savepoint '${name}' does not exist`);
    }

    const escapedName = this.escapeIdentifier(name);
    await this.client.query(`RELEASE SAVEPOINT ${escapedName}`);
    this.forgetSavepointsFrom(name, true);
    this.queryCount++;

    this.emit('release_savepoint', { transactionId: this.transactionId, name });
  }

  /**
   * Commit the transaction.
   *
   * Emits `commit` with the transaction id, query count and duration, then
   * resets the transaction state.
   *
   * @throws Error if the transaction is not active
   */
  async commit(): Promise<void> {
    this.ensureActive();

    await this.client.query('COMMIT');
    const durationMs = this.startTime ? Date.now() - this.startTime : 0;

    this.emit('commit', {
      transactionId: this.transactionId,
      queryCount: this.queryCount,
      durationMs,
    });

    this.cleanup();
  }

  /**
   * Rollback the transaction.
   *
   * A no-op when no transaction is active, so it is safe to call from a
   * catch block regardless of how far the transaction got.
   */
  async rollback(): Promise<void> {
    if (!this.isActive) {
      return; // Already rolled back or not started
    }

    await this.client.query('ROLLBACK');
    const durationMs = this.startTime ? Date.now() - this.startTime : 0;

    this.emit('rollback', {
      transactionId: this.transactionId,
      queryCount: this.queryCount,
      durationMs,
    });

    this.cleanup();
  }

  // ===========================================================================
  // Vector Operations within Transaction
  // ===========================================================================

  /**
   * Perform vector search within the transaction.
   *
   * @param options - Search options (query vector, k, metric, filters, ...)
   * @returns One result per returned row, ranked from 1 in row order
   * @throws Error if the transaction is not active
   */
  async search(options: VectorSearchOptions): Promise<VectorSearchResult[]> {
    this.ensureActive();

    const { sql, params } = this.buildSearchQuery(options);
    const result = await this.client.query<{
      id: string | number;
      distance: number;
      [key: string]: unknown;
    }>(sql, params);

    this.queryCount++;

    const metric = options.metric ?? 'cosine';
    return result.rows.map((row, index) => {
      // cosine/dot: score = 1 - distance; every other metric: 1 / (1 + distance)
      const score = metric === 'cosine' || metric === 'dot'
        ? 1 - row.distance
        : 1 / (1 + row.distance);

      const searchResult: VectorSearchResult = {
        id: row.id,
        score,
        distance: row.distance,
        rank: index + 1,
        retrievedAt: new Date(),
      };

      if (options.includeVector && row[options.vectorColumn ?? 'embedding']) {
        (searchResult as { vector?: number[] }).vector = this.parseVector(
          row[options.vectorColumn ?? 'embedding'] as string
        );
      }

      if (options.includeMetadata && row.metadata) {
        (searchResult as { metadata?: Record<string, unknown> }).metadata =
          row.metadata as Record<string, unknown>;
      }

      return searchResult;
    });
  }

  /**
   * Insert vectors within the transaction using one multi-row INSERT.
   *
   * A failing statement is not rethrown; it is reported through the `errors`
   * field of the result. An empty `vectors` list returns an empty result
   * without touching the database.
   *
   * @param options - Insert options (vectors, table/column names, upsert)
   * @returns Batch summary with the returned ids in `results`
   * @throws Error if the transaction is not active
   */
  async insert(options: VectorInsertOptions): Promise<BatchResult<string>> {
    this.ensureActive();

    if (options.vectors.length === 0) {
      // An empty VALUES list is invalid SQL; nothing to do.
      return {
        total: 0,
        successful: 0,
        failed: 0,
        results: [],
        errors: undefined,
        durationMs: 0,
        throughput: 0,
      };
    }

    const startTime = Date.now();
    const tableName = options.tableName ?? this.defaultTableName;
    const vectorColumn = options.vectorColumn ?? 'embedding';
    const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';

    const successful: string[] = [];
    const errors: Array<{ index: number; message: string; input?: unknown }> = [];

    // Build multi-row INSERT
    const values: string[] = [];
    const params: unknown[] = [];
    let paramIndex = 1;

    for (const item of options.vectors) {
      const vector = this.formatVector(item.vector);
      const metadata = item.metadata ? JSON.stringify(item.metadata) : null;

      if (item.id !== undefined) {
        values.push(`($${paramIndex++}, '${vector}'::vector, $${paramIndex++}::jsonb)`);
        params.push(item.id, metadata);
      } else {
        values.push(`(gen_random_uuid(), '${vector}'::vector, $${paramIndex++}::jsonb)`);
        params.push(metadata);
      }
    }

    let sql = `INSERT INTO ${schemaPrefix}${this.escapeIdentifier(tableName)} `;
    sql += `(id, ${this.escapeIdentifier(vectorColumn)}, metadata) VALUES ${values.join(', ')}`;

    if (options.upsert) {
      // Conflict columns are identifiers and are quoted like every other one.
      const conflictCols = (options.conflictColumns ?? ['id'])
        .map(column => this.escapeIdentifier(column));
      sql += ` ON CONFLICT (${conflictCols.join(', ')}) DO UPDATE SET `;
      sql += `${this.escapeIdentifier(vectorColumn)} = EXCLUDED.${this.escapeIdentifier(vectorColumn)}, `;
      sql += `metadata = EXCLUDED.metadata`;
    }

    sql += ' RETURNING id';

    try {
      const result = await this.client.query<{ id: string }>(sql, params);
      this.queryCount++;

      if (result.rows) {
        successful.push(...result.rows.map(r => String(r.id)));
      }
    } catch (error) {
      // The statement is all-or-nothing, so the failure is reported once.
      errors.push({
        index: 0,
        message: error instanceof Error ? error.message : String(error),
      });
    }

    const durationMs = Date.now() - startTime;
    const insertedCount = successful.length;

    return {
      total: options.vectors.length,
      successful: insertedCount,
      failed: options.vectors.length - insertedCount,
      results: successful,
      errors: errors.length > 0 ? errors : undefined,
      durationMs,
      // Sub-millisecond runs count as 1 ms so the rate stays finite.
      throughput: insertedCount / (Math.max(durationMs, 1) / 1000),
    };
  }

  /**
   * Update a vector within the transaction.
   *
   * @param options - Row id plus the new vector and/or metadata; with
   *                  `mergeMetadata` the metadata is merged via `||`
   * @returns `true` if at least one row was updated, `false` if nothing
   *          matched or there was nothing to update
   * @throws Error if the transaction is not active
   */
  async update(options: VectorUpdateOptions): Promise<boolean> {
    this.ensureActive();

    const tableName = options.tableName ?? this.defaultTableName;
    const vectorColumn = options.vectorColumn ?? 'embedding';
    const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';

    const setClauses: string[] = [];
    const params: unknown[] = [];
    let paramIndex = 1;

    if (options.vector) {
      const vector = this.formatVector(options.vector);
      setClauses.push(`${this.escapeIdentifier(vectorColumn)} = '${vector}'::vector`);
    }

    if (options.metadata) {
      if (options.mergeMetadata) {
        setClauses.push(`metadata = metadata || $${paramIndex++}::jsonb`);
      } else {
        setClauses.push(`metadata = $${paramIndex++}::jsonb`);
      }
      params.push(JSON.stringify(options.metadata));
    }

    if (setClauses.length === 0) {
      return false;
    }

    params.push(options.id);
    const sql = `UPDATE ${schemaPrefix}${this.escapeIdentifier(tableName)} ` +
      `SET ${setClauses.join(', ')} WHERE id = $${paramIndex}`;

    const result = await this.client.query(sql, params);
    this.queryCount++;

    return (result.rowCount ?? 0) > 0;
  }

  /**
   * Delete vectors within the transaction.
   *
   * @param ids - IDs to delete; an empty list deletes nothing and issues no query
   * @param tableName - Table name (optional)
   * @returns Number of deleted rows
   * @throws Error if the transaction is not active
   */
  async delete(ids: (string | number)[], tableName?: string): Promise<number> {
    this.ensureActive();

    if (ids.length === 0) {
      // `WHERE id IN ()` is a syntax error that would abort the transaction.
      return 0;
    }

    const table = tableName ?? this.defaultTableName;
    const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';

    const placeholders = ids.map((_, i) => `$${i + 1}`).join(', ');
    const sql = `DELETE FROM ${schemaPrefix}${this.escapeIdentifier(table)} WHERE id IN (${placeholders})`;

    const result = await this.client.query(sql, ids);
    this.queryCount++;

    return result.rowCount ?? 0;
  }

  /**
   * Execute a raw query within the transaction.
   *
   * @param sql - SQL text, passed to the client unchanged
   * @param params - Positional parameters
   * @returns Rows, row count, command tag and the measured duration
   * @throws Error if the transaction is not active
   */
  async query<T = Record<string, unknown>>(
    sql: string,
    params?: unknown[]
  ): Promise<QueryResult<T>> {
    this.ensureActive();

    const startTime = Date.now();
    const result = await this.client.query<T>(sql, params);
    this.queryCount++;

    return {
      rows: result.rows,
      rowCount: result.rowCount ?? 0,
      durationMs: Date.now() - startTime,
      command: result.command,
    };
  }

  // ===========================================================================
  // Utility Methods
  // ===========================================================================

  /**
   * Get transaction status.
   *
   * @returns Snapshot of id, activity flag, tracked savepoints (oldest
   *          first), query count and elapsed time (0 when not started)
   */
  getStatus(): {
    transactionId: string | null;
    isActive: boolean;
    savepoints: string[];
    queryCount: number;
    durationMs: number;
  } {
    return {
      transactionId: this.transactionId,
      isActive: this.isActive,
      savepoints: Array.from(this.savepoints),
      queryCount: this.queryCount,
      durationMs: this.startTime ? Date.now() - this.startTime : 0,
    };
  }

  /**
   * Ensure transaction is active.
   *
   * @throws Error if `begin()` has not been called or the transaction ended
   */
  private ensureActive(): void {
    if (!this.isActive) {
      throw new Error('Transaction is not active. Call begin() first.');
    }
  }

  /**
   * Drop tracked savepoints that were created after `name`.
   *
   * @param name - Savepoint acting as the cut-off
   * @param inclusive - Whether `name` itself is dropped as well
   */
  private forgetSavepointsFrom(name: string, inclusive: boolean): void {
    let reached = false;
    for (const tracked of Array.from(this.savepoints)) {
      if (reached) {
        this.savepoints.delete(tracked);
      } else if (tracked === name) {
        reached = true;
        if (inclusive) {
          this.savepoints.delete(tracked);
        }
      }
    }
  }

  /**
   * Build search query SQL.
   *
   * The query vector is inlined as a literal; filter values are passed as
   * positional parameters. A `metadata` filter key becomes a jsonb
   * containment test, every other key an equality test on that column.
   */
  private buildSearchQuery(options: VectorSearchOptions): { sql: string; params: unknown[] } {
    const tableName = options.tableName ?? this.defaultTableName;
    const vectorColumn = options.vectorColumn ?? 'embedding';
    const metric = options.metric ?? 'cosine';
    const operator = DISTANCE_OPERATORS[metric] ?? '<=>';

    const queryVector = this.formatVector(options.query);
    const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';

    const selectColumns = options.selectColumns ?? ['id'];
    const columnList = [...selectColumns];

    if (options.includeVector) {
      // Quoted like the distance expression below so that the returned column
      // name matches `vectorColumn` exactly (no case folding).
      columnList.push(this.escapeIdentifier(vectorColumn));
    }
    if (options.includeMetadata) {
      columnList.push('metadata');
    }

    const distanceExpr = `${this.escapeIdentifier(vectorColumn)} ${operator} '${queryVector}'::vector`;
    columnList.push(`(${distanceExpr}) as distance`);

    const whereClauses: string[] = [];
    const params: unknown[] = [];
    let paramIndex = 1;

    if (options.filter) {
      for (const [key, value] of Object.entries(options.filter)) {
        if (key === 'metadata') {
          whereClauses.push(`metadata @> $${paramIndex++}::jsonb`);
          params.push(JSON.stringify(value));
        } else {
          whereClauses.push(`${this.escapeIdentifier(key)} = $${paramIndex++}`);
          params.push(value);
        }
      }
    }

    let sql = `SELECT ${columnList.join(', ')} FROM ${schemaPrefix}${this.escapeIdentifier(tableName)}`;

    if (whereClauses.length > 0) {
      sql += ` WHERE ${whereClauses.join(' AND ')}`;
    }

    sql += ` ORDER BY ${distanceExpr} ASC`;
    sql += ` LIMIT ${options.k}`;

    return { sql, params };
  }

  /**
   * Cleanup transaction state.
   *
   * The query count is intentionally left untouched; `begin()` resets it.
   */
  private cleanup(): void {
    this.isActive = false;
    this.savepoints.clear();
    this.transactionId = null;
    this.startTime = null;
  }

  /**
   * Format vector for SQL as a bracketed, comma-separated literal.
   */
  private formatVector(vector: number[] | Float32Array): string {
    const arr = Array.isArray(vector) ? vector : Array.from(vector);
    return `[${arr.join(',')}]`;
  }

  /**
   * Parse vector from SQL result.
   *
   * @returns The components; an empty array when the text holds none
   */
  private parseVector(vectorStr: string): number[] {
    const cleaned = vectorStr.replace(/[\[\]{}]/g, '').trim();
    // ''.split(',') is [''] and Number('') is 0 - avoid returning [0].
    if (cleaned === '') {
      return [];
    }
    return cleaned.split(',').map(Number);
  }

  /**
   * Escape SQL identifier by double-quoting it and doubling embedded quotes.
   */
  private escapeIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }
}
|
|
1294
|
+
|
|
1295
|
+
// ============================================================================
|
|
1296
|
+
// BatchProcessor Class
|
|
1297
|
+
// ============================================================================
|
|
1298
|
+
|
|
1299
|
+
/**
 * Batch processor for large dataset operations.
 *
 * Provides efficient processing of large datasets with configurable
 * batch sizes, concurrency, and error handling.
 *
 * Emits `batch_complete` and `batch_error` for every processing attempt and
 * `batch_search_complete` after every group of parallel searches.
 *
 * @example
 * ```typescript
 * const processor = new BatchProcessor(bridge, { batchSize: 500, concurrency: 4 });
 *
 * async function* loadData() {
 *   for (const item of massiveDataset) {
 *     yield item;
 *   }
 * }
 *
 * for await (const result of processor.processBatch(loadData(), async (batch) => {
 *   return batch.map(item => processItem(item));
 * })) {
 *   console.log(result);
 * }
 * ```
 */
export class BatchProcessor extends EventEmitter {
  private readonly pool: Pool;
  private readonly options: Required<BatchOptions>;
  private readonly schema?: string;

  /**
   * @param pool - Connection pool used by `parallelSearch`
   * @param options - Batch options plus an optional schema name; retries are
   *                  enabled with 3 attempts and transactions disabled unless
   *                  specified otherwise
   */
  constructor(
    pool: Pool,
    options: BatchOptions & { schema?: string } = {}
  ) {
    super();
    this.pool = pool;
    this.schema = options.schema;
    this.options = {
      batchSize: options.batchSize ?? DEFAULT_BATCH_SIZE,
      concurrency: options.concurrency ?? DEFAULT_CONCURRENCY,
      retryOnFailure: options.retryOnFailure ?? true,
      maxRetries: options.maxRetries ?? 3,
      useTransaction: options.useTransaction ?? false,
    };
  }

  /**
   * Process items in batches with custom processor function.
   *
   * Full batches are started as soon as they are collected. Once the number
   * of in-flight batches reaches `concurrency`, the generator waits for
   * whichever finishes first and yields its results, so results are not
   * guaranteed to come out in input order.
   *
   * @param items - Async iterable of items to process
   * @param processor - Batch processing function
   * @param options - Processing options
   * @yields Processed results
   */
  async *processBatch<T, R>(
    items: AsyncIterable<T>,
    processor: (batch: T[]) => Promise<R[]>,
    options?: {
      batchSize?: number;
      concurrency?: number;
      onBatchComplete?: (batchIndex: number, results: R[]) => void;
    }
  ): AsyncGenerator<R, void, undefined> {
    const batchSize = options?.batchSize ?? this.options.batchSize;
    const concurrency = options?.concurrency ?? this.options.concurrency;

    let batch: T[] = [];
    let batchIndex = 0;
    const pendingBatches: Promise<{ index: number; results: R[] }>[] = [];

    // Process items and accumulate into batches
    for await (const item of items) {
      batch.push(item);

      if (batch.length >= batchSize) {
        const currentBatch = batch;
        const currentIndex = batchIndex;
        batch = [];
        batchIndex++;

        // Add batch to processing queue
        const batchPromise = this.processSingleBatch(
          currentBatch,
          processor,
          currentIndex
        ).then(results => {
          options?.onBatchComplete?.(currentIndex, results);
          return { index: currentIndex, results };
        });

        pendingBatches.push(batchPromise);

        // Yield results when we have enough pending batches
        if (pendingBatches.length >= concurrency) {
          // Tag each promise with its position so the winner can be removed.
          const completed = await Promise.race(
            pendingBatches.map((p, i) => p.then(r => ({ ...r, promiseIndex: i })))
          );

          // Remove completed batch from pending
          pendingBatches.splice(completed.promiseIndex, 1);

          for (const result of completed.results) {
            yield result;
          }
        }
      }
    }

    // Process remaining batch
    if (batch.length > 0) {
      const results = await this.processSingleBatch(batch, processor, batchIndex);
      options?.onBatchComplete?.(batchIndex, results);
      for (const result of results) {
        yield result;
      }
    }

    // Wait for remaining pending batches, yielding them in batch order
    const remainingResults = await Promise.all(pendingBatches);
    for (const { results } of remainingResults.sort((a, b) => a.index - b.index)) {
      for (const result of results) {
        yield result;
      }
    }
  }

  /**
   * Perform parallel search across multiple queries.
   *
   * Queries run in groups of `concurrency` (treated as at least 1, fractions
   * rounded down); each group is awaited before the next starts.
   *
   * @param queries - Array of query vectors
   * @param options - Search options
   * @returns Array of search results for each query, in query order
   */
  async parallelSearch(
    queries: number[][],
    options: Omit<VectorSearchOptions, 'query'>
  ): Promise<VectorSearchResult[][]> {
    // A step of 0 (or less) would never advance the loop below.
    const concurrency = Math.max(1, Math.floor(this.options.concurrency) || 1);
    const results: VectorSearchResult[][] = new Array(queries.length);

    // Process queries in parallel batches
    for (let i = 0; i < queries.length; i += concurrency) {
      const batchQueries = queries.slice(i, i + concurrency);
      const batchResults = await Promise.all(
        batchQueries.map((query, j) =>
          this.executeSingleSearch({ ...options, query } as VectorSearchOptions)
            .then(r => ({ index: i + j, results: r }))
        )
      );

      for (const { index, results: searchResults } of batchResults) {
        results[index] = searchResults;
      }

      this.emit('batch_search_complete', {
        batchStart: i,
        batchEnd: Math.min(i + concurrency, queries.length),
        total: queries.length,
      });
    }

    return results;
  }

  /**
   * Process a single batch with retry support.
   *
   * The processor is always invoked at least once. Further attempts (up to
   * `maxRetries` in total) happen only when `retryOnFailure` is set, with an
   * exponential backoff of 1s, 2s, 4s, ... capped at 10s.
   *
   * @throws Error once every allowed attempt has failed
   */
  private async processSingleBatch<T, R>(
    batch: T[],
    processor: (batch: T[]) => Promise<R[]>,
    batchIndex: number
  ): Promise<R[]> {
    // maxRetries below 1 must still allow the initial attempt.
    const maxAttempts = Math.max(1, this.options.maxRetries);
    let attempt = 0;
    let lastError: Error | null = null;

    while (attempt < maxAttempts) {
      attempt++;
      try {
        const results = await processor(batch);
        this.emit('batch_complete', { batchIndex, attempt, success: true });
        return results;
      } catch (error) {
        lastError = error as Error;
        this.emit('batch_error', { batchIndex, attempt, error: lastError });

        if (!this.options.retryOnFailure || attempt >= maxAttempts) {
          break;
        }

        // Exponential backoff
        await this.sleep(Math.min(1000 * Math.pow(2, attempt - 1), 10000));
      }
    }

    throw new Error(`Batch ${batchIndex} failed after ${attempt} attempts: ${lastError?.message}`);
  }

  /**
   * Execute a single search query on its own pooled client, which is always
   * released afterwards.
   */
  private async executeSingleSearch(
    options: VectorSearchOptions
  ): Promise<VectorSearchResult[]> {
    const client = await this.pool.connect();
    try {
      const { sql, params } = this.buildSearchQuery(options);
      const result = await client.query<{
        id: string | number;
        distance: number;
        [key: string]: unknown;
      }>(sql, params);

      const metric = options.metric ?? 'cosine';
      return result.rows.map((row, index) => {
        // cosine/dot: score = 1 - distance; every other metric: 1 / (1 + distance)
        const score = metric === 'cosine' || metric === 'dot'
          ? 1 - row.distance
          : 1 / (1 + row.distance);

        return {
          id: row.id,
          score,
          distance: row.distance,
          rank: index + 1,
          retrievedAt: new Date(),
        };
      });
    } finally {
      client.release();
    }
  }

  /**
   * Build search query SQL.
   *
   * NOTE: only table, column, metric, selectColumns, query and k are read
   * here; `filter`, `includeVector` and `includeMetadata` are not applied,
   * which is why the parameter list is always empty.
   */
  private buildSearchQuery(options: VectorSearchOptions): { sql: string; params: unknown[] } {
    const tableName = options.tableName ?? 'vectors';
    const vectorColumn = options.vectorColumn ?? 'embedding';
    const metric = options.metric ?? 'cosine';
    const operator = DISTANCE_OPERATORS[metric] ?? '<=>';

    const queryVector = this.formatVector(options.query);
    const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';

    const selectColumns = options.selectColumns ?? ['id'];
    const distanceExpr = `${this.escapeIdentifier(vectorColumn)} ${operator} '${queryVector}'::vector`;

    const sql = `SELECT ${selectColumns.join(', ')}, (${distanceExpr}) as distance ` +
      `FROM ${schemaPrefix}${this.escapeIdentifier(tableName)} ` +
      `ORDER BY ${distanceExpr} ASC ` +
      `LIMIT ${options.k}`;

    return { sql, params: [] };
  }

  /**
   * Format vector for SQL as a bracketed, comma-separated literal.
   */
  private formatVector(vector: number[] | Float32Array): string {
    const arr = Array.isArray(vector) ? vector : Array.from(vector);
    return `[${arr.join(',')}]`;
  }

  /**
   * Escape SQL identifier by double-quoting it and doubling embedded quotes.
   */
  private escapeIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }

  /**
   * Sleep utility.
   *
   * @param ms - Delay in milliseconds
   */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
|
|
1566
|
+
|
|
1567
|
+
// ============================================================================
|
|
1568
|
+
// PoolEventEmitter Class
|
|
1569
|
+
// ============================================================================
|
|
1570
|
+
|
|
1571
|
+
/**
 * Event emitter for connection pool lifecycle events.
 *
 * Subscribes to the wrapped pool's `connect`, `acquire`, `release`, `remove`
 * and `error` events and re-emits each of them under a `pool:`-prefixed name,
 * with listener signatures taken from `PoolEvents`.
 *
 * @example
 * ```typescript
 * const poolEvents = new PoolEventEmitter(pool);
 *
 * poolEvents.on('pool:connect', (client) => {
 *   console.log('Client connected');
 * });
 *
 * poolEvents.on('pool:error', (error, client) => {
 *   console.error('Pool error:', error);
 * });
 * ```
 */
export class PoolEventEmitter extends EventEmitter {
  private readonly pool: Pool;

  /**
   * @param pool - Pool whose events are forwarded. Listeners are attached to
   *   it immediately, during construction.
   */
  constructor(pool: Pool) {
    super();
    this.pool = pool;
    this.setupListeners();
  }

  /**
   * Add typed event listener.
   *
   * @param event - One of the keys of `PoolEvents`.
   * @param listener - Callback matching the signature declared for `event`.
   * @returns This emitter, for chaining.
   */
  on<K extends keyof PoolEvents>(event: K, listener: PoolEvents[K]): this {
    return super.on(event, listener as (...args: unknown[]) => void);
  }

  /**
   * Add one-time typed event listener.
   *
   * @param event - One of the keys of `PoolEvents`.
   * @param listener - Callback matching the signature declared for `event`.
   * @returns This emitter, for chaining.
   */
  once<K extends keyof PoolEvents>(event: K, listener: PoolEvents[K]): this {
    return super.once(event, listener as (...args: unknown[]) => void);
  }

  /**
   * Remove typed event listener.
   *
   * @param event - One of the keys of `PoolEvents`.
   * @param listener - The same function reference that was registered.
   * @returns This emitter, for chaining.
   */
  off<K extends keyof PoolEvents>(event: K, listener: PoolEvents[K]): this {
    return super.off(event, listener as (...args: unknown[]) => void);
  }

  /**
   * Emit typed event.
   *
   * @param event - One of the keys of `PoolEvents`.
   * @param args - Arguments matching the listener signature for `event`.
   * @returns Whatever the base `EventEmitter.emit` returns for this event.
   */
  emit<K extends keyof PoolEvents>(
    event: K,
    ...args: Parameters<PoolEvents[K]>
  ): boolean {
    return super.emit(event, ...args);
  }

  /**
   * Get current pool statistics.
   *
   * @returns A snapshot of the pool's `totalCount`, `idleCount` and
   *   `waitingCount` properties, read at call time.
   */
  getStats(): {
    totalCount: number;
    idleCount: number;
    waitingCount: number;
  } {
    return {
      totalCount: this.pool.totalCount,
      idleCount: this.pool.idleCount,
      waitingCount: this.pool.waitingCount,
    };
  }

  /**
   * Setup pool event listeners.
   *
   * Each pool event is forwarded under its `pool:`-prefixed counterpart.
   */
  private setupListeners(): void {
    this.pool.on('connect', (...args: unknown[]) => {
      const client = args[0] as PoolClient;
      this.emit('pool:connect', client);
    });

    this.pool.on('acquire', (...args: unknown[]) => {
      const client = args[0] as PoolClient;
      this.emit('pool:acquire', client);
    });

    this.pool.on('release', (...args: unknown[]) => {
      // node-postgres emits `release` as (err, client), so the client is the
      // second argument there. Emitters that pass only the client still work
      // through the single-argument fallback.
      // NOTE(review): assumes `Pool` follows node-postgres event semantics - confirm.
      const client = (args.length > 1 ? args[1] : args[0]) as PoolClient;
      this.emit('pool:release', client);
    });

    this.pool.on('remove', (...args: unknown[]) => {
      const client = args[0] as PoolClient;
      this.emit('pool:remove', client);
    });

    this.pool.on('error', (...args: unknown[]) => {
      const error = args[0] as Error;
      // The second argument is optional and is forwarded as-is.
      const client = args[1] as PoolClient | undefined;
      this.emit('pool:error', error, client);
    });
  }
}
|
|
1675
|
+
|
|
1676
|
+
// ============================================================================
|
|
1677
|
+
// Factory Functions
|
|
1678
|
+
// ============================================================================
|
|
1679
|
+
|
|
1680
|
+
/**
 * Factory wrapper around the `RuVectorStream` constructor.
 *
 * @param pool - Pool forwarded to the constructor.
 * @param options - Optional settings (`schema`, `defaultTableName`,
 *   `highWaterMark`), forwarded unchanged.
 * @returns The newly constructed `RuVectorStream`.
 */
export function createRuVectorStream(
  pool: Pool,
  options?: {
    schema?: string;
    defaultTableName?: string;
    highWaterMark?: number;
  }
): RuVectorStream {
  const stream = new RuVectorStream(pool, options);
  return stream;
}
|
|
1693
|
+
|
|
1694
|
+
/**
 * Factory wrapper around the `RuVectorTransaction` constructor.
 *
 * @param client - Pool client forwarded to the constructor.
 * @param options - Optional settings (`schema`, `defaultTableName`),
 *   forwarded unchanged.
 * @returns The newly constructed `RuVectorTransaction`.
 */
export function createRuVectorTransaction(
  client: PoolClient,
  options?: {
    schema?: string;
    defaultTableName?: string;
  }
): RuVectorTransaction {
  const transaction = new RuVectorTransaction(client, options);
  return transaction;
}
|
|
1706
|
+
|
|
1707
|
+
/**
 * Factory wrapper around the `BatchProcessor` constructor.
 *
 * @param pool - Pool forwarded to the constructor.
 * @param options - Optional batch settings plus an optional `schema`,
 *   forwarded unchanged.
 * @returns The newly constructed `BatchProcessor`.
 */
export function createBatchProcessor(
  pool: Pool,
  options?: BatchOptions & { schema?: string }
): BatchProcessor {
  const processor = new BatchProcessor(pool, options);
  return processor;
}
|
|
1716
|
+
|
|
1717
|
+
/**
 * Factory wrapper around the `PoolEventEmitter` constructor.
 *
 * @param pool - Pool whose events the new emitter will forward.
 * @returns The newly constructed `PoolEventEmitter`.
 */
export function createPoolEventEmitter(pool: Pool): PoolEventEmitter {
  const poolEvents = new PoolEventEmitter(pool);
  return poolEvents;
}
|
|
1723
|
+
|
|
1724
|
+
// ============================================================================
|
|
1725
|
+
// Default Export
|
|
1726
|
+
// ============================================================================
|
|
1727
|
+
|
|
1728
|
+
/**
 * Default export: a single object bundling the four classes and their
 * matching factory functions under their own names.
 */
const defaultExportBundle = {
  RuVectorStream,
  RuVectorTransaction,
  BatchProcessor,
  PoolEventEmitter,
  createRuVectorStream,
  createRuVectorTransaction,
  createBatchProcessor,
  createPoolEventEmitter,
};

export default defaultExportBundle;
|