@sparkleideas/plugins 3.0.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +401 -0
  2. package/__tests__/collection-manager.test.ts +332 -0
  3. package/__tests__/dependency-graph.test.ts +434 -0
  4. package/__tests__/enhanced-plugin-registry.test.ts +488 -0
  5. package/__tests__/plugin-registry.test.ts +368 -0
  6. package/__tests__/ruvector-bridge.test.ts +2429 -0
  7. package/__tests__/ruvector-integration.test.ts +1602 -0
  8. package/__tests__/ruvector-migrations.test.ts +1099 -0
  9. package/__tests__/ruvector-quantization.test.ts +846 -0
  10. package/__tests__/ruvector-streaming.test.ts +1088 -0
  11. package/__tests__/sdk.test.ts +325 -0
  12. package/__tests__/security.test.ts +348 -0
  13. package/__tests__/utils/ruvector-test-utils.ts +860 -0
  14. package/examples/plugin-creator/index.ts +636 -0
  15. package/examples/plugin-creator/plugin-creator.test.ts +312 -0
  16. package/examples/ruvector/README.md +288 -0
  17. package/examples/ruvector/attention-patterns.ts +394 -0
  18. package/examples/ruvector/basic-usage.ts +288 -0
  19. package/examples/ruvector/docker-compose.yml +75 -0
  20. package/examples/ruvector/gnn-analysis.ts +501 -0
  21. package/examples/ruvector/hyperbolic-hierarchies.ts +557 -0
  22. package/examples/ruvector/init-db.sql +119 -0
  23. package/examples/ruvector/quantization.ts +680 -0
  24. package/examples/ruvector/self-learning.ts +447 -0
  25. package/examples/ruvector/semantic-search.ts +576 -0
  26. package/examples/ruvector/streaming-large-data.ts +507 -0
  27. package/examples/ruvector/transactions.ts +594 -0
  28. package/examples/ruvector-plugins/hook-pattern-library.ts +486 -0
  29. package/examples/ruvector-plugins/index.ts +79 -0
  30. package/examples/ruvector-plugins/intent-router.ts +354 -0
  31. package/examples/ruvector-plugins/mcp-tool-optimizer.ts +424 -0
  32. package/examples/ruvector-plugins/reasoning-bank.ts +657 -0
  33. package/examples/ruvector-plugins/ruvector-plugins.test.ts +518 -0
  34. package/examples/ruvector-plugins/semantic-code-search.ts +498 -0
  35. package/examples/ruvector-plugins/shared/index.ts +20 -0
  36. package/examples/ruvector-plugins/shared/vector-utils.ts +257 -0
  37. package/examples/ruvector-plugins/sona-learning.ts +445 -0
  38. package/package.json +97 -0
  39. package/src/collections/collection-manager.ts +661 -0
  40. package/src/collections/index.ts +56 -0
  41. package/src/collections/official/index.ts +1040 -0
  42. package/src/core/base-plugin.ts +416 -0
  43. package/src/core/plugin-interface.ts +215 -0
  44. package/src/hooks/index.ts +685 -0
  45. package/src/index.ts +378 -0
  46. package/src/integrations/agentic-flow.ts +743 -0
  47. package/src/integrations/index.ts +88 -0
  48. package/src/integrations/ruvector/ARCHITECTURE.md +1245 -0
  49. package/src/integrations/ruvector/attention-advanced.ts +1040 -0
  50. package/src/integrations/ruvector/attention-executor.ts +782 -0
  51. package/src/integrations/ruvector/attention-mechanisms.ts +757 -0
  52. package/src/integrations/ruvector/attention.ts +1063 -0
  53. package/src/integrations/ruvector/gnn.ts +3050 -0
  54. package/src/integrations/ruvector/hyperbolic.ts +1948 -0
  55. package/src/integrations/ruvector/index.ts +394 -0
  56. package/src/integrations/ruvector/migrations/001_create_extension.sql +135 -0
  57. package/src/integrations/ruvector/migrations/002_create_vector_tables.sql +259 -0
  58. package/src/integrations/ruvector/migrations/003_create_indices.sql +328 -0
  59. package/src/integrations/ruvector/migrations/004_create_functions.sql +598 -0
  60. package/src/integrations/ruvector/migrations/005_create_attention_functions.sql +654 -0
  61. package/src/integrations/ruvector/migrations/006_create_gnn_functions.sql +728 -0
  62. package/src/integrations/ruvector/migrations/007_create_hyperbolic_functions.sql +762 -0
  63. package/src/integrations/ruvector/migrations/index.ts +35 -0
  64. package/src/integrations/ruvector/migrations/migrations.ts +647 -0
  65. package/src/integrations/ruvector/quantization.ts +2036 -0
  66. package/src/integrations/ruvector/ruvector-bridge.ts +2000 -0
  67. package/src/integrations/ruvector/self-learning.ts +2376 -0
  68. package/src/integrations/ruvector/streaming.ts +1737 -0
  69. package/src/integrations/ruvector/types.ts +1945 -0
  70. package/src/providers/index.ts +643 -0
  71. package/src/registry/dependency-graph.ts +568 -0
  72. package/src/registry/enhanced-plugin-registry.ts +994 -0
  73. package/src/registry/plugin-registry.ts +604 -0
  74. package/src/sdk/index.ts +563 -0
  75. package/src/security/index.ts +594 -0
  76. package/src/types/index.ts +446 -0
  77. package/src/workers/index.ts +700 -0
  78. package/tmp.json +0 -0
  79. package/tsconfig.json +25 -0
  80. package/vitest.config.ts +23 -0
@@ -0,0 +1,1737 @@
1
+ /**
2
+ * RuVector PostgreSQL Bridge - Streaming and Transaction Support
3
+ *
4
+ * Provides streaming capabilities for large result sets and batch operations,
5
+ * enhanced transaction handling with savepoints and isolation levels,
6
+ * and efficient batch processing with backpressure handling.
7
+ *
8
+ * @module @sparkleideas/plugins/integrations/ruvector/streaming
9
+ * @version 1.0.0
10
+ */
11
+
12
+ import { EventEmitter } from 'events';
13
+ import type {
14
+ VectorSearchOptions,
15
+ VectorSearchResult,
16
+ VectorInsertOptions,
17
+ VectorUpdateOptions,
18
+ BatchResult,
19
+ DistanceMetric,
20
+ QueryResult,
21
+ } from './types.js';
22
+
23
+ // ============================================================================
24
+ // Type Definitions
25
+ // ============================================================================
26
+
27
+ /**
28
+ * PostgreSQL PoolClient interface (from pg package).
29
+ */
30
+ export interface PoolClient {
31
+ query<T = unknown>(text: string, values?: unknown[]): Promise<PgQueryResult<T>>;
32
+ release(err?: Error): void;
33
+ }
34
+
35
+ /**
36
+ * PostgreSQL query result interface.
37
+ */
38
+ interface PgQueryResult<T> {
39
+ rows: T[];
40
+ rowCount: number | null;
41
+ command: string;
42
+ fields?: Array<{ name: string; dataTypeID: number }>;
43
+ }
44
+
45
+ /**
46
+ * Pool interface for connection management.
47
+ */
48
+ interface Pool {
49
+ connect(): Promise<PoolClient>;
50
+ query<T = unknown>(text: string, values?: unknown[]): Promise<PgQueryResult<T>>;
51
+ end(): Promise<void>;
52
+ on(event: string, callback: (...args: unknown[]) => void): this;
53
+ totalCount: number;
54
+ idleCount: number;
55
+ waitingCount: number;
56
+ }
57
+
58
+ /**
59
+ * Extended search options for streaming operations.
60
+ */
61
+ export interface StreamSearchOptions extends VectorSearchOptions {
62
+ /** Number of results per batch (default: 1000) */
63
+ batchSize?: number;
64
+ /** Cursor name for server-side cursor */
65
+ cursorName?: string;
66
+ /** Query timeout in milliseconds */
67
+ timeout?: number;
68
+ /** Whether to use a server-side cursor */
69
+ useServerCursor?: boolean;
70
+ /** Fetch direction for cursor */
71
+ fetchDirection?: 'forward' | 'backward';
72
+ }
73
+
74
+ /**
75
+ * Insert result for streaming operations.
76
+ */
77
+ export interface InsertResult {
78
+ /** ID of the inserted vector */
79
+ id: string | number;
80
+ /** Whether the insert was successful */
81
+ success: boolean;
82
+ /** Error message if insert failed */
83
+ error?: string;
84
+ /** Batch index */
85
+ batchIndex: number;
86
+ /** Item index within batch */
87
+ itemIndex: number;
88
+ }
89
+
90
+ /**
91
+ * Vector entry for streaming inserts.
92
+ */
93
+ export interface VectorEntry {
94
+ /** Optional ID (auto-generated if not provided) */
95
+ id?: string | number;
96
+ /** Vector data */
97
+ vector: number[] | Float32Array;
98
+ /** Optional metadata */
99
+ metadata?: Record<string, unknown>;
100
+ }
101
+
102
+ /**
103
+ * Transaction isolation levels.
104
+ */
105
+ export type IsolationLevel = 'read_committed' | 'repeatable_read' | 'serializable';
106
+
107
+ /**
108
+ * Batch processing options.
109
+ */
110
+ export interface BatchOptions {
111
+ /** Batch size for processing */
112
+ batchSize?: number;
113
+ /** Maximum concurrent batches */
114
+ concurrency?: number;
115
+ /** Retry failed operations */
116
+ retryOnFailure?: boolean;
117
+ /** Maximum retry attempts */
118
+ maxRetries?: number;
119
+ /** Enable transaction mode */
120
+ useTransaction?: boolean;
121
+ }
122
+
123
+ /**
124
+ * Pool events interface.
125
+ */
126
+ export interface PoolEvents {
127
+ 'pool:connect': (client: PoolClient) => void;
128
+ 'pool:acquire': (client: PoolClient) => void;
129
+ 'pool:release': (client: PoolClient) => void;
130
+ 'pool:remove': (client: PoolClient) => void;
131
+ 'pool:error': (error: Error, client?: PoolClient) => void;
132
+ }
133
+
134
+ /**
135
+ * Stream state for backpressure handling.
136
+ */
137
+ interface StreamState {
138
+ paused: boolean;
139
+ buffer: unknown[];
140
+ bufferSize: number;
141
+ highWaterMark: number;
142
+ drainPromise: Promise<void> | null;
143
+ drainResolve: (() => void) | null;
144
+ }
145
+
146
+ // ============================================================================
147
+ // Constants
148
+ // ============================================================================
149
+
150
+ const DEFAULT_BATCH_SIZE = 1000;
151
+ const DEFAULT_CONCURRENCY = 4;
152
+ const DEFAULT_HIGH_WATER_MARK = 16384;
153
+ const DEFAULT_TIMEOUT_MS = 30000;
154
+ const DEFAULT_CURSOR_PREFIX = 'ruvector_cursor_';
155
+
156
+ // Distance operators mapping
157
+ const DISTANCE_OPERATORS: Record<DistanceMetric, string> = {
158
+ cosine: '<=>',
159
+ euclidean: '<->',
160
+ dot: '<#>',
161
+ hamming: '<~>',
162
+ manhattan: '<+>',
163
+ chebyshev: '<+>',
164
+ jaccard: '<~>',
165
+ minkowski: '<->',
166
+ bray_curtis: '<->',
167
+ canberra: '<->',
168
+ mahalanobis: '<->',
169
+ correlation: '<=>',
170
+ };
171
+
172
+ // ============================================================================
173
+ // RuVectorStream Class
174
+ // ============================================================================
175
+
176
+ /**
177
+ * Streaming support for RuVector operations.
178
+ *
179
+ * Provides async generators for streaming large result sets and batch inserts
180
+ * with backpressure handling.
181
+ *
182
+ * @example
183
+ * ```typescript
184
+ * const stream = new RuVectorStream(pool, config);
185
+ *
186
+ * // Stream search results
187
+ * for await (const result of stream.streamSearch({ query: vector, k: 10000 })) {
188
+ * console.log(result);
189
+ * }
190
+ *
191
+ * // Stream inserts
192
+ * async function* vectorGenerator() {
193
+ * for (let i = 0; i < 100000; i++) {
194
+ * yield { vector: generateVector(), metadata: { index: i } };
195
+ * }
196
+ * }
197
+ *
198
+ * for await (const result of stream.streamInsert(vectorGenerator())) {
199
+ * console.log(`Inserted: ${result.id}`);
200
+ * }
201
+ * ```
202
+ */
203
+ export class RuVectorStream extends EventEmitter {
204
+ private readonly pool: Pool;
205
+ private readonly schema?: string;
206
+ private readonly defaultTableName: string;
207
+ private readonly state: StreamState;
208
+ private activeClient: PoolClient | null = null;
209
+ private activeCursors: Set<string> = new Set();
210
+
211
+ constructor(
212
+ pool: Pool,
213
+ options: {
214
+ schema?: string;
215
+ defaultTableName?: string;
216
+ highWaterMark?: number;
217
+ } = {}
218
+ ) {
219
+ super();
220
+ this.pool = pool;
221
+ this.schema = options.schema;
222
+ this.defaultTableName = options.defaultTableName ?? 'vectors';
223
+ this.state = {
224
+ paused: false,
225
+ buffer: [],
226
+ bufferSize: 0,
227
+ highWaterMark: options.highWaterMark ?? DEFAULT_HIGH_WATER_MARK,
228
+ drainPromise: null,
229
+ drainResolve: null,
230
+ };
231
+ }
232
+
233
+ // ===========================================================================
234
+ // Stream Search
235
+ // ===========================================================================
236
+
237
+ /**
238
+ * Stream large result sets using server-side cursors.
239
+ *
240
+ * @param options - Search options with streaming configuration
241
+ * @yields {VectorSearchResult} Individual search results
242
+ */
243
+ async *streamSearch(options: StreamSearchOptions): AsyncGenerator<VectorSearchResult, void, undefined> {
244
+ const batchSize = options.batchSize ?? DEFAULT_BATCH_SIZE;
245
+ const cursorName = options.cursorName ?? `${DEFAULT_CURSOR_PREFIX}${Date.now()}_${Math.random().toString(36).slice(2)}`;
246
+ const timeout = options.timeout ?? DEFAULT_TIMEOUT_MS;
247
+ const useServerCursor = options.useServerCursor ?? true;
248
+
249
+ const client = await this.pool.connect();
250
+ this.activeClient = client;
251
+ this.activeCursors.add(cursorName);
252
+
253
+ try {
254
+ // Set statement timeout
255
+ await client.query(`SET LOCAL statement_timeout = ${timeout}`);
256
+
257
+ if (useServerCursor) {
258
+ // Use server-side cursor for memory efficiency
259
+ yield* this.streamWithCursor(client, options, cursorName, batchSize);
260
+ } else {
261
+ // Use OFFSET/LIMIT pagination (less efficient but simpler)
262
+ yield* this.streamWithPagination(client, options, batchSize);
263
+ }
264
+ } finally {
265
+ // Cleanup
266
+ if (this.activeCursors.has(cursorName)) {
267
+ try {
268
+ await client.query(`CLOSE ${this.escapeIdentifier(cursorName)}`);
269
+ } catch {
270
+ // Cursor may already be closed
271
+ }
272
+ this.activeCursors.delete(cursorName);
273
+ }
274
+ client.release();
275
+ this.activeClient = null;
276
+ }
277
+ }
278
+
279
+ /**
280
+ * Stream results using a server-side cursor.
281
+ */
282
+ private async *streamWithCursor(
283
+ client: PoolClient,
284
+ options: StreamSearchOptions,
285
+ cursorName: string,
286
+ batchSize: number
287
+ ): AsyncGenerator<VectorSearchResult, void, undefined> {
288
+ const { sql, params } = this.buildSearchQuery(options);
289
+ const escapedCursor = this.escapeIdentifier(cursorName);
290
+
291
+ // Begin transaction for cursor
292
+ await client.query('BEGIN');
293
+
294
+ try {
295
+ // Declare cursor
296
+ await client.query(
297
+ `DECLARE ${escapedCursor} CURSOR WITH HOLD FOR ${sql}`,
298
+ params
299
+ );
300
+
301
+ let rank = 0;
302
+ let hasMore = true;
303
+
304
+ while (hasMore) {
305
+ // Wait if paused (backpressure)
306
+ await this.waitIfPaused();
307
+
308
+ // Fetch batch
309
+ const fetchResult = await client.query<{
310
+ id: string | number;
311
+ distance: number;
312
+ [key: string]: unknown;
313
+ }>(
314
+ `FETCH ${batchSize} FROM ${escapedCursor}`
315
+ );
316
+
317
+ if (fetchResult.rows.length === 0) {
318
+ hasMore = false;
319
+ break;
320
+ }
321
+
322
+ // Yield individual results
323
+ for (const row of fetchResult.rows) {
324
+ rank++;
325
+ const result = this.transformSearchResult(row, options, rank);
326
+ yield result;
327
+
328
+ this.emit('result', result);
329
+ }
330
+
331
+ // Check if we've received less than batch size (end of results)
332
+ if (fetchResult.rows.length < batchSize) {
333
+ hasMore = false;
334
+ }
335
+ }
336
+
337
+ await client.query('COMMIT');
338
+ } catch (error) {
339
+ await client.query('ROLLBACK');
340
+ throw error;
341
+ }
342
+ }
343
+
344
+ /**
345
+ * Stream results using OFFSET/LIMIT pagination.
346
+ */
347
+ private async *streamWithPagination(
348
+ client: PoolClient,
349
+ options: StreamSearchOptions,
350
+ batchSize: number
351
+ ): AsyncGenerator<VectorSearchResult, void, undefined> {
352
+ const { sql: baseSql, params } = this.buildSearchQuery(options, true);
353
+
354
+ let offset = 0;
355
+ let rank = 0;
356
+ let hasMore = true;
357
+
358
+ while (hasMore) {
359
+ // Wait if paused (backpressure)
360
+ await this.waitIfPaused();
361
+
362
+ const sql = `${baseSql} LIMIT ${batchSize} OFFSET ${offset}`;
363
+ const result = await client.query<{
364
+ id: string | number;
365
+ distance: number;
366
+ [key: string]: unknown;
367
+ }>(sql, params);
368
+
369
+ if (result.rows.length === 0) {
370
+ hasMore = false;
371
+ break;
372
+ }
373
+
374
+ for (const row of result.rows) {
375
+ rank++;
376
+ const searchResult = this.transformSearchResult(row, options, rank);
377
+ yield searchResult;
378
+
379
+ this.emit('result', searchResult);
380
+ }
381
+
382
+ offset += batchSize;
383
+
384
+ if (result.rows.length < batchSize) {
385
+ hasMore = false;
386
+ }
387
+ }
388
+ }
389
+
390
+ /**
391
+ * Build the search query SQL.
392
+ */
393
+ private buildSearchQuery(
394
+ options: StreamSearchOptions,
395
+ forPagination = false
396
+ ): { sql: string; params: unknown[] } {
397
+ const tableName = options.tableName ?? this.defaultTableName;
398
+ const vectorColumn = options.vectorColumn ?? 'embedding';
399
+ const metric = options.metric ?? 'cosine';
400
+ const operator = DISTANCE_OPERATORS[metric] ?? '<=>';
401
+
402
+ const queryVector = this.formatVector(options.query);
403
+ const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';
404
+
405
+ // Build SELECT columns
406
+ const selectColumns = options.selectColumns ?? ['id'];
407
+ const columnList = [...selectColumns];
408
+
409
+ if (options.includeVector) {
410
+ columnList.push(vectorColumn);
411
+ }
412
+ if (options.includeMetadata) {
413
+ columnList.push('metadata');
414
+ }
415
+
416
+ const distanceExpr = `${this.escapeIdentifier(vectorColumn)} ${operator} '${queryVector}'::vector`;
417
+ columnList.push(`(${distanceExpr}) as distance`);
418
+
419
+ // Build WHERE clause
420
+ const whereClauses: string[] = [];
421
+ const params: unknown[] = [];
422
+ let paramIndex = 1;
423
+
424
+ if (options.threshold !== undefined) {
425
+ if (metric === 'cosine' || metric === 'dot') {
426
+ whereClauses.push(`(1 - (${distanceExpr})) >= $${paramIndex++}`);
427
+ params.push(options.threshold);
428
+ } else {
429
+ whereClauses.push(`(${distanceExpr}) <= $${paramIndex++}`);
430
+ params.push(options.threshold);
431
+ }
432
+ }
433
+
434
+ if (options.maxDistance !== undefined) {
435
+ whereClauses.push(`(${distanceExpr}) <= $${paramIndex++}`);
436
+ params.push(options.maxDistance);
437
+ }
438
+
439
+ if (options.filter) {
440
+ for (const [key, value] of Object.entries(options.filter)) {
441
+ if (key === 'metadata') {
442
+ whereClauses.push(`metadata @> $${paramIndex++}::jsonb`);
443
+ params.push(JSON.stringify(value));
444
+ } else {
445
+ whereClauses.push(`${this.escapeIdentifier(key)} = $${paramIndex++}`);
446
+ params.push(value);
447
+ }
448
+ }
449
+ }
450
+
451
+ // Build query
452
+ let sql = `SELECT ${columnList.join(', ')} FROM ${schemaPrefix}${this.escapeIdentifier(tableName)}`;
453
+
454
+ if (whereClauses.length > 0) {
455
+ sql += ` WHERE ${whereClauses.join(' AND ')}`;
456
+ }
457
+
458
+ sql += ` ORDER BY ${distanceExpr} ASC`;
459
+
460
+ // For cursor-based streaming, don't add LIMIT (cursor handles it)
461
+ // For pagination, LIMIT/OFFSET will be added by the caller
462
+ if (!forPagination && options.k) {
463
+ sql += ` LIMIT ${options.k}`;
464
+ }
465
+
466
+ return { sql, params };
467
+ }
468
+
469
+ /**
470
+ * Transform a database row into a VectorSearchResult.
471
+ */
472
+ private transformSearchResult(
473
+ row: { id: string | number; distance: number; [key: string]: unknown },
474
+ options: StreamSearchOptions,
475
+ rank: number
476
+ ): VectorSearchResult {
477
+ const metric = options.metric ?? 'cosine';
478
+ const score = metric === 'cosine' || metric === 'dot'
479
+ ? 1 - row.distance
480
+ : 1 / (1 + row.distance);
481
+
482
+ const result: VectorSearchResult = {
483
+ id: row.id,
484
+ score,
485
+ distance: row.distance,
486
+ rank,
487
+ retrievedAt: new Date(),
488
+ };
489
+
490
+ if (options.includeVector && row[options.vectorColumn ?? 'embedding']) {
491
+ (result as { vector?: number[] }).vector = this.parseVector(
492
+ row[options.vectorColumn ?? 'embedding'] as string
493
+ );
494
+ }
495
+
496
+ if (options.includeMetadata && row.metadata) {
497
+ (result as { metadata?: Record<string, unknown> }).metadata =
498
+ row.metadata as Record<string, unknown>;
499
+ }
500
+
501
+ return result;
502
+ }
503
+
504
+ // ===========================================================================
505
+ // Stream Insert
506
+ // ===========================================================================
507
+
508
+ /**
509
+ * Stream batch inserts for large datasets.
510
+ *
511
+ * @param vectors - Async iterable of vector entries
512
+ * @param options - Insert configuration options
513
+ * @yields {InsertResult} Individual insert results
514
+ */
515
+ async *streamInsert(
516
+ vectors: AsyncIterable<VectorEntry>,
517
+ options: {
518
+ tableName?: string;
519
+ vectorColumn?: string;
520
+ batchSize?: number;
521
+ upsert?: boolean;
522
+ conflictColumns?: string[];
523
+ } = {}
524
+ ): AsyncGenerator<InsertResult, void, undefined> {
525
+ const tableName = options.tableName ?? this.defaultTableName;
526
+ const vectorColumn = options.vectorColumn ?? 'embedding';
527
+ const batchSize = options.batchSize ?? DEFAULT_BATCH_SIZE;
528
+ const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';
529
+
530
+ let batch: VectorEntry[] = [];
531
+ let batchIndex = 0;
532
+ let totalProcessed = 0;
533
+
534
+ const client = await this.pool.connect();
535
+ this.activeClient = client;
536
+
537
+ try {
538
+ // Process vectors in batches
539
+ for await (const entry of vectors) {
540
+ // Wait if paused (backpressure)
541
+ await this.waitIfPaused();
542
+
543
+ batch.push(entry);
544
+
545
+ if (batch.length >= batchSize) {
546
+ // Process batch
547
+ const results = await this.insertBatch(
548
+ client,
549
+ batch,
550
+ tableName,
551
+ vectorColumn,
552
+ schemaPrefix,
553
+ batchIndex,
554
+ options.upsert,
555
+ options.conflictColumns
556
+ );
557
+
558
+ for (const result of results) {
559
+ yield result;
560
+ totalProcessed++;
561
+ this.emit('insert', result);
562
+ }
563
+
564
+ batch = [];
565
+ batchIndex++;
566
+ }
567
+ }
568
+
569
+ // Process remaining items
570
+ if (batch.length > 0) {
571
+ const results = await this.insertBatch(
572
+ client,
573
+ batch,
574
+ tableName,
575
+ vectorColumn,
576
+ schemaPrefix,
577
+ batchIndex,
578
+ options.upsert,
579
+ options.conflictColumns
580
+ );
581
+
582
+ for (const result of results) {
583
+ yield result;
584
+ totalProcessed++;
585
+ this.emit('insert', result);
586
+ }
587
+ }
588
+
589
+ this.emit('complete', { totalProcessed, batches: batchIndex + 1 });
590
+ } finally {
591
+ client.release();
592
+ this.activeClient = null;
593
+ }
594
+ }
595
+
596
+ /**
597
+ * Insert a batch of vectors.
598
+ */
599
+ private async insertBatch(
600
+ client: PoolClient,
601
+ batch: VectorEntry[],
602
+ tableName: string,
603
+ vectorColumn: string,
604
+ schemaPrefix: string,
605
+ batchIndex: number,
606
+ upsert?: boolean,
607
+ conflictColumns?: string[]
608
+ ): Promise<InsertResult[]> {
609
+ const results: InsertResult[] = [];
610
+
611
+ // Build multi-row INSERT
612
+ const values: string[] = [];
613
+ const params: unknown[] = [];
614
+ let paramIndex = 1;
615
+
616
+ for (const item of batch) {
617
+ const vector = this.formatVector(item.vector);
618
+ const metadata = item.metadata ? JSON.stringify(item.metadata) : null;
619
+
620
+ if (item.id !== undefined) {
621
+ values.push(`($${paramIndex++}, '${vector}'::vector, $${paramIndex++}::jsonb)`);
622
+ params.push(item.id, metadata);
623
+ } else {
624
+ values.push(`(gen_random_uuid(), '${vector}'::vector, $${paramIndex++}::jsonb)`);
625
+ params.push(metadata);
626
+ }
627
+ }
628
+
629
+ let sql = `INSERT INTO ${schemaPrefix}${this.escapeIdentifier(tableName)} `;
630
+ sql += `(id, ${this.escapeIdentifier(vectorColumn)}, metadata) VALUES ${values.join(', ')}`;
631
+
632
+ if (upsert) {
633
+ const conflictCols = conflictColumns ?? ['id'];
634
+ sql += ` ON CONFLICT (${conflictCols.join(', ')}) DO UPDATE SET `;
635
+ sql += `${this.escapeIdentifier(vectorColumn)} = EXCLUDED.${this.escapeIdentifier(vectorColumn)}, `;
636
+ sql += `metadata = EXCLUDED.metadata`;
637
+ }
638
+
639
+ sql += ' RETURNING id';
640
+
641
+ try {
642
+ const result = await client.query<{ id: string | number }>(sql, params);
643
+
644
+ for (let i = 0; i < result.rows.length; i++) {
645
+ results.push({
646
+ id: result.rows[i].id,
647
+ success: true,
648
+ batchIndex,
649
+ itemIndex: i,
650
+ });
651
+ }
652
+ } catch (error) {
653
+ // On batch failure, try individual inserts
654
+ for (let i = 0; i < batch.length; i++) {
655
+ const item = batch[i];
656
+ try {
657
+ const vector = this.formatVector(item.vector);
658
+ const metadata = item.metadata ? JSON.stringify(item.metadata) : null;
659
+
660
+ const singleSql = `INSERT INTO ${schemaPrefix}${this.escapeIdentifier(tableName)} ` +
661
+ `(id, ${this.escapeIdentifier(vectorColumn)}, metadata) VALUES ` +
662
+ `($1, '${vector}'::vector, $2::jsonb) RETURNING id`;
663
+
664
+ const singleResult = await client.query<{ id: string | number }>(
665
+ singleSql,
666
+ [item.id ?? null, metadata]
667
+ );
668
+
669
+ results.push({
670
+ id: singleResult.rows[0]?.id ?? item.id ?? 'unknown',
671
+ success: true,
672
+ batchIndex,
673
+ itemIndex: i,
674
+ });
675
+ } catch (itemError) {
676
+ results.push({
677
+ id: item.id ?? 'unknown',
678
+ success: false,
679
+ error: (itemError as Error).message,
680
+ batchIndex,
681
+ itemIndex: i,
682
+ });
683
+ }
684
+ }
685
+ }
686
+
687
+ return results;
688
+ }
689
+
690
+ // ===========================================================================
691
+ // Backpressure Handling
692
+ // ===========================================================================
693
+
694
+ /**
695
+ * Pause the stream (backpressure).
696
+ */
697
+ pause(): void {
698
+ this.state.paused = true;
699
+ this.emit('pause');
700
+ }
701
+
702
+ /**
703
+ * Resume the stream.
704
+ */
705
+ resume(): void {
706
+ this.state.paused = false;
707
+ if (this.state.drainResolve) {
708
+ this.state.drainResolve();
709
+ this.state.drainResolve = null;
710
+ this.state.drainPromise = null;
711
+ }
712
+ this.emit('resume');
713
+ }
714
+
715
+ /**
716
+ * Check if stream is paused.
717
+ */
718
+ isPaused(): boolean {
719
+ return this.state.paused;
720
+ }
721
+
722
+ /**
723
+ * Wait if the stream is paused.
724
+ */
725
+ private async waitIfPaused(): Promise<void> {
726
+ if (!this.state.paused) {
727
+ return;
728
+ }
729
+
730
+ if (!this.state.drainPromise) {
731
+ this.state.drainPromise = new Promise<void>(resolve => {
732
+ this.state.drainResolve = resolve;
733
+ });
734
+ }
735
+
736
+ await this.state.drainPromise;
737
+ }
738
+
739
+ // ===========================================================================
740
+ // Cleanup
741
+ // ===========================================================================
742
+
743
+ /**
744
+ * Abort all active operations.
745
+ */
746
+ async abort(): Promise<void> {
747
+ // Close all active cursors
748
+ if (this.activeClient) {
749
+ const cursors = Array.from(this.activeCursors);
750
+ for (let i = 0; i < cursors.length; i++) {
751
+ const cursorName = cursors[i];
752
+ try {
753
+ await this.activeClient.query(`CLOSE ${this.escapeIdentifier(cursorName)}`);
754
+ } catch {
755
+ // Ignore errors
756
+ }
757
+ }
758
+ this.activeCursors.clear();
759
+ }
760
+
761
+ this.emit('abort');
762
+ }
763
+
764
+ // ===========================================================================
765
+ // Utility Methods
766
+ // ===========================================================================
767
+
768
+ /**
769
+ * Format vector for SQL.
770
+ */
771
+ private formatVector(vector: number[] | Float32Array): string {
772
+ const arr = Array.isArray(vector) ? vector : Array.from(vector);
773
+ return `[${arr.join(',')}]`;
774
+ }
775
+
776
+ /**
777
+ * Parse vector from SQL result.
778
+ */
779
+ private parseVector(vectorStr: string): number[] {
780
+ const cleaned = vectorStr.replace(/[\[\]{}]/g, '');
781
+ return cleaned.split(',').map(Number);
782
+ }
783
+
784
+ /**
785
+ * Escape SQL identifier.
786
+ */
787
+ private escapeIdentifier(identifier: string): string {
788
+ return `"${identifier.replace(/"/g, '""')}"`;
789
+ }
790
+ }
791
+
792
+ // ============================================================================
793
+ // RuVectorTransaction Class
794
+ // ============================================================================
795
+
796
+ /**
797
+ * Enhanced transaction support for RuVector operations.
798
+ *
799
+ * Provides transaction management with:
800
+ * - Isolation levels (read_committed, repeatable_read, serializable)
801
+ * - Savepoints for partial rollback
802
+ * - Vector operations within transaction context
803
+ *
804
+ * @example
805
+ * ```typescript
806
+ * const tx = new RuVectorTransaction(client);
807
+ * await tx.begin('serializable');
808
+ *
809
+ * try {
810
+ * await tx.savepoint('before_insert');
811
+ * await tx.insert({ tableName: 'vectors', vectors: [...] });
812
+ *
813
+ * const results = await tx.search({ query: vector, k: 10 });
814
+ *
815
+ * if (results.length === 0) {
816
+ * await tx.rollbackToSavepoint('before_insert');
817
+ * }
818
+ *
819
+ * await tx.commit();
820
+ * } catch (error) {
821
+ * await tx.rollback();
822
+ * throw error;
823
+ * }
824
+ * ```
825
+ */
826
+ export class RuVectorTransaction extends EventEmitter {
827
+ private readonly client: PoolClient;
828
+ private readonly schema?: string;
829
+ private readonly defaultTableName: string;
830
+ private transactionId: string | null = null;
831
+ private isActive = false;
832
+ private savepoints: Set<string> = new Set();
833
+ private queryCount = 0;
834
+ private startTime: number | null = null;
835
+
836
+ constructor(
837
+ client: PoolClient,
838
+ options: {
839
+ schema?: string;
840
+ defaultTableName?: string;
841
+ } = {}
842
+ ) {
843
+ super();
844
+ this.client = client;
845
+ this.schema = options.schema;
846
+ this.defaultTableName = options.defaultTableName ?? 'vectors';
847
+ }
848
+
849
+ // ===========================================================================
850
+ // Transaction Control
851
+ // ===========================================================================
852
+
853
+ /**
854
+ * Begin a transaction with optional isolation level.
855
+ *
856
+ * @param isolation - Transaction isolation level
857
+ */
858
+ async begin(isolation?: IsolationLevel): Promise<void> {
859
+ if (this.isActive) {
860
+ throw new Error('Transaction already active');
861
+ }
862
+
863
+ this.transactionId = `tx_${Date.now()}_${Math.random().toString(36).slice(2)}`;
864
+ this.startTime = Date.now();
865
+
866
+ let sql = 'BEGIN';
867
+ if (isolation) {
868
+ sql += ` ISOLATION LEVEL ${isolation.replace('_', ' ').toUpperCase()}`;
869
+ }
870
+
871
+ await this.client.query(sql);
872
+ this.isActive = true;
873
+ this.queryCount = 1;
874
+
875
+ this.emit('begin', { transactionId: this.transactionId, isolation });
876
+ }
877
+
878
+ /**
879
+ * Create a savepoint within the transaction.
880
+ *
881
+ * @param name - Savepoint name
882
+ */
883
+ async savepoint(name: string): Promise<void> {
884
+ this.ensureActive();
885
+
886
+ const escapedName = this.escapeIdentifier(name);
887
+ await this.client.query(`SAVEPOINT ${escapedName}`);
888
+ this.savepoints.add(name);
889
+ this.queryCount++;
890
+
891
+ this.emit('savepoint', { transactionId: this.transactionId, name });
892
+ }
893
+
894
+ /**
895
+ * Rollback to a savepoint.
896
+ *
897
+ * @param name - Savepoint name
898
+ */
899
+ async rollbackToSavepoint(name: string): Promise<void> {
900
+ this.ensureActive();
901
+
902
+ if (!this.savepoints.has(name)) {
903
+ throw new Error(`Savepoint '${name}' does not exist`);
904
+ }
905
+
906
+ const escapedName = this.escapeIdentifier(name);
907
+ await this.client.query(`ROLLBACK TO SAVEPOINT ${escapedName}`);
908
+ this.queryCount++;
909
+
910
+ this.emit('rollback_to_savepoint', { transactionId: this.transactionId, name });
911
+ }
912
+
913
+ /**
914
+ * Release a savepoint.
915
+ *
916
+ * @param name - Savepoint name
917
+ */
918
+ async releaseSavepoint(name: string): Promise<void> {
919
+ this.ensureActive();
920
+
921
+ if (!this.savepoints.has(name)) {
922
+ throw new Error(`Savepoint '${name}' does not exist`);
923
+ }
924
+
925
+ const escapedName = this.escapeIdentifier(name);
926
+ await this.client.query(`RELEASE SAVEPOINT ${escapedName}`);
927
+ this.savepoints.delete(name);
928
+ this.queryCount++;
929
+
930
+ this.emit('release_savepoint', { transactionId: this.transactionId, name });
931
+ }
932
+
933
+ /**
934
+ * Commit the transaction.
935
+ */
936
+ async commit(): Promise<void> {
937
+ this.ensureActive();
938
+
939
+ await this.client.query('COMMIT');
940
+ const durationMs = this.startTime ? Date.now() - this.startTime : 0;
941
+
942
+ this.emit('commit', {
943
+ transactionId: this.transactionId,
944
+ queryCount: this.queryCount,
945
+ durationMs,
946
+ });
947
+
948
+ this.cleanup();
949
+ }
950
+
951
+ /**
952
+ * Rollback the transaction.
953
+ */
954
+ async rollback(): Promise<void> {
955
+ if (!this.isActive) {
956
+ return; // Already rolled back or not started
957
+ }
958
+
959
+ await this.client.query('ROLLBACK');
960
+ const durationMs = this.startTime ? Date.now() - this.startTime : 0;
961
+
962
+ this.emit('rollback', {
963
+ transactionId: this.transactionId,
964
+ queryCount: this.queryCount,
965
+ durationMs,
966
+ });
967
+
968
+ this.cleanup();
969
+ }
970
+
971
+ // ===========================================================================
972
+ // Vector Operations within Transaction
973
+ // ===========================================================================
974
+
975
+ /**
976
+ * Perform vector search within the transaction.
977
+ */
978
+ async search(options: VectorSearchOptions): Promise<VectorSearchResult[]> {
979
+ this.ensureActive();
980
+
981
+ const { sql, params } = this.buildSearchQuery(options);
982
+ const result = await this.client.query<{
983
+ id: string | number;
984
+ distance: number;
985
+ [key: string]: unknown;
986
+ }>(sql, params);
987
+
988
+ this.queryCount++;
989
+
990
+ const metric = options.metric ?? 'cosine';
991
+ return result.rows.map((row, index) => {
992
+ const score = metric === 'cosine' || metric === 'dot'
993
+ ? 1 - row.distance
994
+ : 1 / (1 + row.distance);
995
+
996
+ const searchResult: VectorSearchResult = {
997
+ id: row.id,
998
+ score,
999
+ distance: row.distance,
1000
+ rank: index + 1,
1001
+ retrievedAt: new Date(),
1002
+ };
1003
+
1004
+ if (options.includeVector && row[options.vectorColumn ?? 'embedding']) {
1005
+ (searchResult as { vector?: number[] }).vector = this.parseVector(
1006
+ row[options.vectorColumn ?? 'embedding'] as string
1007
+ );
1008
+ }
1009
+
1010
+ if (options.includeMetadata && row.metadata) {
1011
+ (searchResult as { metadata?: Record<string, unknown> }).metadata =
1012
+ row.metadata as Record<string, unknown>;
1013
+ }
1014
+
1015
+ return searchResult;
1016
+ });
1017
+ }
1018
+
1019
+ /**
1020
+ * Insert vectors within the transaction.
1021
+ */
1022
+ async insert(options: VectorInsertOptions): Promise<BatchResult<string>> {
1023
+ this.ensureActive();
1024
+
1025
+ const startTime = Date.now();
1026
+ const tableName = options.tableName ?? this.defaultTableName;
1027
+ const vectorColumn = options.vectorColumn ?? 'embedding';
1028
+ const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';
1029
+
1030
+ const successful: string[] = [];
1031
+ const errors: Array<{ index: number; message: string; input?: unknown }> = [];
1032
+
1033
+ // Build multi-row INSERT
1034
+ const values: string[] = [];
1035
+ const params: unknown[] = [];
1036
+ let paramIndex = 1;
1037
+
1038
+ for (const item of options.vectors) {
1039
+ const vector = this.formatVector(item.vector);
1040
+ const metadata = item.metadata ? JSON.stringify(item.metadata) : null;
1041
+
1042
+ if (item.id !== undefined) {
1043
+ values.push(`($${paramIndex++}, '${vector}'::vector, $${paramIndex++}::jsonb)`);
1044
+ params.push(item.id, metadata);
1045
+ } else {
1046
+ values.push(`(gen_random_uuid(), '${vector}'::vector, $${paramIndex++}::jsonb)`);
1047
+ params.push(metadata);
1048
+ }
1049
+ }
1050
+
1051
+ let sql = `INSERT INTO ${schemaPrefix}${this.escapeIdentifier(tableName)} `;
1052
+ sql += `(id, ${this.escapeIdentifier(vectorColumn)}, metadata) VALUES ${values.join(', ')}`;
1053
+
1054
+ if (options.upsert) {
1055
+ const conflictCols = options.conflictColumns ?? ['id'];
1056
+ sql += ` ON CONFLICT (${conflictCols.join(', ')}) DO UPDATE SET `;
1057
+ sql += `${this.escapeIdentifier(vectorColumn)} = EXCLUDED.${this.escapeIdentifier(vectorColumn)}, `;
1058
+ sql += `metadata = EXCLUDED.metadata`;
1059
+ }
1060
+
1061
+ sql += ' RETURNING id';
1062
+
1063
+ try {
1064
+ const result = await this.client.query<{ id: string }>(sql, params);
1065
+ this.queryCount++;
1066
+
1067
+ if (result.rows) {
1068
+ successful.push(...result.rows.map(r => String(r.id)));
1069
+ }
1070
+ } catch (error) {
1071
+ errors.push({
1072
+ index: 0,
1073
+ message: (error as Error).message,
1074
+ });
1075
+ }
1076
+
1077
+ const durationMs = Date.now() - startTime;
1078
+ const insertedCount = successful.length;
1079
+
1080
+ return {
1081
+ total: options.vectors.length,
1082
+ successful: insertedCount,
1083
+ failed: options.vectors.length - insertedCount,
1084
+ results: successful,
1085
+ errors: errors.length > 0 ? errors : undefined,
1086
+ durationMs,
1087
+ throughput: insertedCount / (durationMs / 1000),
1088
+ };
1089
+ }
1090
+
1091
+ /**
1092
+ * Update a vector within the transaction.
1093
+ */
1094
+ async update(options: VectorUpdateOptions): Promise<boolean> {
1095
+ this.ensureActive();
1096
+
1097
+ const tableName = options.tableName ?? this.defaultTableName;
1098
+ const vectorColumn = options.vectorColumn ?? 'embedding';
1099
+ const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';
1100
+
1101
+ const setClauses: string[] = [];
1102
+ const params: unknown[] = [];
1103
+ let paramIndex = 1;
1104
+
1105
+ if (options.vector) {
1106
+ const vector = this.formatVector(options.vector);
1107
+ setClauses.push(`${this.escapeIdentifier(vectorColumn)} = '${vector}'::vector`);
1108
+ }
1109
+
1110
+ if (options.metadata) {
1111
+ if (options.mergeMetadata) {
1112
+ setClauses.push(`metadata = metadata || $${paramIndex++}::jsonb`);
1113
+ } else {
1114
+ setClauses.push(`metadata = $${paramIndex++}::jsonb`);
1115
+ }
1116
+ params.push(JSON.stringify(options.metadata));
1117
+ }
1118
+
1119
+ if (setClauses.length === 0) {
1120
+ return false;
1121
+ }
1122
+
1123
+ params.push(options.id);
1124
+ const sql = `UPDATE ${schemaPrefix}${this.escapeIdentifier(tableName)} ` +
1125
+ `SET ${setClauses.join(', ')} WHERE id = $${paramIndex}`;
1126
+
1127
+ const result = await this.client.query(sql, params);
1128
+ this.queryCount++;
1129
+
1130
+ return (result.rowCount ?? 0) > 0;
1131
+ }
1132
+
1133
+ /**
1134
+ * Delete vectors within the transaction.
1135
+ *
1136
+ * @param ids - IDs to delete
1137
+ * @param tableName - Table name (optional)
1138
+ * @returns Number of deleted rows
1139
+ */
1140
+ async delete(ids: (string | number)[], tableName?: string): Promise<number> {
1141
+ this.ensureActive();
1142
+
1143
+ const table = tableName ?? this.defaultTableName;
1144
+ const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';
1145
+
1146
+ const placeholders = ids.map((_, i) => `$${i + 1}`).join(', ');
1147
+ const sql = `DELETE FROM ${schemaPrefix}${this.escapeIdentifier(table)} WHERE id IN (${placeholders})`;
1148
+
1149
+ const result = await this.client.query(sql, ids);
1150
+ this.queryCount++;
1151
+
1152
+ return result.rowCount ?? 0;
1153
+ }
1154
+
1155
+ /**
1156
+ * Execute a raw query within the transaction.
1157
+ */
1158
+ async query<T = Record<string, unknown>>(
1159
+ sql: string,
1160
+ params?: unknown[]
1161
+ ): Promise<QueryResult<T>> {
1162
+ this.ensureActive();
1163
+
1164
+ const startTime = Date.now();
1165
+ const result = await this.client.query<T>(sql, params);
1166
+ this.queryCount++;
1167
+
1168
+ return {
1169
+ rows: result.rows,
1170
+ rowCount: result.rowCount ?? 0,
1171
+ durationMs: Date.now() - startTime,
1172
+ command: result.command,
1173
+ };
1174
+ }
1175
+
1176
+ // ===========================================================================
1177
+ // Utility Methods
1178
+ // ===========================================================================
1179
+
1180
+ /**
1181
+ * Get transaction status.
1182
+ */
1183
+ getStatus(): {
1184
+ transactionId: string | null;
1185
+ isActive: boolean;
1186
+ savepoints: string[];
1187
+ queryCount: number;
1188
+ durationMs: number;
1189
+ } {
1190
+ return {
1191
+ transactionId: this.transactionId,
1192
+ isActive: this.isActive,
1193
+ savepoints: Array.from(this.savepoints),
1194
+ queryCount: this.queryCount,
1195
+ durationMs: this.startTime ? Date.now() - this.startTime : 0,
1196
+ };
1197
+ }
1198
+
1199
+ /**
1200
+ * Ensure transaction is active.
1201
+ */
1202
+ private ensureActive(): void {
1203
+ if (!this.isActive) {
1204
+ throw new Error('Transaction is not active. Call begin() first.');
1205
+ }
1206
+ }
1207
+
1208
+ /**
1209
+ * Build search query SQL.
1210
+ */
1211
+ private buildSearchQuery(options: VectorSearchOptions): { sql: string; params: unknown[] } {
1212
+ const tableName = options.tableName ?? this.defaultTableName;
1213
+ const vectorColumn = options.vectorColumn ?? 'embedding';
1214
+ const metric = options.metric ?? 'cosine';
1215
+ const operator = DISTANCE_OPERATORS[metric] ?? '<=>';
1216
+
1217
+ const queryVector = this.formatVector(options.query);
1218
+ const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';
1219
+
1220
+ const selectColumns = options.selectColumns ?? ['id'];
1221
+ const columnList = [...selectColumns];
1222
+
1223
+ if (options.includeVector) {
1224
+ columnList.push(vectorColumn);
1225
+ }
1226
+ if (options.includeMetadata) {
1227
+ columnList.push('metadata');
1228
+ }
1229
+
1230
+ const distanceExpr = `${this.escapeIdentifier(vectorColumn)} ${operator} '${queryVector}'::vector`;
1231
+ columnList.push(`(${distanceExpr}) as distance`);
1232
+
1233
+ const whereClauses: string[] = [];
1234
+ const params: unknown[] = [];
1235
+ let paramIndex = 1;
1236
+
1237
+ if (options.filter) {
1238
+ for (const [key, value] of Object.entries(options.filter)) {
1239
+ if (key === 'metadata') {
1240
+ whereClauses.push(`metadata @> $${paramIndex++}::jsonb`);
1241
+ params.push(JSON.stringify(value));
1242
+ } else {
1243
+ whereClauses.push(`${this.escapeIdentifier(key)} = $${paramIndex++}`);
1244
+ params.push(value);
1245
+ }
1246
+ }
1247
+ }
1248
+
1249
+ let sql = `SELECT ${columnList.join(', ')} FROM ${schemaPrefix}${this.escapeIdentifier(tableName)}`;
1250
+
1251
+ if (whereClauses.length > 0) {
1252
+ sql += ` WHERE ${whereClauses.join(' AND ')}`;
1253
+ }
1254
+
1255
+ sql += ` ORDER BY ${distanceExpr} ASC`;
1256
+ sql += ` LIMIT ${options.k}`;
1257
+
1258
+ return { sql, params };
1259
+ }
1260
+
1261
+ /**
1262
+ * Cleanup transaction state.
1263
+ */
1264
+ private cleanup(): void {
1265
+ this.isActive = false;
1266
+ this.savepoints.clear();
1267
+ this.transactionId = null;
1268
+ this.startTime = null;
1269
+ }
1270
+
1271
+ /**
1272
+ * Format vector for SQL.
1273
+ */
1274
+ private formatVector(vector: number[] | Float32Array): string {
1275
+ const arr = Array.isArray(vector) ? vector : Array.from(vector);
1276
+ return `[${arr.join(',')}]`;
1277
+ }
1278
+
1279
+ /**
1280
+ * Parse vector from SQL result.
1281
+ */
1282
+ private parseVector(vectorStr: string): number[] {
1283
+ const cleaned = vectorStr.replace(/[\[\]{}]/g, '');
1284
+ return cleaned.split(',').map(Number);
1285
+ }
1286
+
1287
+ /**
1288
+ * Escape SQL identifier.
1289
+ */
1290
+ private escapeIdentifier(identifier: string): string {
1291
+ return `"${identifier.replace(/"/g, '""')}"`;
1292
+ }
1293
+ }
1294
+
1295
+ // ============================================================================
1296
+ // BatchProcessor Class
1297
+ // ============================================================================
1298
+
1299
/**
 * Batch processor for large dataset operations.
 *
 * Provides efficient processing of large datasets with configurable
 * batch sizes, concurrency, and error handling.
 *
 * @example
 * ```typescript
 * const processor = new BatchProcessor(bridge, { batchSize: 500, concurrency: 4 });
 *
 * async function* loadData() {
 *   for (const item of massiveDataset) {
 *     yield item;
 *   }
 * }
 *
 * for await (const result of processor.processBatch(loadData(), async (batch) => {
 *   return batch.map(item => processItem(item));
 * })) {
 *   console.log(result);
 * }
 * ```
 */
export class BatchProcessor extends EventEmitter {
  // Pool is only used by the search helpers; processBatch itself performs
  // no I/O — the caller's processor function does.
  private readonly pool: Pool;
  // Fully-resolved options. NOTE(review): useTransaction is stored but not
  // referenced anywhere in this class — confirm whether it is still needed.
  private readonly options: Required<BatchOptions>;
  // Optional schema qualifier used when building search SQL.
  private readonly schema?: string;

  constructor(
    pool: Pool,
    options: BatchOptions & { schema?: string } = {}
  ) {
    super();
    this.pool = pool;
    this.schema = options.schema;
    this.options = {
      batchSize: options.batchSize ?? DEFAULT_BATCH_SIZE,
      concurrency: options.concurrency ?? DEFAULT_CONCURRENCY,
      retryOnFailure: options.retryOnFailure ?? true,
      maxRetries: options.maxRetries ?? 3,
      useTransaction: options.useTransaction ?? false,
    };
  }

  /**
   * Process items in batches with custom processor function.
   *
   * Note: with concurrency > 1, results may be yielded out of batch order —
   * whichever in-flight batch finishes first is streamed first.
   *
   * @param items - Async iterable of items to process
   * @param processor - Batch processing function
   * @param options - Processing options
   * @yields Processed results
   */
  async *processBatch<T, R>(
    items: AsyncIterable<T>,
    processor: (batch: T[]) => Promise<R[]>,
    options?: {
      batchSize?: number;
      concurrency?: number;
      onBatchComplete?: (batchIndex: number, results: R[]) => void;
    }
  ): AsyncGenerator<R, void, undefined> {
    const batchSize = options?.batchSize ?? this.options.batchSize;
    const concurrency = options?.concurrency ?? this.options.concurrency;

    let batch: T[] = [];
    let batchIndex = 0;
    const pendingBatches: Promise<{ index: number; results: R[] }>[] = [];

    // Process items and accumulate into batches
    for await (const item of items) {
      batch.push(item);

      if (batch.length >= batchSize) {
        const currentBatch = batch;
        const currentIndex = batchIndex;
        batch = [];
        batchIndex++;

        // Add batch to processing queue
        const batchPromise = this.processSingleBatch(
          currentBatch,
          processor,
          currentIndex
        ).then(results => {
          options?.onBatchComplete?.(currentIndex, results);
          return { index: currentIndex, results };
        });

        pendingBatches.push(batchPromise);

        // Backpressure: once `concurrency` batches are in flight, wait for
        // whichever finishes first and stream its results before reading
        // more input.
        if (pendingBatches.length >= concurrency) {
          const completed = await Promise.race(
            pendingBatches.map((p, i) => p.then(r => ({ ...r, promiseIndex: i })))
          );

          // Remove completed batch from pending. The promiseIndex is valid
          // because the array is not mutated between the map and this splice.
          pendingBatches.splice(completed.promiseIndex, 1);

          for (const result of completed.results) {
            yield result;
          }
        }
      }
    }

    // Process remaining (final partial) batch. NOTE(review): this is awaited
    // before draining still-pending earlier batches, so its results can be
    // yielded ahead of theirs.
    if (batch.length > 0) {
      const results = await this.processSingleBatch(batch, processor, batchIndex);
      options?.onBatchComplete?.(batchIndex, results);
      for (const result of results) {
        yield result;
      }
    }

    // Wait for remaining pending batches, yielding them in batch order.
    const remainingResults = await Promise.all(pendingBatches);
    for (const { results } of remainingResults.sort((a, b) => a.index - b.index)) {
      for (const result of results) {
        yield result;
      }
    }
  }

  /**
   * Perform parallel search across multiple queries.
   *
   * Queries are processed in waves of `concurrency`; the result array is
   * index-aligned with the input queries.
   *
   * @param queries - Array of query vectors
   * @param options - Search options
   * @returns Array of search results for each query
   */
  async parallelSearch(
    queries: number[][],
    options: Omit<VectorSearchOptions, 'query'>
  ): Promise<VectorSearchResult[][]> {
    const concurrency = this.options.concurrency;
    const results: VectorSearchResult[][] = new Array(queries.length);

    // Process queries in parallel batches
    for (let i = 0; i < queries.length; i += concurrency) {
      const batchQueries = queries.slice(i, i + concurrency);
      const batchResults = await Promise.all(
        batchQueries.map((query, j) =>
          this.executeSingleSearch({ ...options, query } as VectorSearchOptions)
            .then(r => ({ index: i + j, results: r }))
        )
      );

      for (const { index, results: searchResults } of batchResults) {
        results[index] = searchResults;
      }

      this.emit('batch_search_complete', {
        batchStart: i,
        batchEnd: Math.min(i + concurrency, queries.length),
        total: queries.length,
      });
    }

    return results;
  }

  /**
   * Process a single batch with retry support.
   *
   * Retries up to maxRetries attempts with exponential backoff (capped at
   * 10s); emits 'batch_complete' / 'batch_error' events per attempt.
   */
  private async processSingleBatch<T, R>(
    batch: T[],
    processor: (batch: T[]) => Promise<R[]>,
    batchIndex: number
  ): Promise<R[]> {
    let attempt = 0;
    let lastError: Error | null = null;

    while (attempt < this.options.maxRetries) {
      attempt++;
      try {
        const results = await processor(batch);
        this.emit('batch_complete', { batchIndex, attempt, success: true });
        return results;
      } catch (error) {
        lastError = error as Error;
        this.emit('batch_error', { batchIndex, attempt, error: lastError });

        if (!this.options.retryOnFailure || attempt >= this.options.maxRetries) {
          break;
        }

        // Exponential backoff
        await this.sleep(Math.min(1000 * Math.pow(2, attempt - 1), 10000));
      }
    }

    throw new Error(`Batch ${batchIndex} failed after ${attempt} attempts: ${lastError?.message}`);
  }

  /**
   * Execute a single search query.
   *
   * Checks a client out of the pool and releases it in a finally block so
   * the pool cannot leak connections on query failure.
   */
  private async executeSingleSearch(
    options: VectorSearchOptions
  ): Promise<VectorSearchResult[]> {
    const client = await this.pool.connect();
    try {
      const { sql, params } = this.buildSearchQuery(options);
      const result = await client.query<{
        id: string | number;
        distance: number;
        [key: string]: unknown;
      }>(sql, params);

      const metric = options.metric ?? 'cosine';
      return result.rows.map((row, index) => {
        // Cosine/dot distances become similarity via 1 - d; others via
        // 1 / (1 + d).
        const score = metric === 'cosine' || metric === 'dot'
          ? 1 - row.distance
          : 1 / (1 + row.distance);

        return {
          id: row.id,
          score,
          distance: row.distance,
          rank: index + 1,
          retrievedAt: new Date(),
        };
      });
    } finally {
      client.release();
    }
  }

  /**
   * Build search query SQL.
   *
   * NOTE(review): unlike RuVectorTransaction.buildSearchQuery, this variant
   * ignores options.filter / includeVector / includeMetadata, interpolates
   * identifiers and `k` directly, and always returns an empty params array —
   * confirm callers only supply trusted option values.
   */
  private buildSearchQuery(options: VectorSearchOptions): { sql: string; params: unknown[] } {
    const tableName = options.tableName ?? 'vectors';
    const vectorColumn = options.vectorColumn ?? 'embedding';
    const metric = options.metric ?? 'cosine';
    const operator = DISTANCE_OPERATORS[metric] ?? '<=>';

    const queryVector = this.formatVector(options.query);
    const schemaPrefix = this.schema ? `"${this.schema}".` : '';

    const selectColumns = options.selectColumns ?? ['id'];
    const distanceExpr = `"${vectorColumn}" ${operator} '${queryVector}'::vector`;

    let sql = `SELECT ${selectColumns.join(', ')}, (${distanceExpr}) as distance ` +
      `FROM ${schemaPrefix}"${tableName}" ` +
      `ORDER BY ${distanceExpr} ASC ` +
      `LIMIT ${options.k}`;

    return { sql, params: [] };
  }

  /**
   * Format vector for SQL (pgvector text literal, e.g. "[1,2,3]").
   */
  private formatVector(vector: number[] | Float32Array): string {
    const arr = Array.isArray(vector) ? vector : Array.from(vector);
    return `[${arr.join(',')}]`;
  }

  /**
   * Sleep utility used by the retry backoff.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
1566
+
1567
+ // ============================================================================
1568
+ // PoolEventEmitter Class
1569
+ // ============================================================================
1570
+
1571
+ /**
1572
+ * Event emitter for connection pool lifecycle events.
1573
+ *
1574
+ * Provides typed event handling for pool operations.
1575
+ *
1576
+ * @example
1577
+ * ```typescript
1578
+ * const poolEvents = new PoolEventEmitter(pool);
1579
+ *
1580
+ * poolEvents.on('pool:connect', (client) => {
1581
+ * console.log('Client connected');
1582
+ * });
1583
+ *
1584
+ * poolEvents.on('pool:error', (error, client) => {
1585
+ * console.error('Pool error:', error);
1586
+ * });
1587
+ * ```
1588
+ */
1589
+ export class PoolEventEmitter extends EventEmitter {
1590
+ private readonly pool: Pool;
1591
+
1592
+ constructor(pool: Pool) {
1593
+ super();
1594
+ this.pool = pool;
1595
+ this.setupListeners();
1596
+ }
1597
+
1598
+ /**
1599
+ * Add typed event listener.
1600
+ */
1601
+ on<K extends keyof PoolEvents>(event: K, listener: PoolEvents[K]): this {
1602
+ return super.on(event, listener as (...args: unknown[]) => void);
1603
+ }
1604
+
1605
+ /**
1606
+ * Add one-time typed event listener.
1607
+ */
1608
+ once<K extends keyof PoolEvents>(event: K, listener: PoolEvents[K]): this {
1609
+ return super.once(event, listener as (...args: unknown[]) => void);
1610
+ }
1611
+
1612
+ /**
1613
+ * Remove typed event listener.
1614
+ */
1615
+ off<K extends keyof PoolEvents>(event: K, listener: PoolEvents[K]): this {
1616
+ return super.off(event, listener as (...args: unknown[]) => void);
1617
+ }
1618
+
1619
+ /**
1620
+ * Emit typed event.
1621
+ */
1622
+ emit<K extends keyof PoolEvents>(
1623
+ event: K,
1624
+ ...args: Parameters<PoolEvents[K]>
1625
+ ): boolean {
1626
+ return super.emit(event, ...args);
1627
+ }
1628
+
1629
+ /**
1630
+ * Get current pool statistics.
1631
+ */
1632
+ getStats(): {
1633
+ totalCount: number;
1634
+ idleCount: number;
1635
+ waitingCount: number;
1636
+ } {
1637
+ return {
1638
+ totalCount: this.pool.totalCount,
1639
+ idleCount: this.pool.idleCount,
1640
+ waitingCount: this.pool.waitingCount,
1641
+ };
1642
+ }
1643
+
1644
+ /**
1645
+ * Setup pool event listeners.
1646
+ */
1647
+ private setupListeners(): void {
1648
+ this.pool.on('connect', (...args: unknown[]) => {
1649
+ const client = args[0] as PoolClient;
1650
+ this.emit('pool:connect', client);
1651
+ });
1652
+
1653
+ this.pool.on('acquire', (...args: unknown[]) => {
1654
+ const client = args[0] as PoolClient;
1655
+ this.emit('pool:acquire', client);
1656
+ });
1657
+
1658
+ this.pool.on('release', (...args: unknown[]) => {
1659
+ const client = args[0] as PoolClient;
1660
+ this.emit('pool:release', client);
1661
+ });
1662
+
1663
+ this.pool.on('remove', (...args: unknown[]) => {
1664
+ const client = args[0] as PoolClient;
1665
+ this.emit('pool:remove', client);
1666
+ });
1667
+
1668
+ this.pool.on('error', (...args: unknown[]) => {
1669
+ const error = args[0] as Error;
1670
+ const client = args[1] as PoolClient | undefined;
1671
+ this.emit('pool:error', error, client);
1672
+ });
1673
+ }
1674
+ }
1675
+
1676
+ // ============================================================================
1677
+ // Factory Functions
1678
+ // ============================================================================
1679
+
1680
+ /**
1681
+ * Create a new RuVectorStream instance.
1682
+ */
1683
+ export function createRuVectorStream(
1684
+ pool: Pool,
1685
+ options?: {
1686
+ schema?: string;
1687
+ defaultTableName?: string;
1688
+ highWaterMark?: number;
1689
+ }
1690
+ ): RuVectorStream {
1691
+ return new RuVectorStream(pool, options);
1692
+ }
1693
+
1694
+ /**
1695
+ * Create a new RuVectorTransaction instance.
1696
+ */
1697
+ export function createRuVectorTransaction(
1698
+ client: PoolClient,
1699
+ options?: {
1700
+ schema?: string;
1701
+ defaultTableName?: string;
1702
+ }
1703
+ ): RuVectorTransaction {
1704
+ return new RuVectorTransaction(client, options);
1705
+ }
1706
+
1707
+ /**
1708
+ * Create a new BatchProcessor instance.
1709
+ */
1710
+ export function createBatchProcessor(
1711
+ pool: Pool,
1712
+ options?: BatchOptions & { schema?: string }
1713
+ ): BatchProcessor {
1714
+ return new BatchProcessor(pool, options);
1715
+ }
1716
+
1717
+ /**
1718
+ * Create a new PoolEventEmitter instance.
1719
+ */
1720
+ export function createPoolEventEmitter(pool: Pool): PoolEventEmitter {
1721
+ return new PoolEventEmitter(pool);
1722
+ }
1723
+
1724
+ // ============================================================================
1725
+ // Default Export
1726
+ // ============================================================================
1727
+
1728
/**
 * Aggregate default export: streaming, transaction, batch-processing, and
 * pool-event helpers together with their factory functions. All names are
 * also available as named exports.
 */
export default {
  RuVectorStream,
  RuVectorTransaction,
  BatchProcessor,
  PoolEventEmitter,
  createRuVectorStream,
  createRuVectorTransaction,
  createBatchProcessor,
  createPoolEventEmitter,
};