verso-db 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/CHANGELOG.md +13 -0
  2. package/README.md +13 -7
  3. package/dist/BinaryHeap.d.ts +11 -1
  4. package/dist/BinaryHeap.d.ts.map +1 -1
  5. package/dist/BinaryHeap.js +138 -0
  6. package/dist/BinaryHeap.js.map +1 -0
  7. package/dist/Collection.d.ts +30 -4
  8. package/dist/Collection.d.ts.map +1 -1
  9. package/dist/Collection.js +1186 -0
  10. package/dist/Collection.js.map +1 -0
  11. package/dist/HNSWIndex.d.ts +59 -0
  12. package/dist/HNSWIndex.d.ts.map +1 -1
  13. package/dist/HNSWIndex.js +2818 -0
  14. package/dist/HNSWIndex.js.map +1 -0
  15. package/dist/MaxBinaryHeap.d.ts +2 -64
  16. package/dist/MaxBinaryHeap.d.ts.map +1 -1
  17. package/dist/MaxBinaryHeap.js +5 -0
  18. package/dist/MaxBinaryHeap.js.map +1 -0
  19. package/dist/SearchWorker.d.ts +57 -4
  20. package/dist/SearchWorker.d.ts.map +1 -1
  21. package/dist/SearchWorker.js +573 -0
  22. package/dist/SearchWorker.js.map +1 -0
  23. package/dist/VectorDB.d.ts.map +1 -1
  24. package/dist/VectorDB.js +246 -0
  25. package/dist/VectorDB.js.map +1 -0
  26. package/dist/WorkerPool.d.ts +32 -2
  27. package/dist/WorkerPool.d.ts.map +1 -1
  28. package/dist/WorkerPool.js +266 -0
  29. package/dist/WorkerPool.js.map +1 -0
  30. package/dist/backends/JsDistanceBackend.d.ts.map +1 -1
  31. package/dist/backends/JsDistanceBackend.js +163 -0
  32. package/dist/backends/JsDistanceBackend.js.map +1 -0
  33. package/dist/encoding/DeltaEncoder.d.ts +2 -2
  34. package/dist/encoding/DeltaEncoder.d.ts.map +1 -1
  35. package/dist/encoding/DeltaEncoder.js +199 -0
  36. package/dist/encoding/DeltaEncoder.js.map +1 -0
  37. package/dist/errors.js +97 -0
  38. package/dist/errors.js.map +1 -0
  39. package/dist/index.d.ts +3 -3
  40. package/dist/index.d.ts.map +1 -1
  41. package/dist/index.js +61 -42
  42. package/dist/index.js.map +1 -9
  43. package/dist/presets.js +205 -0
  44. package/dist/presets.js.map +1 -0
  45. package/dist/quantization/ScalarQuantizer.d.ts +0 -34
  46. package/dist/quantization/ScalarQuantizer.d.ts.map +1 -1
  47. package/dist/quantization/ScalarQuantizer.js +346 -0
  48. package/dist/quantization/ScalarQuantizer.js.map +1 -0
  49. package/dist/storage/BatchWriter.js +351 -0
  50. package/dist/storage/BatchWriter.js.map +1 -0
  51. package/dist/storage/BunStorageBackend.d.ts +7 -3
  52. package/dist/storage/BunStorageBackend.d.ts.map +1 -1
  53. package/dist/storage/BunStorageBackend.js +182 -0
  54. package/dist/storage/BunStorageBackend.js.map +1 -0
  55. package/dist/storage/MemoryBackend.js +109 -0
  56. package/dist/storage/MemoryBackend.js.map +1 -0
  57. package/dist/storage/OPFSBackend.d.ts.map +1 -1
  58. package/dist/storage/OPFSBackend.js +325 -0
  59. package/dist/storage/OPFSBackend.js.map +1 -0
  60. package/dist/storage/StorageBackend.js +12 -0
  61. package/dist/storage/StorageBackend.js.map +1 -0
  62. package/dist/storage/WriteAheadLog.js +321 -0
  63. package/dist/storage/WriteAheadLog.js.map +1 -0
  64. package/dist/storage/createStorageBackend.d.ts +4 -0
  65. package/dist/storage/createStorageBackend.d.ts.map +1 -1
  66. package/dist/storage/createStorageBackend.js +119 -0
  67. package/dist/storage/createStorageBackend.js.map +1 -0
  68. package/{src/storage/index.ts → dist/storage/index.js} +7 -27
  69. package/dist/storage/index.js.map +1 -0
  70. package/dist/storage/nodeFsRuntime.d.ts +14 -0
  71. package/dist/storage/nodeFsRuntime.d.ts.map +1 -0
  72. package/dist/storage/nodeFsRuntime.js +105 -0
  73. package/dist/storage/nodeFsRuntime.js.map +1 -0
  74. package/package.json +9 -7
  75. package/src/BinaryHeap.ts +0 -136
  76. package/src/Collection.ts +0 -1262
  77. package/src/HNSWIndex.ts +0 -2894
  78. package/src/MaxBinaryHeap.ts +0 -181
  79. package/src/SearchWorker.ts +0 -264
  80. package/src/VectorDB.ts +0 -319
  81. package/src/WorkerPool.ts +0 -222
  82. package/src/backends/JsDistanceBackend.ts +0 -171
  83. package/src/encoding/DeltaEncoder.ts +0 -236
  84. package/src/errors.ts +0 -110
  85. package/src/index.ts +0 -106
  86. package/src/presets.ts +0 -229
  87. package/src/quantization/ScalarQuantizer.ts +0 -487
  88. package/src/storage/BatchWriter.ts +0 -420
  89. package/src/storage/BunStorageBackend.ts +0 -199
  90. package/src/storage/MemoryBackend.ts +0 -122
  91. package/src/storage/OPFSBackend.ts +0 -348
  92. package/src/storage/StorageBackend.ts +0 -74
  93. package/src/storage/WriteAheadLog.ts +0 -379
  94. package/src/storage/createStorageBackend.ts +0 -137
package/src/VectorDB.ts DELETED
@@ -1,319 +0,0 @@
1
- import { Collection } from './Collection';
2
- import type { DistanceMetric } from './HNSWIndex';
3
- import type { StorageBackend } from './storage/StorageBackend';
4
- import { BunStorageBackend } from './storage/BunStorageBackend';
5
- import { MemoryBackend } from './storage/MemoryBackend';
6
- import { OPFSBackend } from './storage/OPFSBackend';
7
- import { CollectionExistsError, CollectionNotFoundError, VectorDBError } from './errors';
8
-
9
- export interface VectorDBConfig {
10
- /**
11
- * Path for storing collections on disk. Default: './vectordb_data'
12
- * Ignored if a custom storageBackend is provided.
13
- */
14
- storagePath?: string;
15
- /**
16
- * Custom storage backend for platform-specific storage (e.g., OPFS for browsers).
17
- * If not provided, backend is auto-selected for the current runtime.
18
- */
19
- storageBackend?: StorageBackend;
20
- }
21
-
22
- export interface CollectionConfig {
23
- dimension: number;
24
- metric?: DistanceMetric;
25
- M?: number;
26
- efConstruction?: number;
27
- }
28
-
29
- /** Manifest entry stored for each collection */
30
- interface ManifestEntry {
31
- dimension: number;
32
- metric: DistanceMetric;
33
- M: number;
34
- efConstruction: number;
35
- }
36
-
37
- const MANIFEST_KEY = 'manifest.json';
38
-
39
- export class VectorDB {
40
- private collections: Map<string, Collection>;
41
- private storageBackend: StorageBackend;
42
- private initialized: boolean = false;
43
- private initPromise: Promise<void> | null = null;
44
- private createCollectionLocks: Map<string, Promise<void>> = new Map();
45
-
46
- constructor(config?: VectorDBConfig) {
47
- this.collections = new Map();
48
- if (config?.storageBackend) {
49
- this.storageBackend = config.storageBackend;
50
- } else {
51
- this.storageBackend = this.createDefaultStorageBackend(config?.storagePath);
52
- }
53
- }
54
-
55
- private createDefaultStorageBackend(storagePath?: string): StorageBackend {
56
- if (typeof Bun !== 'undefined') {
57
- return new BunStorageBackend(storagePath || './vectordb_data');
58
- }
59
-
60
- if (typeof navigator !== 'undefined' && OPFSBackend.isAvailable()) {
61
- return new OPFSBackend();
62
- }
63
-
64
- return new MemoryBackend();
65
- }
66
-
67
- /**
68
- * Get the storage backend used by this VectorDB instance.
69
- * Useful for custom storage operations or debugging.
70
- */
71
- getStorageBackend(): StorageBackend {
72
- return this.storageBackend;
73
- }
74
-
75
- /**
76
- * Initialize the VectorDB - creates storage directory and loads existing collections.
77
- * Called automatically on first operation if not called explicitly.
78
- * Safe to call concurrently — only runs initialization once.
79
- */
80
- async init(): Promise<void> {
81
- if (this.initialized) return;
82
- if (!this.initPromise) {
83
- this.initPromise = this.doInit().catch((err) => {
84
- // Allow retry after transient initialization failures.
85
- this.initPromise = null;
86
- this.initialized = false;
87
- throw err;
88
- });
89
- }
90
- return this.initPromise;
91
- }
92
-
93
- private async doInit(): Promise<void> {
94
- // Use storage backend to create root directory
95
- await this.storageBackend.mkdir('');
96
-
97
- // Load existing collections from manifest
98
- await this.loadManifest();
99
-
100
- this.initialized = true;
101
- }
102
-
103
- /**
104
- * Load collection metadata from the manifest file and restore collections.
105
- */
106
- private async loadManifest(): Promise<void> {
107
- const data = await this.storageBackend.read(MANIFEST_KEY);
108
- if (!data) return;
109
-
110
- let manifest: Record<string, ManifestEntry>;
111
- try {
112
- const text = new TextDecoder().decode(data);
113
- manifest = JSON.parse(text);
114
- } catch {
115
- // Corrupt manifest JSON — continue with empty collections
116
- return;
117
- }
118
-
119
- const VALID_METRICS = new Set(['cosine', 'euclidean', 'dot_product']);
120
-
121
- for (const [name, entry] of Object.entries(manifest)) {
122
- // Skip collections already loaded
123
- if (this.collections.has(name)) continue;
124
-
125
- try {
126
- this.validateCollectionName(name);
127
- } catch {
128
- console.warn(`Skipping collection '${name}' with invalid name in manifest`);
129
- continue;
130
- }
131
-
132
- // Validate manifest entry types to catch corrupted manifests early
133
- if (
134
- typeof entry.dimension !== 'number' || entry.dimension <= 0 || !Number.isInteger(entry.dimension) ||
135
- typeof entry.metric !== 'string' || !VALID_METRICS.has(entry.metric) ||
136
- typeof entry.M !== 'number' || entry.M <= 0 || !Number.isInteger(entry.M) ||
137
- typeof entry.efConstruction !== 'number' || entry.efConstruction <= 0 || !Number.isInteger(entry.efConstruction)
138
- ) {
139
- console.warn(`Skipping collection '${name}' with invalid manifest entry`);
140
- continue;
141
- }
142
-
143
- const config: CollectionConfig = {
144
- dimension: entry.dimension,
145
- metric: entry.metric,
146
- M: entry.M,
147
- efConstruction: entry.efConstruction,
148
- };
149
-
150
- try {
151
- const collection = new Collection(name, config, this.storageBackend);
152
- await collection.init();
153
- this.collections.set(name, collection);
154
- } catch (e) {
155
- // Per-collection init failure — skip this collection but continue loading others
156
- console.warn(`Failed to load collection '${name}':`, e);
157
- }
158
- }
159
- }
160
-
161
- /**
162
- * Save collection metadata to the manifest file.
163
- */
164
- private async saveManifest(): Promise<void> {
165
- const manifest: Record<string, ManifestEntry> = {};
166
-
167
- for (const [name, collection] of this.collections) {
168
- manifest[name] = {
169
- dimension: collection.getDimension(),
170
- metric: collection.getMetric(),
171
- M: collection.getM(),
172
- efConstruction: collection.getEfConstruction(),
173
- };
174
- }
175
-
176
- const data = new TextEncoder().encode(JSON.stringify(manifest));
177
- await this.storageBackend.write(MANIFEST_KEY, data);
178
- }
179
-
180
- private async withCollectionCreateLock<T>(name: string, fn: () => Promise<T>): Promise<T> {
181
- const previous = this.createCollectionLocks.get(name) ?? Promise.resolve();
182
- let release!: () => void;
183
- const lock = new Promise<void>((resolve) => {
184
- release = resolve;
185
- });
186
- this.createCollectionLocks.set(name, lock);
187
-
188
- await previous;
189
- try {
190
- return await fn();
191
- } finally {
192
- release();
193
- if (this.createCollectionLocks.get(name) === lock) {
194
- this.createCollectionLocks.delete(name);
195
- }
196
- }
197
- }
198
-
199
- async createCollection(name: string, config: CollectionConfig): Promise<Collection> {
200
- this.validateCollectionName(name);
201
-
202
- // Ensure VectorDB is initialized
203
- await this.init();
204
-
205
- return this.withCollectionCreateLock(name, async () => {
206
- if (this.collections.has(name)) {
207
- throw new CollectionExistsError(name);
208
- }
209
-
210
- // Create collection directory using storage backend
211
- await this.storageBackend.mkdir(name);
212
-
213
- const collection = new Collection(name, config, this.storageBackend);
214
- await collection.init();
215
- this.collections.set(name, collection);
216
-
217
- // Persist manifest
218
- await this.saveManifest();
219
-
220
- return collection;
221
- });
222
- }
223
-
224
- async getCollection(name: string): Promise<Collection | undefined> {
225
- await this.init();
226
- return this.collections.get(name);
227
- }
228
-
229
- async listCollections(): Promise<string[]> {
230
- await this.init();
231
- return [...this.collections.keys()];
232
- }
233
-
234
- async deleteCollection(name: string): Promise<void> {
235
- await this.init();
236
- const collection = this.collections.get(name);
237
- if (!collection) {
238
- throw new CollectionNotFoundError(name);
239
- }
240
-
241
- // Destroy without saving — we're deleting the collection
242
- await collection.destroy(false);
243
- this.collections.delete(name);
244
-
245
- // Remove all files under the collection prefix, including versioned snapshots.
246
- const keys = await this.storageBackend.list(name);
247
- for (const key of keys) {
248
- await this.storageBackend.delete(key).catch(() => {});
249
- }
250
-
251
- // Update manifest
252
- await this.saveManifest();
253
- }
254
-
255
- private validateCollectionName(name: string): void {
256
- if (!name || name.trim().length === 0) {
257
- throw new VectorDBError('Collection name must be a non-empty string', 'VALIDATION_ERROR');
258
- }
259
- if (name.length > 255) {
260
- throw new VectorDBError(
261
- `Collection name must be 255 characters or fewer, got ${name.length}`,
262
- 'VALIDATION_ERROR'
263
- );
264
- }
265
- if (/[\/\\.\x00]/.test(name)) {
266
- throw new VectorDBError(
267
- `Collection name '${name}' contains invalid characters (/, \\, ., or null)`,
268
- 'VALIDATION_ERROR'
269
- );
270
- }
271
- // Reject control characters (U+0001–U+001F, U+007F)
272
- // eslint-disable-next-line no-control-regex
273
- if (/[\x01-\x1f\x7f]/.test(name)) {
274
- throw new VectorDBError(
275
- 'Collection name contains control characters',
276
- 'VALIDATION_ERROR'
277
- );
278
- }
279
- // Reject leading/trailing whitespace (checked after control chars since
280
- // tab/etc. are control chars and should get the more specific message)
281
- if (name !== name.trim()) {
282
- throw new VectorDBError('Collection name must not have leading or trailing whitespace', 'VALIDATION_ERROR');
283
- }
284
- }
285
-
286
- async close(): Promise<void> {
287
- // Close all collections — continue on individual failures so remaining
288
- // collections still get saved and cleanup always completes.
289
- const errors: Error[] = [];
290
- for (const collection of this.collections.values()) {
291
- try {
292
- await collection.destroy();
293
- } catch (e) {
294
- errors.push(e instanceof Error ? e : new Error(String(e)));
295
- }
296
- }
297
-
298
- // Persist the manifest so collection metadata survives re-open
299
- if (this.initialized) {
300
- try {
301
- await this.saveManifest();
302
- } catch (e) {
303
- errors.push(e instanceof Error ? e : new Error(String(e)));
304
- }
305
- }
306
-
307
- this.collections.clear();
308
- this.createCollectionLocks.clear();
309
- this.initialized = false;
310
- this.initPromise = null;
311
-
312
- if (errors.length > 0) {
313
- throw new VectorDBError(
314
- `Failed to close ${errors.length} collection(s): ${errors.map(e => e.message).join('; ')}`,
315
- 'STORAGE_ERROR'
316
- );
317
- }
318
- }
319
- }
package/src/WorkerPool.ts DELETED
@@ -1,222 +0,0 @@
1
- /**
2
- * Worker pool for parallel query processing.
3
- * Dispatches queries to workers via round-robin for concurrent execution.
4
- * Falls back to sequential processing when workers are unavailable.
5
- */
6
-
7
- import { HNSWIndex } from './HNSWIndex';
8
-
9
- interface WorkerHandle {
10
- worker: Worker;
11
- busy: boolean;
12
- pendingResolve: Map<number, (results: Array<{ id: number; distance: number }>) => void>;
13
- pendingReject: Map<number, (error: Error) => void>;
14
- }
15
-
16
- export class WorkerPool {
17
- private workers: WorkerHandle[] = [];
18
- private nextQueryId: number = 0;
19
- private roundRobinIndex: number = 0;
20
- private initialized: boolean = false;
21
- private fallbackIndex: HNSWIndex | null = null;
22
-
23
- /**
24
- * Create a worker pool for parallel HNSW search.
25
- *
26
- * @param numWorkers Number of workers (default: available hardware concurrency or 4)
27
- */
28
- constructor(private numWorkers?: number) {
29
- if (!numWorkers) {
30
- this.numWorkers = typeof navigator !== 'undefined'
31
- ? (navigator.hardwareConcurrency ?? 4)
32
- : 4;
33
- }
34
- }
35
-
36
- /**
37
- * Initialize workers with shared index data.
38
- * Each worker receives a copy of the flat vectors and graph structure
39
- * and maintains its own independent search state.
40
- *
41
- * @param index The HNSW index to distribute to workers
42
- */
43
- async init(index: HNSWIndex): Promise<void> {
44
- this.fallbackIndex = index;
45
-
46
- // Check if Worker is available
47
- if (typeof Worker === 'undefined') {
48
- // Workers not available, will use fallback
49
- this.initialized = true;
50
- return;
51
- }
52
-
53
- const sharedData = index.getSharedSearchData();
54
- if (!sharedData) {
55
- // Index can't produce shared data, use fallback
56
- this.initialized = true;
57
- return;
58
- }
59
-
60
- const workerUrl = new URL('./SearchWorker.ts', import.meta.url).href;
61
-
62
- const initPromises: Promise<void>[] = [];
63
-
64
- for (let i = 0; i < this.numWorkers!; i++) {
65
- try {
66
- const worker = new Worker(workerUrl, { type: 'module' });
67
- const handle: WorkerHandle = {
68
- worker,
69
- busy: false,
70
- pendingResolve: new Map(),
71
- pendingReject: new Map(),
72
- };
73
-
74
- worker.onmessage = (event: MessageEvent) => {
75
- const msg = event.data;
76
- if (msg.type === 'result') {
77
- const resolve = handle.pendingResolve.get(msg.queryId);
78
- if (resolve) {
79
- handle.pendingResolve.delete(msg.queryId);
80
- handle.pendingReject.delete(msg.queryId);
81
- resolve(msg.results);
82
- }
83
- } else if (msg.type === 'error') {
84
- const reject = handle.pendingReject.get(msg.queryId);
85
- if (reject) {
86
- handle.pendingResolve.delete(msg.queryId);
87
- handle.pendingReject.delete(msg.queryId);
88
- reject(new Error(msg.error));
89
- }
90
- }
91
- };
92
-
93
- worker.onerror = (event) => {
94
- // Reject all pending queries for this worker
95
- for (const [qid, reject] of handle.pendingReject) {
96
- reject(new Error(`Worker error: ${event.message}`));
97
- }
98
- handle.pendingResolve.clear();
99
- handle.pendingReject.clear();
100
- };
101
-
102
- const readyPromise = new Promise<void>((resolve) => {
103
- const originalHandler = worker.onmessage!;
104
- worker.onmessage = (event: MessageEvent) => {
105
- if (event.data.type === 'ready') {
106
- worker.onmessage = originalHandler;
107
- resolve();
108
- }
109
- };
110
- });
111
-
112
- worker.postMessage({
113
- type: 'init',
114
- ...sharedData,
115
- });
116
-
117
- this.workers.push(handle);
118
- initPromises.push(readyPromise);
119
- } catch {
120
- // Worker creation failed, will use fallback for remaining
121
- break;
122
- }
123
- }
124
-
125
- if (initPromises.length > 0) {
126
- await Promise.all(initPromises);
127
- }
128
-
129
- this.initialized = true;
130
- }
131
-
132
- /**
133
- * Search for k nearest neighbors using worker pool.
134
- * Dispatches to next available worker via round-robin.
135
- *
136
- * @param query Query vector
137
- * @param k Number of results
138
- * @param efSearch Search effort parameter
139
- * @returns Array of {id, distance} results
140
- */
141
- async search(
142
- query: Float32Array,
143
- k: number,
144
- efSearch?: number
145
- ): Promise<Array<{ id: number; distance: number }>> {
146
- if (!this.initialized) {
147
- throw new Error('WorkerPool not initialized. Call init() first.');
148
- }
149
-
150
- // Fallback to sequential if no workers available
151
- if (this.workers.length === 0 && this.fallbackIndex) {
152
- return this.fallbackIndex.searchKNN(query, k, efSearch);
153
- }
154
-
155
- const queryId = this.nextQueryId++;
156
- const workerIdx = this.roundRobinIndex % this.workers.length;
157
- this.roundRobinIndex++;
158
- const handle = this.workers[workerIdx];
159
-
160
- return new Promise<Array<{ id: number; distance: number }>>((resolve, reject) => {
161
- handle.pendingResolve.set(queryId, resolve);
162
- handle.pendingReject.set(queryId, reject);
163
- handle.worker.postMessage({
164
- type: 'search',
165
- queryId,
166
- query,
167
- k,
168
- efSearch: efSearch || Math.max(k * 2, 50),
169
- });
170
- });
171
- }
172
-
173
- /**
174
- * Batch search: dispatch multiple queries in parallel across workers.
175
- *
176
- * @param queries Array of query vectors
177
- * @param k Number of results per query
178
- * @param efSearch Search effort parameter
179
- * @returns Array of results, one per query
180
- */
181
- async searchBatch(
182
- queries: Float32Array[],
183
- k: number,
184
- efSearch?: number
185
- ): Promise<Array<Array<{ id: number; distance: number }>>> {
186
- if (!this.initialized) {
187
- throw new Error('WorkerPool not initialized. Call init() first.');
188
- }
189
-
190
- // Fallback to sequential
191
- if (this.workers.length === 0 && this.fallbackIndex) {
192
- return queries.map(q => this.fallbackIndex!.searchKNN(q, k, efSearch));
193
- }
194
-
195
- const promises = queries.map(q => this.search(q, k, efSearch));
196
- return Promise.all(promises);
197
- }
198
-
199
- /**
200
- * Terminate all workers and clean up.
201
- */
202
- destroy(): void {
203
- for (const handle of this.workers) {
204
- handle.worker.terminate();
205
- for (const reject of handle.pendingReject.values()) {
206
- reject(new Error('Worker pool destroyed'));
207
- }
208
- handle.pendingResolve.clear();
209
- handle.pendingReject.clear();
210
- }
211
- this.workers = [];
212
- this.initialized = false;
213
- this.fallbackIndex = null;
214
- }
215
-
216
- /**
217
- * Get the number of active workers.
218
- */
219
- get workerCount(): number {
220
- return this.workers.length;
221
- }
222
- }
package/src/backends/JsDistanceBackend.ts DELETED
@@ -1,171 +0,0 @@
1
- /**
2
- * Optimized JavaScript distance functions with SIMD-style unrolling.
3
- * These provide ~1.5-2x speedup over naive loops by reducing loop overhead
4
- * and enabling better CPU pipelining.
5
- */
6
-
7
- /**
8
- * Fast inline distance functions for single-vector comparisons.
9
- * Uses 8-wide unrolling with separate accumulators for instruction-level parallelism.
10
- */
11
- function assertMatchingDimensions(a: Float32Array, b: Float32Array, context: string): void {
12
- if (a.length !== b.length) {
13
- throw new Error(`${context}: vector length mismatch (${a.length} vs ${b.length})`);
14
- }
15
- }
16
-
17
- export function dotProductFast(a: Float32Array, b: Float32Array): number {
18
- assertMatchingDimensions(a, b, 'dotProductFast');
19
- const len = a.length;
20
- // Use 8 accumulators for better ILP (instruction-level parallelism)
21
- let sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
22
- let sum4 = 0, sum5 = 0, sum6 = 0, sum7 = 0;
23
- let i = 0;
24
-
25
- // 8-wide unrolling for high-dimensional vectors (768D, 1536D)
26
- const limit8 = len - 7;
27
- for (; i < limit8; i += 8) {
28
- sum0 += a[i] * b[i];
29
- sum1 += a[i + 1] * b[i + 1];
30
- sum2 += a[i + 2] * b[i + 2];
31
- sum3 += a[i + 3] * b[i + 3];
32
- sum4 += a[i + 4] * b[i + 4];
33
- sum5 += a[i + 5] * b[i + 5];
34
- sum6 += a[i + 6] * b[i + 6];
35
- sum7 += a[i + 7] * b[i + 7];
36
- }
37
-
38
- // Handle remaining elements
39
- for (; i < len; i++) {
40
- sum0 += a[i] * b[i];
41
- }
42
-
43
- return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
44
- }
45
-
46
- export function l2SquaredFast(a: Float32Array, b: Float32Array): number {
47
- assertMatchingDimensions(a, b, 'l2SquaredFast');
48
- const len = a.length;
49
- // Use 8 accumulators for better ILP
50
- let sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
51
- let sum4 = 0, sum5 = 0, sum6 = 0, sum7 = 0;
52
- let i = 0;
53
-
54
- // 8-wide unrolling for high-dimensional vectors
55
- const limit8 = len - 7;
56
- for (; i < limit8; i += 8) {
57
- const d0 = a[i] - b[i];
58
- const d1 = a[i + 1] - b[i + 1];
59
- const d2 = a[i + 2] - b[i + 2];
60
- const d3 = a[i + 3] - b[i + 3];
61
- const d4 = a[i + 4] - b[i + 4];
62
- const d5 = a[i + 5] - b[i + 5];
63
- const d6 = a[i + 6] - b[i + 6];
64
- const d7 = a[i + 7] - b[i + 7];
65
- sum0 += d0 * d0;
66
- sum1 += d1 * d1;
67
- sum2 += d2 * d2;
68
- sum3 += d3 * d3;
69
- sum4 += d4 * d4;
70
- sum5 += d5 * d5;
71
- sum6 += d6 * d6;
72
- sum7 += d7 * d7;
73
- }
74
-
75
- // Handle remaining elements
76
- for (; i < len; i++) {
77
- const d = a[i] - b[i];
78
- sum0 += d * d;
79
- }
80
-
81
- return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
82
- }
83
-
84
- /**
85
- * Normalize a vector in place.
86
- * Uses 8-wide unrolling for better ILP.
87
- * Returns the original norm of the vector.
88
- */
89
- export function normalizeInPlace(v: Float32Array): number {
90
- const len = v.length;
91
- let s0 = 0, s1 = 0, s2 = 0, s3 = 0;
92
- let s4 = 0, s5 = 0, s6 = 0, s7 = 0;
93
- let i = 0;
94
- const limit8 = len - 7;
95
-
96
- for (; i < limit8; i += 8) {
97
- s0 += v[i] * v[i];
98
- s1 += v[i + 1] * v[i + 1];
99
- s2 += v[i + 2] * v[i + 2];
100
- s3 += v[i + 3] * v[i + 3];
101
- s4 += v[i + 4] * v[i + 4];
102
- s5 += v[i + 5] * v[i + 5];
103
- s6 += v[i + 6] * v[i + 6];
104
- s7 += v[i + 7] * v[i + 7];
105
- }
106
- for (; i < len; i++) {
107
- s0 += v[i] * v[i];
108
- }
109
-
110
- const norm = Math.sqrt(s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7);
111
- if (norm < 1e-10) {
112
- // Zero or near-zero vector: leave as-is to avoid Infinity/NaN from division
113
- return norm;
114
- }
115
- const invNorm = 1 / norm;
116
- // 8-wide unrolling for the division loop matches the norm computation above
117
- const divLimit8 = len - 7;
118
- let j = 0;
119
- for (; j < divLimit8; j += 8) {
120
- v[j] *= invNorm; v[j + 1] *= invNorm; v[j + 2] *= invNorm; v[j + 3] *= invNorm;
121
- v[j + 4] *= invNorm; v[j + 5] *= invNorm; v[j + 6] *= invNorm; v[j + 7] *= invNorm;
122
- }
123
- for (; j < len; j++) v[j] *= invNorm;
124
- return norm;
125
- }
126
-
127
- /**
128
- * Compute cosine distance between two vectors.
129
- * For pre-normalized vectors, this is simply 1 - dot(a, b).
130
- * Uses 8-wide unrolling for norm computation.
131
- */
132
- export function cosineDistanceFast(a: Float32Array, b: Float32Array, aIsNormalized = false, bIsNormalized = false): number {
133
- assertMatchingDimensions(a, b, 'cosineDistanceFast');
134
- const dot = dotProductFast(a, b);
135
-
136
- if (aIsNormalized && bIsNormalized) {
137
- // Both vectors are normalized, cosine distance = 1 - dot
138
- const dist = 1 - dot;
139
- return dist < 1e-10 ? 0 : dist;
140
- }
141
-
142
- // Need to compute norms with 8-wide unrolling
143
- const len = a.length;
144
- let nA0 = 0, nA1 = 0, nA2 = 0, nA3 = 0;
145
- let nB0 = 0, nB1 = 0, nB2 = 0, nB3 = 0;
146
- let i = 0;
147
- const limit8 = len - 7;
148
-
149
- for (; i < limit8; i += 8) {
150
- nA0 += a[i] * a[i] + a[i + 4] * a[i + 4];
151
- nA1 += a[i + 1] * a[i + 1] + a[i + 5] * a[i + 5];
152
- nA2 += a[i + 2] * a[i + 2] + a[i + 6] * a[i + 6];
153
- nA3 += a[i + 3] * a[i + 3] + a[i + 7] * a[i + 7];
154
- nB0 += b[i] * b[i] + b[i + 4] * b[i + 4];
155
- nB1 += b[i + 1] * b[i + 1] + b[i + 5] * b[i + 5];
156
- nB2 += b[i + 2] * b[i + 2] + b[i + 6] * b[i + 6];
157
- nB3 += b[i + 3] * b[i + 3] + b[i + 7] * b[i + 7];
158
- }
159
- for (; i < len; i++) {
160
- nA0 += a[i] * a[i];
161
- nB0 += b[i] * b[i];
162
- }
163
-
164
- const normA = nA0 + nA1 + nA2 + nA3;
165
- const normB = nB0 + nB1 + nB2 + nB3;
166
- const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
167
- if (magnitude === 0) return 1;
168
-
169
- const dist = 1 - (dot / magnitude);
170
- return dist < 1e-10 ? 0 : dist;
171
- }