sweet-search 2.5.13 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +36 -9
  2. package/core/cli.js +41 -3
  3. package/core/embedding/embedding-local-model.js +106 -10
  4. package/core/embedding/embedding-service.js +59 -1
  5. package/core/embedding/model-client.mjs +257 -0
  6. package/core/embedding/model-server.mjs +217 -0
  7. package/core/incremental-indexing/application/maintenance-handlers.mjs +19 -98
  8. package/core/incremental-indexing/application/maintenance-worker.mjs +46 -9
  9. package/core/incremental-indexing/application/operator-cli.mjs +14 -5
  10. package/core/incremental-indexing/application/production-reconciler-helpers.mjs +40 -0
  11. package/core/incremental-indexing/application/production-reconciler.mjs +718 -54
  12. package/core/incremental-indexing/application/reconciler.mjs +87 -15
  13. package/core/incremental-indexing/domain/cutoff-cache.mjs +191 -0
  14. package/core/incremental-indexing/domain/interval-autotune.mjs +84 -1
  15. package/core/incremental-indexing/domain/reconcile-counters.mjs +0 -4
  16. package/core/incremental-indexing/domain/watermark-scheduler.mjs +0 -24
  17. package/core/incremental-indexing/infrastructure/maintenance-state-reader.mjs +2 -26
  18. package/core/incremental-indexing/infrastructure/manifest.mjs +1 -9
  19. package/core/incremental-indexing/infrastructure/sqlite-fts5.mjs +72 -0
  20. package/core/indexing/artifact-builder.js +1 -1
  21. package/core/indexing/dedup/dedup-phase.js +36 -17
  22. package/core/indexing/dedup/exemplar-selector.js +5 -0
  23. package/core/indexing/index-codebase-v21.js +37 -14
  24. package/core/indexing/index-maintainer.mjs +337 -6
  25. package/core/indexing/indexer-ann.js +27 -434
  26. package/core/indexing/indexer-build.js +30 -14
  27. package/core/indexing/indexer-manifest.js +0 -3
  28. package/core/indexing/indexer-phases.js +101 -25
  29. package/core/indexing/maintainer-launcher.mjs +22 -0
  30. package/core/indexing/maintainer-watcher.mjs +397 -0
  31. package/core/indexing/os-priority.mjs +160 -0
  32. package/core/indexing/rss-budget.mjs +425 -0
  33. package/core/indexing/streaming-vectors.js +450 -0
  34. package/core/infrastructure/config/platform.js +14 -10
  35. package/core/infrastructure/onnx-session-utils.js +37 -0
  36. package/core/infrastructure/sparse-gram-delta-reader.js +11 -1
  37. package/core/ranking/late-interaction-index.js +58 -7
  38. package/core/search/daemon-registry.js +199 -0
  39. package/core/search/search-read-semantic.js +9 -3
  40. package/core/search/search-semantic.js +6 -29
  41. package/core/search/search-server.js +527 -27
  42. package/core/search/session-daemon-prewarm.mjs +110 -1
  43. package/core/search/sweet-search.js +0 -38
  44. package/core/vector-store/binary-hnsw-index.js +692 -78
  45. package/core/vector-store/index.js +1 -4
  46. package/eval/agent-read-workflows/bin/_ss-argparse.mjs +51 -5
  47. package/eval/agent-read-workflows/bin/_ss-helpers.mjs +95 -44
  48. package/eval/agent-read-workflows/bin/ss-read +2 -0
  49. package/mcp/tool-handlers.js +1 -2
  50. package/package.json +11 -8
  51. package/scripts/uninstall.js +2 -0
  52. package/core/vector-store/hnsw-index.js +0 -751
@@ -1,751 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- /**
4
- * HNSW Index Wrapper using USearch
5
- *
6
- * In-memory Hierarchical Navigable Small World graph for fast ANN search.
7
- * Replaces O(N) vector scan with O(log N) approximate nearest neighbor search.
8
- *
9
- * Target: <1ms p50 ANN lookup (often 50-500μs)
10
- *
11
- * Uses usearch for the HNSW implementation (replaces hnswlib-node which had segfaults).
12
- * USearch is actively maintained, 10x faster with SIMD acceleration, and stable.
13
- */
14
-
15
- import fs from 'fs/promises';
16
- import { existsSync, statSync } from 'fs';
17
- import path from 'path';
18
- import { HNSW_CONFIG, DB_PATHS, EMBEDDING_CONFIG } from '../infrastructure/config/index.js';
19
- import {
20
- createBitmap,
21
- loadBitmap,
22
- resizeBitmap,
23
- saveBitmap,
24
- setBit,
25
- isSet,
26
- } from '../infrastructure/tombstone-bitmap-reader.js';
27
-
28
- // =============================================================================
29
- // HNSW INDEX CLASS (USearch Implementation)
30
- // =============================================================================
31
-
32
- const HNSW_MAX_ELEMENTS_HARD_CEILING = 100_000_000;
33
-
34
- export class HNSWIndex {
35
- constructor(options = {}) {
36
- this.dimension = options.dimension || EMBEDDING_CONFIG.hnswDimension;
37
- this.maxElements = options.maxElements || HNSW_CONFIG.maxElements;
38
- this.M = options.M || HNSW_CONFIG.M;
39
- this.efConstruction = options.efConstruction || HNSW_CONFIG.efConstruction;
40
- this.efSearch = options.efSearch || HNSW_CONFIG.efSearch;
41
- this.metric = options.metric || HNSW_CONFIG.metric;
42
- this.indexPath = options.indexPath || DB_PATHS.hnswIndex;
43
- this.stalePath = options.stalePath || `${this.indexPath}.stale.bin`;
44
-
45
- this.index = null;
46
- this.idMap = new Map(); // string id -> numeric key
47
- this.reverseMap = new Map(); // numeric key -> string id
48
- this.metadata = new Map(); // string id -> metadata
49
- this.nextKey = 0;
50
-
51
- // Use pure JS fallback if usearch not available
52
- this.useFallback = false;
53
- this.vectors = []; // Fallback: store all vectors
54
- this.usearchModule = null;
55
- this._staleBitmapCache = null;
56
- }
57
-
58
- /**
59
- * Initialize the HNSW index using USearch
60
- */
61
- async init() {
62
- if (this.index !== null) return;
63
-
64
- try {
65
- // Try to use usearch (native, fast, SIMD-accelerated)
66
- this.usearchModule = await import('usearch');
67
- const Index = this.usearchModule.Index || this.usearchModule.default?.Index;
68
-
69
- if (!Index) {
70
- throw new Error('Index class not found in usearch module');
71
- }
72
-
73
- // Map metric to usearch metric
74
- const metricMap = {
75
- cosine: 'cos',
76
- euclidean: 'l2sq',
77
- ip: 'ip',
78
- };
79
- const usearchMetric = metricMap[this.metric] || 'cos';
80
-
81
- // Create USearch index with configuration
82
- this.index = new Index({
83
- metric: usearchMetric,
84
- connectivity: this.M,
85
- dimensions: this.dimension,
86
- quantization: 'f32',
87
- });
88
- this._reserveNativeCapacity(this.maxElements);
89
- this.useFallback = false;
90
-
91
- console.log(`HNSW: Using USearch (${usearchMetric}, dim=${this.dimension}, M=${this.M})`);
92
- } catch (err) {
93
- // Fallback to pure JS implementation
94
- console.log(`HNSW: Using pure JS fallback (${err.message})`);
95
- this.useFallback = true;
96
- this.vectors = [];
97
- }
98
- }
99
-
100
- /**
101
- * Add a vector to the index
102
- */
103
- async add(id, vector, metadata = {}) {
104
- await this.init();
105
-
106
- // Truncate vector to HNSW dimension if needed
107
- const truncated = vector.length > this.dimension
108
- ? vector.slice(0, this.dimension)
109
- : vector;
110
-
111
- // Normalize for cosine similarity
112
- const normalized = this.normalize(truncated);
113
-
114
- // Convert to Float32Array for usearch
115
- const vecArray = new Float32Array(normalized);
116
-
117
- if (this.idMap.has(id)) {
118
- // Update existing - usearch doesn't support direct update, so we track metadata only
119
- const key = this.idMap.get(id);
120
- this.metadata.set(id, metadata);
121
-
122
- if (this.useFallback) {
123
- this.vectors[key] = { id, vector: normalized, metadata };
124
- }
125
- return key;
126
- }
127
-
128
- // Add new. Commit maps only after native add succeeds; otherwise a
129
- // transient native failure would leave a row visible without a graph node.
130
- const key = this.nextKey;
131
-
132
- if (!this.useFallback && this.index) {
133
- this._addNativeVector(key, vecArray);
134
- } else {
135
- this.vectors[key] = { id, vector: normalized, metadata };
136
- }
137
-
138
- this.nextKey++;
139
- this.idMap.set(id, key);
140
- this.reverseMap.set(key, id);
141
- this.metadata.set(id, metadata);
142
-
143
- return key;
144
- }
145
-
146
- _addNativeVector(key, vecArray) {
147
- try {
148
- this.index.add(BigInt(key), vecArray);
149
- return;
150
- } catch (err) {
151
- if (!isNativeCapacityError(err)) {
152
- throw err;
153
- }
154
- }
155
-
156
- const nextCapacity = this._nextNativeCapacity();
157
- if (!this._reserveNativeCapacity(nextCapacity)) {
158
- throw new Error(
159
- `HNSW capacity exhausted at ${this.maxElements} elements and native reserve() is unavailable`
160
- );
161
- }
162
- this.index.add(BigInt(key), vecArray);
163
- }
164
-
165
- _nextNativeCapacity() {
166
- const current = Math.max(1, this.maxElements, this.nextKey + 1);
167
- return Math.min(
168
- HNSW_MAX_ELEMENTS_HARD_CEILING,
169
- Math.max(current + 1, Math.ceil(current * 1.25), this.nextKey + 1)
170
- );
171
- }
172
-
173
- _reserveNativeCapacity(capacity) {
174
- if (!this.index || typeof this.index.reserve !== 'function') return false;
175
- const nextCapacity = Math.min(HNSW_MAX_ELEMENTS_HARD_CEILING, Math.ceil(capacity));
176
- if (!Number.isFinite(nextCapacity) || nextCapacity <= 0) return false;
177
- this.index.reserve(nextCapacity);
178
- this.maxElements = Math.max(this.maxElements, nextCapacity);
179
- return true;
180
- }
181
-
182
- /**
183
- * Batch add vectors
184
- */
185
- async addBatch(items) {
186
- await this.init();
187
-
188
- const results = [];
189
- for (const { id, vector, metadata } of items) {
190
- const key = await this.add(id, vector, metadata || {});
191
- results.push(key);
192
- }
193
- return results;
194
- }
195
-
196
- /**
197
- * Search for k nearest neighbors
198
- */
199
- _loadStaleBitmap() {
200
- let stat;
201
- try {
202
- stat = statSync(this.stalePath, { bigint: true });
203
- } catch {
204
- this._staleBitmapCache = null;
205
- return null;
206
- }
207
- const statKey = `${stat.mtimeNs}:${stat.ctimeNs}:${stat.size}`;
208
-
209
- if (
210
- this._staleBitmapCache
211
- && this._staleBitmapCache.statKey === statKey
212
- ) {
213
- return this._staleBitmapCache.bitmap;
214
- }
215
-
216
- try {
217
- const bitmap = loadBitmap(this.stalePath);
218
- this._staleBitmapCache = { statKey, bitmap };
219
- return bitmap;
220
- } catch (err) {
221
- if (process.env.SWEET_DEBUG) {
222
- console.debug(`[HNSW] ignoring unreadable stale bitmap ${this.stalePath}: ${err.message}`);
223
- }
224
- this._staleBitmapCache = { statKey, bitmap: null };
225
- return null;
226
- }
227
- }
228
-
229
- _isKeyStale(key, bitmap) {
230
- return bitmap ? isSet(bitmap, key) : false;
231
- }
232
-
233
- _markKeyStale(key) {
234
- const capacity = Math.max(key + 1, this.nextKey, 1);
235
- let bitmap = null;
236
- try {
237
- bitmap = loadBitmap(this.stalePath);
238
- } catch (err) {
239
- if (process.env.SWEET_DEBUG) {
240
- console.debug(`[HNSW] replacing unreadable stale bitmap ${this.stalePath}: ${err.message}`);
241
- }
242
- }
243
- bitmap = bitmap ? resizeBitmap(bitmap, capacity) : createBitmap(capacity);
244
- setBit(bitmap, key);
245
- saveBitmap(this.stalePath, bitmap);
246
- this._staleBitmapCache = null;
247
- }
248
-
249
- async clearStaleBitmap() {
250
- await fs.rm(this.stalePath, { force: true });
251
- this._staleBitmapCache = null;
252
- }
253
-
254
- _oversampleTarget(k, bitmap) {
255
- const searchable = this._searchableKeyCount();
256
- const live = this._liveCount(bitmap);
257
- const tombstoned = Math.max(0, searchable - live);
258
- if (tombstoned === 0) return k;
259
- const s = Math.max(0, Math.min(tombstoned / Math.max(1, searchable), 0.5));
260
- return Math.min(Math.max(k + 64, Math.ceil(k / Math.max(0.05, 1 - s) * 2)), k * 20);
261
- }
262
-
263
- _searchableKeyCount() {
264
- if (this.useFallback) return this.vectors.length;
265
- return Math.max(this.nextKey, this.idMap.size);
266
- }
267
-
268
- _liveCount(bitmap) {
269
- if (!bitmap) return this.idMap.size;
270
- let live = 0;
271
- for (const key of this.reverseMap.keys()) {
272
- if (!this._isKeyStale(key, bitmap)) live++;
273
- }
274
- return live;
275
- }
276
-
277
- async search(queryVector, k = 10) {
278
- await this.init();
279
-
280
- const start = performance.now();
281
- const staleBitmap = this._loadStaleBitmap();
282
-
283
- // Truncate and normalize query
284
- const truncated = queryVector.length > this.dimension
285
- ? queryVector.slice(0, this.dimension)
286
- : queryVector;
287
- const normalized = this.normalize(truncated);
288
-
289
- let results;
290
-
291
- if (!this.useFallback && this.index) {
292
- // Use native USearch
293
- const vecArray = new Float32Array(normalized);
294
- const candidateK = this._oversampleTarget(k, staleBitmap);
295
- const actualK = Math.min(candidateK, this._searchableKeyCount());
296
-
297
- if (actualK === 0) {
298
- results = [];
299
- } else {
300
- const collect = (limit) => {
301
- const searchResult = this.index.search(vecArray, limit);
302
- const collected = [];
303
- // USearch returns { keys: BigUint64Array, distances: Float32Array, count: number }
304
- const count = searchResult.count || searchResult.keys?.length || 0;
305
-
306
- for (let i = 0; i < count; i++) {
307
- const key = Number(searchResult.keys[i]);
308
- if (this._isKeyStale(key, staleBitmap)) continue;
309
- const id = this.reverseMap.get(key);
310
- if (id) {
311
- // Convert distance to similarity (cosine distance to similarity)
312
- const distance = searchResult.distances[i];
313
- const score = this.metric === 'cosine' ? 1 - distance : -distance;
314
-
315
- collected.push({
316
- id,
317
- score,
318
- metadata: this.metadata.get(id) || {},
319
- });
320
- if (collected.length >= k) break;
321
- }
322
- }
323
- return collected;
324
- };
325
-
326
- results = collect(actualK);
327
- const retryK = Math.min(actualK * 2, this._searchableKeyCount());
328
- if (results.length < k && retryK > actualK) {
329
- results = collect(retryK);
330
- }
331
- }
332
- } else {
333
- // Pure JS fallback: O(N) scan
334
- results = this.vectors
335
- .map((v, key) => ({ v, key }))
336
- .filter(({ v, key }) => v !== null && !this._isKeyStale(key, staleBitmap))
337
- .map(({ v }) => ({
338
- id: v.id,
339
- score: this.cosineSimilarity(normalized, v.vector),
340
- metadata: v.metadata || {},
341
- }))
342
- .sort((a, b) => b.score - a.score)
343
- .slice(0, k);
344
- }
345
-
346
- const latency = performance.now() - start;
347
-
348
- return {
349
- results,
350
- latency_us: Math.round(latency * 1000), // microseconds
351
- latency_ms: latency.toFixed(3),
352
- k,
353
- total: this._liveCount(staleBitmap),
354
- usedFallback: this.useFallback,
355
- };
356
- }
357
-
358
- /**
359
- * Get vector by ID
360
- */
361
- async get(id) {
362
- if (!this.idMap.has(id)) return null;
363
- const key = this.idMap.get(id);
364
- if (this._isKeyStale(key, this._loadStaleBitmap())) return null;
365
-
366
- if (this.useFallback) {
367
- return this.vectors[key];
368
- }
369
-
370
- return {
371
- id,
372
- metadata: this.metadata.get(id),
373
- };
374
- }
375
-
376
- /**
377
- * Remove vector by ID (soft delete)
378
- */
379
- async remove(id) {
380
- if (!this.idMap.has(id)) return false;
381
-
382
- const key = this.idMap.get(id);
383
- this._markKeyStale(key);
384
-
385
- if (this.useFallback) {
386
- this.vectors[key] = null;
387
- }
388
-
389
- this.idMap.delete(id);
390
- this.reverseMap.delete(key);
391
- this.metadata.delete(id);
392
-
393
- return true;
394
- }
395
-
396
- /**
397
- * Save index to disk.
398
- *
399
- * Publish semantics: each sidecar is written to a sibling
400
- * `<path>.tmp.<pid>` and then `renameSync`'d into its canonical name.
401
- * On POSIX, atomic rename keeps existing mmaps valid against the
402
- * unlinked old inode — without this, a cross-process reader that
403
- * holds a `usearch.view()` mmap over the canonical .usearch file
404
- * would SIGBUS / SIGSEGV the moment the next reconcile tick or
405
- * maintenance pass truncates+writes the file in place.
406
- *
407
- * Publish ORDER: data first (.usearch / .vectors.json), then
408
- * .meta.json LAST. A fresh reader that successfully reads the new
409
- * meta.json is guaranteed to read the matching data sidecar
410
- * alongside it. The brief residual window — `(OLD meta, NEW
411
- * .usearch)` — yields MISSING results (keys beyond the new index
412
- * size are absent) instead of GARBAGE results.
413
- */
414
- async save(indexPath = this.indexPath) {
415
- await fs.mkdir(path.dirname(indexPath), { recursive: true });
416
-
417
- // Save index state
418
- const state = {
419
- dimension: this.dimension,
420
- maxElements: this.maxElements,
421
- M: this.M,
422
- efConstruction: this.efConstruction,
423
- efSearch: this.efSearch,
424
- metric: this.metric,
425
- nextKey: this.nextKey,
426
- idMap: Array.from(this.idMap.entries()),
427
- metadata: Array.from(this.metadata.entries()),
428
- useFallback: this.useFallback,
429
- };
430
-
431
- const metaPath = indexPath.replace('.idx', '.meta.json');
432
- const metaTmpPath = `${metaPath}.tmp.${process.pid}`;
433
- await fs.writeFile(metaTmpPath, JSON.stringify(state, null, 2));
434
-
435
- if (!this.useFallback && this.index) {
436
- const usearchPath = indexPath.replace('.idx', '.usearch');
437
- const usearchTmpPath = `${usearchPath}.tmp.${process.pid}`;
438
- this.index.save(usearchTmpPath);
439
- // Atomic rename: data first, descriptor last.
440
- await fs.rename(usearchTmpPath, usearchPath);
441
- await fs.rename(metaTmpPath, metaPath);
442
- console.log(`HNSW: Saved ${this.nextKey} vectors to ${usearchPath} (USearch)`);
443
- } else {
444
- const vectorsPath = indexPath.replace('.idx', '.vectors.json');
445
- const vectorsTmpPath = `${vectorsPath}.tmp.${process.pid}`;
446
- await fs.writeFile(vectorsTmpPath, JSON.stringify(this.vectors));
447
- await fs.rename(vectorsTmpPath, vectorsPath);
448
- await fs.rename(metaTmpPath, metaPath);
449
- console.log(`HNSW: Saved ${this.vectors.length} vectors to ${vectorsPath} (fallback)`);
450
- }
451
- }
452
-
453
- /**
454
- * Load index from disk.
455
- * @param {string} indexPath - Path to index files (defaults to this.indexPath)
456
- * @param {Object} options - Load options
457
- * @param {boolean} options.mmap - Use memory-mapped view() for zero-copy search (read-only, no add/remove)
458
- */
459
- async load(indexPath = this.indexPath, options = {}) {
460
- const { mmap = false } = options;
461
- const metaPath = indexPath.replace('.idx', '.meta.json');
462
-
463
- if (!existsSync(metaPath)) {
464
- throw new Error(`Index metadata not found: ${metaPath}`);
465
- }
466
-
467
- // Load metadata
468
- const state = JSON.parse(await fs.readFile(metaPath, 'utf-8'));
469
-
470
- this.dimension = state.dimension;
471
- this.maxElements = state.maxElements;
472
- this.M = state.M;
473
- this.efConstruction = state.efConstruction;
474
- this.efSearch = state.efSearch;
475
- this.metric = state.metric;
476
- this.nextKey = state.nextKey;
477
- this.idMap = new Map(state.idMap);
478
- this.metadata = new Map(state.metadata);
479
-
480
- // Rebuild reverse map
481
- this.reverseMap = new Map();
482
- for (const [id, key] of this.idMap) {
483
- this.reverseMap.set(key, id);
484
- }
485
-
486
- // Try to load USearch index
487
- const usearchPath = indexPath.replace('.idx', '.usearch');
488
- if (!state.useFallback && existsSync(usearchPath)) {
489
- try {
490
- this.usearchModule = await import('usearch');
491
- const Index = this.usearchModule.Index || this.usearchModule.default?.Index;
492
-
493
- const metricMap = { cosine: 'cos', euclidean: 'l2sq', ip: 'ip' };
494
- const usearchMetric = metricMap[this.metric] || 'cos';
495
-
496
- this.index = new Index({
497
- metric: usearchMetric,
498
- connectivity: this.M,
499
- dimensions: this.dimension,
500
- quantization: 'f32',
501
- });
502
-
503
- // Load from file: view() = mmap zero-copy (search only), load() = full copy
504
- if (mmap) {
505
- this.index.view(usearchPath);
506
- } else {
507
- this.index.load(usearchPath);
508
- }
509
- this.useFallback = false;
510
-
511
- console.log(`HNSW: ${mmap ? 'Mapped' : 'Loaded'} ${this.nextKey} vectors from ${usearchPath} (USearch${mmap ? ', mmap' : ''})`);
512
- } catch (err) {
513
- console.log(`HNSW: Failed to load USearch index, using fallback: ${err.message}`);
514
- this.useFallback = true;
515
- }
516
- }
517
-
518
- // Fallback: try legacy hnswlib-node index or vectors.json
519
- if (this.useFallback || !this.index) {
520
- const vectorsPath = indexPath.replace('.idx', '.vectors.json');
521
- if (existsSync(vectorsPath)) {
522
- this.vectors = JSON.parse(await fs.readFile(vectorsPath, 'utf-8'));
523
- this.useFallback = true;
524
- console.log(`HNSW: Loaded ${this.vectors.length} vectors from ${vectorsPath} (fallback)`);
525
- } else if (!state.useFallback && (state.idMap?.length || state.nextKey || 0) > 0) {
526
- throw new Error(
527
- `HNSW native artifact is missing or unreadable for ${indexPath}; refusing to serve stale metadata without vectors`
528
- );
529
- } else {
530
- // Initialize empty fallback
531
- this.useFallback = true;
532
- this.vectors = [];
533
- console.log('HNSW: No saved vectors found, starting fresh');
534
- }
535
- }
536
- }
537
-
538
- /**
539
- * Get index statistics
540
- */
541
- getStats() {
542
- return {
543
- dimension: this.dimension,
544
- totalVectors: this.idMap.size, // Accurate live count (not nextKey which never decrements)
545
- maxElements: this.maxElements,
546
- M: this.M,
547
- efConstruction: this.efConstruction,
548
- efSearch: this.efSearch,
549
- metric: this.metric,
550
- useFallback: this.useFallback,
551
- engine: this.useFallback ? 'js-fallback' : 'usearch',
552
- nextKey: this.nextKey, // Expose for debugging (total keys ever assigned)
553
- };
554
- }
555
-
556
- /**
557
- * Normalize vector to unit length
558
- */
559
- normalize(vector) {
560
- let norm = 0;
561
- for (let i = 0; i < vector.length; i++) {
562
- norm += vector[i] * vector[i];
563
- }
564
- norm = Math.sqrt(norm);
565
-
566
- if (norm === 0) return vector;
567
-
568
- const normalized = new Array(vector.length);
569
- for (let i = 0; i < vector.length; i++) {
570
- normalized[i] = vector[i] / norm;
571
- }
572
- return normalized;
573
- }
574
-
575
- /**
576
- * Cosine similarity between two vectors
577
- */
578
- cosineSimilarity(a, b) {
579
- let dotProduct = 0;
580
- for (let i = 0; i < a.length; i++) {
581
- dotProduct += a[i] * b[i];
582
- }
583
- return dotProduct; // Already normalized
584
- }
585
-
586
- /**
587
- * Clear all data
588
- */
589
- async clear() {
590
- this.idMap.clear();
591
- this.reverseMap.clear();
592
- this.metadata.clear();
593
- this.nextKey = 0;
594
- this.vectors = [];
595
- await this.clearStaleBitmap();
596
- this.index = null;
597
- await this.init();
598
- }
599
- }
600
-
601
- function isNativeCapacityError(err) {
602
- const message = String(err?.message || err).toLowerCase();
603
- return /\b(capacity|reserve|max\s*elements?|max_elements|full|allocation|out of memory|oom)\b/.test(message);
604
- }
605
-
606
- // =============================================================================
607
- // FACTORY FUNCTION
608
- // =============================================================================
609
-
610
- /**
611
- * Create or load an HNSW index
612
- */
613
- export async function createHNSWIndex(options = {}) {
614
- const index = new HNSWIndex(options);
615
-
616
- if (options.load && hnswArtifactsExist(options.indexPath || DB_PATHS.hnswIndex)) {
617
- await index.load(options.indexPath);
618
- } else {
619
- await index.init();
620
- }
621
-
622
- return index;
623
- }
624
-
625
- function hnswArtifactsExist(indexPath) {
626
- const metaPath = indexPath.replace('.idx', '.meta.json');
627
- return existsSync(metaPath);
628
- }
629
-
630
- // =============================================================================
631
- // CLI
632
- // =============================================================================
633
-
634
- if (import.meta.url === `file://${process.argv[1]}`) {
635
- const args = process.argv.slice(2);
636
-
637
- console.log(`
638
- HNSW Index CLI (USearch backend)
639
-
640
- Usage:
641
- hnsw-index.js stats Show index statistics
642
- hnsw-index.js test Run performance test
643
- hnsw-index.js search <query> Search with query text (requires embedding model)
644
-
645
- Options:
646
- --index <path> Path to index file (default: .sweet-search/codebase-hnsw.idx)
647
- `);
648
-
649
- const command = args[0];
650
-
651
- (async () => {
652
- const index = new HNSWIndex();
653
-
654
- try {
655
- if (command === 'stats') {
656
- await index.load();
657
- console.log('\nIndex Statistics:');
658
- console.log(JSON.stringify(index.getStats(), null, 2));
659
- } else if (command === 'test') {
660
- // Performance test
661
- console.log('\nRunning HNSW performance test (USearch)...\n');
662
-
663
- await index.init();
664
-
665
- const dim = 256;
666
- const numVectors = 10000;
667
- const numQueries = 100;
668
-
669
- // Generate random vectors
670
- console.log(`Generating ${numVectors} random ${dim}-dim vectors...`);
671
- const vectors = [];
672
- for (let i = 0; i < numVectors; i++) {
673
- const vec = new Array(dim).fill(0).map(() => Math.random());
674
- vectors.push(vec);
675
- }
676
-
677
- // Add vectors
678
- console.log('Adding vectors to index...');
679
- const addStart = performance.now();
680
- for (let i = 0; i < numVectors; i++) {
681
- await index.add(`vec-${i}`, vectors[i], { index: i });
682
- }
683
- const addTime = performance.now() - addStart;
684
- console.log(`Added ${numVectors} vectors in ${addTime.toFixed(2)}ms (${(numVectors / addTime * 1000).toFixed(0)} vec/s)`);
685
-
686
- // Search
687
- console.log(`\nRunning ${numQueries} searches...`);
688
- const latencies = [];
689
- for (let i = 0; i < numQueries; i++) {
690
- const queryVec = new Array(dim).fill(0).map(() => Math.random());
691
- const result = await index.search(queryVec, 10);
692
- latencies.push(result.latency_us);
693
- }
694
-
695
- // Stats
696
- latencies.sort((a, b) => a - b);
697
- const p50 = latencies[Math.floor(numQueries * 0.5)];
698
- const p95 = latencies[Math.floor(numQueries * 0.95)];
699
- const p99 = latencies[Math.floor(numQueries * 0.99)];
700
- const avg = latencies.reduce((a, b) => a + b, 0) / numQueries;
701
-
702
- console.log(`\nSearch Latency (μs):`);
703
- console.log(` p50: ${p50.toFixed(0)}μs`);
704
- console.log(` p95: ${p95.toFixed(0)}μs`);
705
- console.log(` p99: ${p99.toFixed(0)}μs`);
706
- console.log(` avg: ${avg.toFixed(0)}μs`);
707
- console.log(`\nEngine: ${index.useFallback ? 'JS fallback' : 'USearch'}`);
708
- } else {
709
- console.log('Unknown command. Use: stats, test, or search');
710
- }
711
- } catch (err) {
712
- console.error('Error:', err.message);
713
- process.exit(1);
714
- }
715
- })();
716
- }
717
-
718
- /**
719
- * Check if native ANN backend (usearch) is available.
720
- * @returns {Promise<{ available: boolean, engine: string, error?: string }>}
721
- */
722
- export async function checkNativeBackend() {
723
- try {
724
- const mod = await import('usearch');
725
- const Index = mod.Index || mod.default?.Index;
726
- if (!Index) {
727
- return { available: false, engine: 'js-fallback', error: 'Index class not found in usearch module' };
728
- }
729
- return { available: true, engine: 'usearch' };
730
- } catch (err) {
731
- return { available: false, engine: 'js-fallback', error: err.message };
732
- }
733
- }
734
-
735
- /**
736
- * Require native ANN backend. Throws if usearch is not available.
737
- * Use this in benchmark runs to prevent accidentally benchmarking the slow JS fallback.
738
- * @throws {Error} If usearch is not available
739
- */
740
- export async function requireNativeAnn() {
741
- const result = await checkNativeBackend();
742
- if (!result.available) {
743
- throw new Error(
744
- `Native ANN backend (usearch) is required but not available: ${result.error}. ` +
745
- 'Install usearch or remove --require-native-ann flag.'
746
- );
747
- }
748
- return result;
749
- }
750
-
751
- export default HNSWIndex;