verso-db 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/CHANGELOG.md +25 -0
  2. package/README.md +81 -49
  3. package/dist/BinaryHeap.d.ts +16 -5
  4. package/dist/BinaryHeap.d.ts.map +1 -1
  5. package/dist/BinaryHeap.js +138 -0
  6. package/dist/BinaryHeap.js.map +1 -0
  7. package/dist/Collection.d.ts +98 -17
  8. package/dist/Collection.d.ts.map +1 -1
  9. package/dist/Collection.js +1186 -0
  10. package/dist/Collection.js.map +1 -0
  11. package/dist/HNSWIndex.d.ts +170 -15
  12. package/dist/HNSWIndex.d.ts.map +1 -1
  13. package/dist/HNSWIndex.js +2818 -0
  14. package/dist/HNSWIndex.js.map +1 -0
  15. package/dist/MaxBinaryHeap.d.ts +2 -60
  16. package/dist/MaxBinaryHeap.d.ts.map +1 -1
  17. package/dist/MaxBinaryHeap.js +5 -0
  18. package/dist/MaxBinaryHeap.js.map +1 -0
  19. package/dist/SearchWorker.d.ts +104 -0
  20. package/dist/SearchWorker.d.ts.map +1 -0
  21. package/dist/SearchWorker.js +573 -0
  22. package/dist/SearchWorker.js.map +1 -0
  23. package/dist/VectorDB.d.ts +19 -5
  24. package/dist/VectorDB.d.ts.map +1 -1
  25. package/dist/VectorDB.js +246 -0
  26. package/dist/VectorDB.js.map +1 -0
  27. package/dist/WorkerPool.d.ts +92 -0
  28. package/dist/WorkerPool.d.ts.map +1 -0
  29. package/dist/WorkerPool.js +266 -0
  30. package/dist/WorkerPool.js.map +1 -0
  31. package/dist/backends/JsDistanceBackend.d.ts +3 -20
  32. package/dist/backends/JsDistanceBackend.d.ts.map +1 -1
  33. package/dist/backends/JsDistanceBackend.js +163 -0
  34. package/dist/backends/JsDistanceBackend.js.map +1 -0
  35. package/dist/encoding/DeltaEncoder.d.ts +2 -2
  36. package/dist/encoding/DeltaEncoder.d.ts.map +1 -1
  37. package/dist/encoding/DeltaEncoder.js +199 -0
  38. package/dist/encoding/DeltaEncoder.js.map +1 -0
  39. package/dist/errors.js +97 -0
  40. package/dist/errors.js.map +1 -0
  41. package/dist/index.d.ts +16 -17
  42. package/dist/index.d.ts.map +1 -1
  43. package/dist/index.js +61 -3419
  44. package/dist/index.js.map +1 -0
  45. package/dist/presets.d.ts +9 -9
  46. package/dist/presets.d.ts.map +1 -1
  47. package/dist/presets.js +205 -0
  48. package/dist/presets.js.map +1 -0
  49. package/dist/quantization/ScalarQuantizer.d.ts +10 -34
  50. package/dist/quantization/ScalarQuantizer.d.ts.map +1 -1
  51. package/dist/quantization/ScalarQuantizer.js +346 -0
  52. package/dist/quantization/ScalarQuantizer.js.map +1 -0
  53. package/dist/storage/BatchWriter.d.ts.map +1 -1
  54. package/dist/storage/BatchWriter.js +351 -0
  55. package/dist/storage/BatchWriter.js.map +1 -0
  56. package/dist/storage/BunStorageBackend.d.ts +12 -5
  57. package/dist/storage/BunStorageBackend.d.ts.map +1 -1
  58. package/dist/storage/BunStorageBackend.js +182 -0
  59. package/dist/storage/BunStorageBackend.js.map +1 -0
  60. package/dist/storage/MemoryBackend.d.ts.map +1 -1
  61. package/dist/storage/MemoryBackend.js +109 -0
  62. package/dist/storage/MemoryBackend.js.map +1 -0
  63. package/dist/storage/OPFSBackend.d.ts +9 -1
  64. package/dist/storage/OPFSBackend.d.ts.map +1 -1
  65. package/dist/storage/OPFSBackend.js +325 -0
  66. package/dist/storage/OPFSBackend.js.map +1 -0
  67. package/dist/storage/StorageBackend.d.ts +1 -1
  68. package/dist/storage/StorageBackend.js +12 -0
  69. package/dist/storage/StorageBackend.js.map +1 -0
  70. package/dist/storage/WriteAheadLog.d.ts +15 -11
  71. package/dist/storage/WriteAheadLog.d.ts.map +1 -1
  72. package/dist/storage/WriteAheadLog.js +321 -0
  73. package/dist/storage/WriteAheadLog.js.map +1 -0
  74. package/dist/storage/createStorageBackend.d.ts +4 -0
  75. package/dist/storage/createStorageBackend.d.ts.map +1 -1
  76. package/dist/storage/createStorageBackend.js +119 -0
  77. package/dist/storage/createStorageBackend.js.map +1 -0
  78. package/dist/storage/index.d.ts +3 -3
  79. package/dist/storage/index.js +33 -0
  80. package/dist/storage/index.js.map +1 -0
  81. package/dist/storage/nodeFsRuntime.d.ts +14 -0
  82. package/dist/storage/nodeFsRuntime.d.ts.map +1 -0
  83. package/dist/storage/nodeFsRuntime.js +105 -0
  84. package/dist/storage/nodeFsRuntime.js.map +1 -0
  85. package/package.json +47 -23
  86. package/dist/Storage.d.ts +0 -54
  87. package/dist/Storage.d.ts.map +0 -1
  88. package/dist/backends/DistanceBackend.d.ts +0 -5
  89. package/dist/backends/DistanceBackend.d.ts.map +0 -1
  90. package/src/BinaryHeap.ts +0 -131
  91. package/src/Collection.ts +0 -695
  92. package/src/HNSWIndex.ts +0 -1839
  93. package/src/MaxBinaryHeap.ts +0 -175
  94. package/src/Storage.ts +0 -435
  95. package/src/VectorDB.ts +0 -109
  96. package/src/backends/DistanceBackend.ts +0 -17
  97. package/src/backends/JsDistanceBackend.ts +0 -227
  98. package/src/encoding/DeltaEncoder.ts +0 -217
  99. package/src/errors.ts +0 -110
  100. package/src/index.ts +0 -138
  101. package/src/presets.ts +0 -229
  102. package/src/quantization/ScalarQuantizer.ts +0 -383
  103. package/src/storage/BatchWriter.ts +0 -336
  104. package/src/storage/BunStorageBackend.ts +0 -161
  105. package/src/storage/MemoryBackend.ts +0 -120
  106. package/src/storage/OPFSBackend.ts +0 -250
  107. package/src/storage/StorageBackend.ts +0 -74
  108. package/src/storage/WriteAheadLog.ts +0 -326
  109. package/src/storage/createStorageBackend.ts +0 -137
  110. package/src/storage/index.ts +0 -53
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuCG;AAEH,gFAAgF;AAEhF,OAAO;AACP,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEtC,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAE1C,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAKxC,OAAO,EACL,cAAc,EACd,iBAAiB,EACjB,eAAe,EACf,oBAAoB,EACpB,oBAAoB,EACpB,oBAAoB,EACpB,iBAAiB,EACjB,kBAAkB,EAClB,OAAO,EACP,oBAAoB,EACpB,SAAS,EACT,YAAY,EACb,MAAM,WAAW,CAAC;AAEnB,eAAe;AACf,OAAO,EAAE,eAAe,EAAE,MAAM,gCAAgC,CAAC;AAKjE,OAAO,EAAE,iBAAiB,EAAE,MAAM,6BAA6B,CAAC;AAChE,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AACpD,OAAO,EACL,oBAAoB,EACpB,yBAAyB,EACzB,sBAAsB,GAGvB,MAAM,gCAAgC,CAAC;AAExC,mCAAmC;AACnC,OAAO,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAE1E,OAAO,EAAE,WAAW,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAGvE,4BAA4B;AAC5B,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC1C,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AAEnD,gBAAgB;AAChB,OAAO,EACL,aAAa,EACb,sBAAsB,EACtB,oBAAoB,EACpB,uBAAuB,EACvB,qBAAqB,EACrB,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,GACpB,MAAM,UAAU,CAAC"}
package/dist/presets.d.ts CHANGED
@@ -23,12 +23,12 @@ export interface HNSWPreset {
23
23
  * Preset for low-dimensional vectors (128D or less)
24
24
  * Suitable for: Image features, word2vec, GloVe embeddings
25
25
  */
26
- export declare const PRESET_LOW_DIM: HNSWPreset;
26
+ export declare const PRESET_LOW_DIM: Readonly<HNSWPreset>;
27
27
  /**
28
28
  * Preset for medium-dimensional vectors (129-512D)
29
29
  * Suitable for: Sentence embeddings, smaller transformer outputs
30
30
  */
31
- export declare const PRESET_MEDIUM_DIM: HNSWPreset;
31
+ export declare const PRESET_MEDIUM_DIM: Readonly<HNSWPreset>;
32
32
  /**
33
33
  * Preset for high-dimensional vectors (768D+)
34
34
  * Suitable for: BERT, GPT embeddings, Cohere, OpenAI embeddings
@@ -37,19 +37,19 @@ export declare const PRESET_MEDIUM_DIM: HNSWPreset;
37
37
  * Benchmarked on Cohere Wikipedia 1024D (495K vectors):
38
38
  * - efSearch=128: 99.2% recall, 168 QPS, 10.72ms P99
39
39
  */
40
- export declare const PRESET_HIGH_DIM: HNSWPreset;
40
+ export declare const PRESET_HIGH_DIM: Readonly<HNSWPreset>;
41
41
  /**
42
42
  * Preset for very high-dimensional vectors (1536D+)
43
43
  * Suitable for: OpenAI text-embedding-ada-002, text-embedding-3-large
44
44
  *
45
45
  * Scaled from PRESET_HIGH_DIM benchmarks (higher M for higher dimensions)
46
46
  */
47
- export declare const PRESET_VERY_HIGH_DIM: HNSWPreset;
47
+ export declare const PRESET_VERY_HIGH_DIM: Readonly<HNSWPreset>;
48
48
  /**
49
49
  * Preset for small datasets (<10K vectors)
50
50
  * Prioritizes recall over speed since brute-force is viable
51
51
  */
52
- export declare const PRESET_SMALL_DATASET: HNSWPreset;
52
+ export declare const PRESET_SMALL_DATASET: Readonly<HNSWPreset>;
53
53
  /**
54
54
  * Preset for large datasets (100K-1M vectors)
55
55
  * Balances recall with build time and memory
@@ -57,21 +57,21 @@ export declare const PRESET_SMALL_DATASET: HNSWPreset;
57
57
  * Benchmarked on Cohere Wikipedia 1024D (495K vectors):
58
58
  * - efSearch=128: 99.2% recall, 168 QPS
59
59
  */
60
- export declare const PRESET_LARGE_DATASET: HNSWPreset;
60
+ export declare const PRESET_LARGE_DATASET: Readonly<HNSWPreset>;
61
61
  /**
62
62
  * Preset for maximum recall (prioritizes accuracy over speed)
63
63
  * Use when recall is critical and latency is acceptable
64
64
  */
65
- export declare const PRESET_MAX_RECALL: HNSWPreset;
65
+ export declare const PRESET_MAX_RECALL: Readonly<HNSWPreset>;
66
66
  /**
67
67
  * Preset for minimum latency (prioritizes speed over recall)
68
68
  * Use when latency is critical and 90% recall is acceptable
69
69
  */
70
- export declare const PRESET_LOW_LATENCY: HNSWPreset;
70
+ export declare const PRESET_LOW_LATENCY: Readonly<HNSWPreset>;
71
71
  /**
72
72
  * All available presets
73
73
  */
74
- export declare const PRESETS: Record<string, HNSWPreset>;
74
+ export declare const PRESETS: Readonly<Record<string, Readonly<HNSWPreset>>>;
75
75
  /**
76
76
  * Get recommended preset based on dimension and dataset size
77
77
  *
@@ -1 +1 @@
1
- {"version":3,"file":"presets.d.ts","sourceRoot":"","sources":["../src/presets.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,CAAC,EAAE,MAAM,CAAC;IACV,cAAc,EAAE,MAAM,CAAC;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,EAAE,MAAM,CAAC;IACvB,gBAAgB,EAAE,MAAM,CAAC;IACzB,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED;;;GAGG;AACH,eAAO,MAAM,cAAc,EAAE,UAS5B,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,iBAAiB,EAAE,UAS/B,CAAC;AAEF;;;;;;;GAOG;AACH,eAAO,MAAM,eAAe,EAAE,UAS7B,CAAC;AAEF;;;;;GAKG;AACH,eAAO,MAAM,oBAAoB,EAAE,UASlC,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,oBAAoB,EAAE,UASlC,CAAC;AAEF;;;;;;GAMG;AACH,eAAO,MAAM,oBAAoB,EAAE,UASlC,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,iBAAiB,EAAE,UAS/B,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,kBAAkB,EAAE,UAShC,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,UAAU,CAS9C,CAAC;AAEF;;;;;GAKG;AACH,wBAAgB,oBAAoB,CAAC,SAAS,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,UAAU,CAkBxF;AAED;;GAEG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS,CAE9D;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAAC,cAAc,EAAE,MAAM,GAAG,UAAU,CAyB/D"}
1
+ {"version":3,"file":"presets.d.ts","sourceRoot":"","sources":["../src/presets.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,CAAC,EAAE,MAAM,CAAC;IACV,cAAc,EAAE,MAAM,CAAC;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,EAAE,MAAM,CAAC;IACvB,gBAAgB,EAAE,MAAM,CAAC;IACzB,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED;;;GAGG;AACH,eAAO,MAAM,cAAc,EAAE,QAAQ,CAAC,UAAU,CAS9C,CAAC;AAEH;;;GAGG;AACH,eAAO,MAAM,iBAAiB,EAAE,QAAQ,CAAC,UAAU,CASjD,CAAC;AAEH;;;;;;;GAOG;AACH,eAAO,MAAM,eAAe,EAAE,QAAQ,CAAC,UAAU,CAS/C,CAAC;AAEH;;;;;GAKG;AACH,eAAO,MAAM,oBAAoB,EAAE,QAAQ,CAAC,UAAU,CASpD,CAAC;AAEH;;;GAGG;AACH,eAAO,MAAM,oBAAoB,EAAE,QAAQ,CAAC,UAAU,CASpD,CAAC;AAEH;;;;;;GAMG;AACH,eAAO,MAAM,oBAAoB,EAAE,QAAQ,CAAC,UAAU,CASpD,CAAC;AAEH;;;GAGG;AACH,eAAO,MAAM,iBAAiB,EAAE,QAAQ,CAAC,UAAU,CASjD,CAAC;AAEH;;;GAGG;AACH,eAAO,MAAM,kBAAkB,EAAE,QAAQ,CAAC,UAAU,CASlD,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,OAAO,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,QAAQ,CAAC,UAAU,CAAC,CAAC,CASjE,CAAC;AAEH;;;;;GAKG;AACH,wBAAgB,oBAAoB,CAAC,SAAS,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,UAAU,CAkBxF;AAED;;GAEG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS,CAE9D;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAAC,cAAc,EAAE,MAAM,GAAG,UAAU,CAyB/D"}
@@ -0,0 +1,205 @@
1
+ /**
2
+ * HNSW Parameter Presets
3
+ *
4
+ * These presets are optimized based on extensive benchmarking to achieve
5
+ * the target recall@10 >= 95% for different dataset sizes and dimensions.
6
+ *
7
+ * Key parameters:
8
+ * - M: Maximum connections per node (higher = better recall, more memory)
9
+ * - efConstruction: Beam width during index building (higher = better quality, slower build)
10
+ * - efSearch: Beam width during search (higher = better recall, slower search)
11
+ */
12
+ /**
13
+ * Preset for low-dimensional vectors (128D or less)
14
+ * Suitable for: Image features, word2vec, GloVe embeddings
15
+ */
16
+ export const PRESET_LOW_DIM = Object.freeze({
17
+ name: 'low-dim',
18
+ description: 'Optimized for low-dimensional vectors (<=128D)',
19
+ M: 16,
20
+ efConstruction: 200,
21
+ efSearch: 100,
22
+ expectedRecall: 0.99,
23
+ targetDimensions: '<=128',
24
+ targetDatasetSize: '1K-100K',
25
+ });
26
+ /**
27
+ * Preset for medium-dimensional vectors (256-512D)
28
+ * Suitable for: Sentence embeddings, smaller transformer outputs
29
+ */
30
+ export const PRESET_MEDIUM_DIM = Object.freeze({
31
+ name: 'medium-dim',
32
+ description: 'Optimized for medium-dimensional vectors (129-512D)',
33
+ M: 24,
34
+ efConstruction: 200,
35
+ efSearch: 150,
36
+ expectedRecall: 0.97,
37
+ targetDimensions: '129-512',
38
+ targetDatasetSize: '1K-100K',
39
+ });
40
+ /**
41
+ * Preset for high-dimensional vectors (768D+)
42
+ * Suitable for: BERT, GPT embeddings, Cohere, OpenAI embeddings
43
+ * This is the recommended preset for RAG applications
44
+ *
45
+ * Benchmarked on Cohere Wikipedia 1024D (495K vectors):
46
+ * - efSearch=128: 99.2% recall, 168 QPS, 10.72ms P99
47
+ */
48
+ export const PRESET_HIGH_DIM = Object.freeze({
49
+ name: 'high-dim',
50
+ description: 'Optimized for high-dimensional vectors (768D+)',
51
+ M: 32,
52
+ efConstruction: 200,
53
+ efSearch: 128,
54
+ expectedRecall: 0.99,
55
+ targetDimensions: '>=768',
56
+ targetDatasetSize: '1K-500K',
57
+ });
58
+ /**
59
+ * Preset for very high-dimensional vectors (1536D+)
60
+ * Suitable for: OpenAI text-embedding-ada-002, text-embedding-3-large
61
+ *
62
+ * Scaled from PRESET_HIGH_DIM benchmarks (higher M for higher dimensions)
63
+ */
64
+ export const PRESET_VERY_HIGH_DIM = Object.freeze({
65
+ name: 'very-high-dim',
66
+ description: 'Optimized for very high-dimensional vectors (1536D+)',
67
+ M: 48,
68
+ efConstruction: 300,
69
+ efSearch: 150,
70
+ expectedRecall: 0.99,
71
+ targetDimensions: '>=1536',
72
+ targetDatasetSize: '1K-500K',
73
+ });
74
+ /**
75
+ * Preset for small datasets (<10K vectors)
76
+ * Prioritizes recall over speed since brute-force is viable
77
+ */
78
+ export const PRESET_SMALL_DATASET = Object.freeze({
79
+ name: 'small-dataset',
80
+ description: 'Optimized for small datasets (<10K vectors)',
81
+ M: 16,
82
+ efConstruction: 200,
83
+ efSearch: 200,
84
+ expectedRecall: 0.99,
85
+ targetDimensions: 'any',
86
+ targetDatasetSize: '<10K',
87
+ });
88
+ /**
89
+ * Preset for large datasets (100K-1M vectors)
90
+ * Balances recall with build time and memory
91
+ *
92
+ * Benchmarked on Cohere Wikipedia 1024D (495K vectors):
93
+ * - efSearch=128: 99.2% recall, 168 QPS
94
+ */
95
+ export const PRESET_LARGE_DATASET = Object.freeze({
96
+ name: 'large-dataset',
97
+ description: 'Optimized for large datasets (100K-1M vectors)',
98
+ M: 32,
99
+ efConstruction: 200,
100
+ efSearch: 128,
101
+ expectedRecall: 0.99,
102
+ targetDimensions: 'any',
103
+ targetDatasetSize: '100K-1M',
104
+ });
105
+ /**
106
+ * Preset for maximum recall (prioritizes accuracy over speed)
107
+ * Use when recall is critical and latency is acceptable
108
+ */
109
+ export const PRESET_MAX_RECALL = Object.freeze({
110
+ name: 'max-recall',
111
+ description: 'Maximum recall configuration',
112
+ M: 48,
113
+ efConstruction: 500,
114
+ efSearch: 400,
115
+ expectedRecall: 0.99,
116
+ targetDimensions: 'any',
117
+ targetDatasetSize: 'any',
118
+ });
119
+ /**
120
+ * Preset for minimum latency (prioritizes speed over recall)
121
+ * Use when latency is critical and 90% recall is acceptable
122
+ */
123
+ export const PRESET_LOW_LATENCY = Object.freeze({
124
+ name: 'low-latency',
125
+ description: 'Minimum latency configuration (90% recall)',
126
+ M: 12,
127
+ efConstruction: 100,
128
+ efSearch: 50,
129
+ expectedRecall: 0.90,
130
+ targetDimensions: 'any',
131
+ targetDatasetSize: 'any',
132
+ });
133
+ /**
134
+ * All available presets
135
+ */
136
+ export const PRESETS = Object.freeze({
137
+ 'low-dim': PRESET_LOW_DIM,
138
+ 'medium-dim': PRESET_MEDIUM_DIM,
139
+ 'high-dim': PRESET_HIGH_DIM,
140
+ 'very-high-dim': PRESET_VERY_HIGH_DIM,
141
+ 'small-dataset': PRESET_SMALL_DATASET,
142
+ 'large-dataset': PRESET_LARGE_DATASET,
143
+ 'max-recall': PRESET_MAX_RECALL,
144
+ 'low-latency': PRESET_LOW_LATENCY,
145
+ });
146
+ /**
147
+ * Get recommended preset based on dimension and dataset size
148
+ *
149
+ * For high-dimensional vectors (768D+), dimension takes priority over dataset size
150
+ * because recall degrades significantly without higher M values.
151
+ */
152
+ export function getRecommendedPreset(dimension, datasetSize) {
153
+ // For high-dimensional vectors, always use dimension-based presets
154
+ // (dimension matters more than dataset size for recall)
155
+ // Note: check >= 1536 BEFORE >= 768; otherwise 1536D+ vectors would match the >= 768 branch first
156
+ if (dimension >= 1536)
157
+ return PRESET_VERY_HIGH_DIM;
158
+ if (dimension >= 768)
159
+ return PRESET_HIGH_DIM;
160
+ // For lower dimensions, consider dataset size
161
+ if (datasetSize !== undefined) {
162
+ if (datasetSize < 10000)
163
+ return PRESET_SMALL_DATASET;
164
+ if (datasetSize > 100000)
165
+ return PRESET_LARGE_DATASET;
166
+ }
167
+ // Dimension-based selection for the remaining dimensions (<768D); 513-767D falls through to high-dim
168
+ if (dimension <= 128)
169
+ return PRESET_LOW_DIM;
170
+ if (dimension <= 512)
171
+ return PRESET_MEDIUM_DIM;
172
+ return PRESET_HIGH_DIM;
173
+ }
174
+ /**
175
+ * Get preset by name
176
+ */
177
+ export function getPreset(name) {
178
+ return PRESETS[name];
179
+ }
180
+ /**
181
+ * RAG-specific preset recommendation
182
+ * For typical RAG applications using popular embedding models
183
+ */
184
+ export function getRAGPreset(embeddingModel) {
185
+ const model = embeddingModel.toLowerCase();
186
+ // OpenAI models
187
+ if (model.includes('ada-002') || model.includes('text-embedding-3')) {
188
+ return PRESET_VERY_HIGH_DIM;
189
+ }
190
+ // Cohere models
191
+ if (model.includes('cohere') || model.includes('embed-')) {
192
+ return PRESET_HIGH_DIM;
193
+ }
194
+ // BERT/Sentence Transformers
195
+ if (model.includes('bert') || model.includes('minilm') || model.includes('mpnet')) {
196
+ return PRESET_HIGH_DIM;
197
+ }
198
+ // E5 models
199
+ if (model.includes('e5-')) {
200
+ return model.includes('large') ? PRESET_HIGH_DIM : PRESET_MEDIUM_DIM;
201
+ }
202
+ // Default to high-dim for unknown models (most modern embeddings are 768D+)
203
+ return PRESET_HIGH_DIM;
204
+ }
205
+ //# sourceMappingURL=presets.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"presets.js","sourceRoot":"","sources":["../src/presets.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAaH;;;GAGG;AACH,MAAM,CAAC,MAAM,cAAc,GAAyB,MAAM,CAAC,MAAM,CAAC;IAChE,IAAI,EAAE,SAAS;IACf,WAAW,EAAE,gDAAgD;IAC7D,CAAC,EAAE,EAAE;IACL,cAAc,EAAE,GAAG;IACnB,QAAQ,EAAE,GAAG;IACb,cAAc,EAAE,IAAI;IACpB,gBAAgB,EAAE,OAAO;IACzB,iBAAiB,EAAE,SAAS;CAC7B,CAAC,CAAC;AAEH;;;GAGG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAyB,MAAM,CAAC,MAAM,CAAC;IACnE,IAAI,EAAE,YAAY;IAClB,WAAW,EAAE,qDAAqD;IAClE,CAAC,EAAE,EAAE;IACL,cAAc,EAAE,GAAG;IACnB,QAAQ,EAAE,GAAG;IACb,cAAc,EAAE,IAAI;IACpB,gBAAgB,EAAE,SAAS;IAC3B,iBAAiB,EAAE,SAAS;CAC7B,CAAC,CAAC;AAEH;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,eAAe,GAAyB,MAAM,CAAC,MAAM,CAAC;IACjE,IAAI,EAAE,UAAU;IAChB,WAAW,EAAE,gDAAgD;IAC7D,CAAC,EAAE,EAAE;IACL,cAAc,EAAE,GAAG;IACnB,QAAQ,EAAE,GAAG;IACb,cAAc,EAAE,IAAI;IACpB,gBAAgB,EAAE,OAAO;IACzB,iBAAiB,EAAE,SAAS;CAC7B,CAAC,CAAC;AAEH;;;;;GAKG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAyB,MAAM,CAAC,MAAM,CAAC;IACtE,IAAI,EAAE,eAAe;IACrB,WAAW,EAAE,sDAAsD;IACnE,CAAC,EAAE,EAAE;IACL,cAAc,EAAE,GAAG;IACnB,QAAQ,EAAE,GAAG;IACb,cAAc,EAAE,IAAI;IACpB,gBAAgB,EAAE,QAAQ;IAC1B,iBAAiB,EAAE,SAAS;CAC7B,CAAC,CAAC;AAEH;;;GAGG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAyB,MAAM,CAAC,MAAM,CAAC;IACtE,IAAI,EAAE,eAAe;IACrB,WAAW,EAAE,6CAA6C;IAC1D,CAAC,EAAE,EAAE;IACL,cAAc,EAAE,GAAG;IACnB,QAAQ,EAAE,GAAG;IACb,cAAc,EAAE,IAAI;IACpB,gBAAgB,EAAE,KAAK;IACvB,iBAAiB,EAAE,MAAM;CAC1B,CAAC,CAAC;AAEH;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAyB,MAAM,CAAC,MAAM,CAAC;IACtE,IAAI,EAAE,eAAe;IACrB,WAAW,EAAE,gDAAgD;IAC7D,CAAC,EAAE,EAAE;IACL,cAAc,EAAE,GAAG;IACnB,QAAQ,EAAE,GAAG;IACb,cAAc,EAAE,IAAI;IACpB,gBAAgB,EAAE,KAAK;IACvB,iBAAiB,EAAE,SAAS;CAC7B,CAAC,CAAC;AAEH;;;GAGG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAyB,MAAM,CAAC,MAAM,CAAC;IACnE,IAAI,EAAE,YAAY;IAClB,WAAW,EAAE,8BAA8B;IAC3C,CAAC,EAAE,EAAE;IACL,cAAc,EAAE,GAAG;IACnB,QAAQ,EAAE,GAAG;IACb,cAAc,EAAE,IAAI;IACpB,gBAAgB,EAAE,KAAK;IACvB,iBAAiB,EAAE,KAAK;CACzB,CAAC,CAAC;AAEH;;;GAGG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAyB,MAAM,CAAC,MAAM,CAAC;IACpE,IAAI,EAAE,aAAa;IACnB,WAAW,EAA
E,4CAA4C;IACzD,CAAC,EAAE,EAAE;IACL,cAAc,EAAE,GAAG;IACnB,QAAQ,EAAE,EAAE;IACZ,cAAc,EAAE,IAAI;IACpB,gBAAgB,EAAE,KAAK;IACvB,iBAAiB,EAAE,KAAK;CACzB,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,OAAO,GAAmD,MAAM,CAAC,MAAM,CAAC;IACnF,SAAS,EAAE,cAAc;IACzB,YAAY,EAAE,iBAAiB;IAC/B,UAAU,EAAE,eAAe;IAC3B,eAAe,EAAE,oBAAoB;IACrC,eAAe,EAAE,oBAAoB;IACrC,eAAe,EAAE,oBAAoB;IACrC,YAAY,EAAE,iBAAiB;IAC/B,aAAa,EAAE,kBAAkB;CAClC,CAAC,CAAC;AAEH;;;;;GAKG;AACH,MAAM,UAAU,oBAAoB,CAAC,SAAiB,EAAE,WAAoB;IAC1E,mEAAmE;IACnE,wDAAwD;IACxD,kEAAkE;IAClE,IAAI,SAAS,IAAI,IAAI;QAAE,OAAO,oBAAoB,CAAC;IACnD,IAAI,SAAS,IAAI,GAAG;QAAE,OAAO,eAAe,CAAC;IAE7C,8CAA8C;IAC9C,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;QAC9B,IAAI,WAAW,GAAG,KAAK;YAAE,OAAO,oBAAoB,CAAC;QACrD,IAAI,WAAW,GAAG,MAAM;YAAE,OAAO,oBAAoB,CAAC;IACxD,CAAC;IAED,kDAAkD;IAClD,IAAI,SAAS,IAAI,GAAG;QAAE,OAAO,cAAc,CAAC;IAC5C,IAAI,SAAS,IAAI,GAAG;QAAE,OAAO,iBAAiB,CAAC;IAE/C,OAAO,eAAe,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,IAAY;IACpC,OAAO,OAAO,CAAC,IAAI,CAAC,CAAC;AACvB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,cAAsB;IACjD,MAAM,KAAK,GAAG,cAAc,CAAC,WAAW,EAAE,CAAC;IAE3C,gBAAgB;IAChB,IAAI,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EAAE,CAAC;QACpE,OAAO,oBAAoB,CAAC;IAC9B,CAAC;IAED,gBAAgB;IAChB,IAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QACzD,OAAO,eAAe,CAAC;IACzB,CAAC;IAED,6BAA6B;IAC7B,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QAClF,OAAO,eAAe,CAAC;IACzB,CAAC;IAED,YAAY;IACZ,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,iBAAiB,CAAC;IACvE,CAAC;IAED,4EAA4E;IAC5E,OAAO,eAAe,CAAC;AACzB,CAAC"}
@@ -32,6 +32,7 @@ export declare class ScalarQuantizer {
32
32
  * Get quantization parameters
33
33
  */
34
34
  getParams(): QuantizationParams | null;
35
+ private validateParams;
35
36
  /**
36
37
  * Set quantization parameters (for loading saved quantizer)
37
38
  */
@@ -40,6 +41,15 @@ export declare class ScalarQuantizer {
40
41
  * Quantize a single float32 vector to int8
41
42
  */
42
43
  quantize(vector: Float32Array): Int8Array;
44
+ /**
45
+ * Quantize a float32 vector directly into a target Int8Array at the given offset.
46
+ * Zero-allocation: avoids creating a new Int8Array per vector.
47
+ *
48
+ * @param vector Source float32 vector
49
+ * @param target Target Int8Array to write into
50
+ * @param targetOffset Byte offset in target where quantized values start
51
+ */
52
+ quantizeInto(vector: Float32Array, target: Int8Array, targetOffset: number): void;
43
53
  /**
44
54
  * Quantize multiple vectors
45
55
  */
@@ -77,38 +87,4 @@ export declare function l2SquaredInt8(a: Int8Array, b: Int8Array): number;
77
87
  * Uses 8-wide unrolling with separate accumulators for better ILP
78
88
  */
79
89
  export declare function cosineDistanceInt8(a: Int8Array, b: Int8Array): number;
80
- /**
81
- * QuantizedVectorStore - Efficient storage for quantized vectors
82
- */
83
- export declare class QuantizedVectorStore {
84
- private quantizer;
85
- private vectors;
86
- private originalVectors;
87
- private keepOriginals;
88
- constructor(dimension: number, keepOriginals?: boolean);
89
- /**
90
- * Train the quantizer and add vectors
91
- */
92
- addVectors(vectors: Float32Array[]): void;
93
- /**
94
- * Get quantized vector by index
95
- */
96
- getQuantized(index: number): Int8Array;
97
- /**
98
- * Get original float32 vector by index (for rescoring)
99
- */
100
- getOriginal(index: number): Float32Array | null;
101
- /**
102
- * Get number of vectors
103
- */
104
- size(): number;
105
- /**
106
- * Calculate memory usage in bytes
107
- */
108
- memoryUsage(): {
109
- quantized: number;
110
- original: number;
111
- total: number;
112
- };
113
- }
114
90
  //# sourceMappingURL=ScalarQuantizer.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"ScalarQuantizer.d.ts","sourceRoot":"","sources":["../../src/quantization/ScalarQuantizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,YAAY,CAAC;IAClB,GAAG,EAAE,YAAY,CAAC;IAClB,KAAK,EAAE,YAAY,CAAC;IACpB,MAAM,EAAE,YAAY,CAAC;CACtB;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,MAAM,CAAmC;IACjD,OAAO,CAAC,OAAO,CAAkB;gBAErB,SAAS,EAAE,MAAM;IAI7B;;OAEG;IACH,KAAK,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,IAAI;IAsCpC;;OAEG;IACH,SAAS,IAAI,OAAO;IAIpB;;OAEG;IACH,SAAS,IAAI,kBAAkB,GAAG,IAAI;IAItC;;OAEG;IACH,SAAS,CAAC,MAAM,EAAE,kBAAkB,GAAG,IAAI;IAK3C;;OAEG;IACH,QAAQ,CAAC,MAAM,EAAE,YAAY,GAAG,SAAS;IAkBzC;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,SAAS,EAAE;IAQnD;;OAEG;IACH,UAAU,CAAC,MAAM,EAAE,SAAS,GAAG,YAAY;IAiB3C;;OAEG;IACH,SAAS,IAAI,WAAW;IAuBxB;;OAEG;IACH,MAAM,CAAC,WAAW,CAAC,MAAM,EAAE,WAAW,GAAG,eAAe;CAqBzD;AAED;;;GAGG;AAEH;;;GAGG;AACH,wBAAgB,cAAc,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,SAAS,GAAG,MAAM,CAyBjE;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,SAAS,GAAG,MAAM,CAkChE;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,SAAS,GAAG,MAAM,CAwCrE;AAED;;GAEG;AACH,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,SAAS,CAAkB;IACnC,OAAO,CAAC,OAAO,CAAc;IAC7B,OAAO,CAAC,eAAe,CAAwB;IAC/C,OAAO,CAAC,aAAa,CAAU;gBAEnB,SAAS,EAAE,MAAM,EAAE,aAAa,UAAO;IAOnD;;OAEG;IACH,UAAU,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,IAAI;IAazC;;OAEG;IACH,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS;IAItC;;OAEG;IACH,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,YAAY,GAAG,IAAI;IAK/C;;OAEG;IACH,IAAI,IAAI,MAAM;IAId;;OAEG;IACH,WAAW,IAAI;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE;CAOtE"}
1
+ {"version":3,"file":"ScalarQuantizer.d.ts","sourceRoot":"","sources":["../../src/quantization/ScalarQuantizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAIH,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,YAAY,CAAC;IAClB,GAAG,EAAE,YAAY,CAAC;IAClB,KAAK,EAAE,YAAY,CAAC;IACpB,MAAM,EAAE,YAAY,CAAC;CACtB;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,MAAM,CAAmC;IACjD,OAAO,CAAC,OAAO,CAAkB;gBAErB,SAAS,EAAE,MAAM;IAI7B;;OAEG;IACH,KAAK,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,IAAI;IA8CpC;;OAEG;IACH,SAAS,IAAI,OAAO;IAIpB;;OAEG;IACH,SAAS,IAAI,kBAAkB,GAAG,IAAI;IAItC,OAAO,CAAC,cAAc;IA+BtB;;OAEG;IACH,SAAS,CAAC,MAAM,EAAE,kBAAkB,GAAG,IAAI;IAM3C;;OAEG;IACH,QAAQ,CAAC,MAAM,EAAE,YAAY,GAAG,SAAS;IAczC;;;;;;;OAOG;IACH,YAAY,CAAC,MAAM,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,GAAG,IAAI;IAkBjF;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,SAAS,EAAE;IAQnD;;OAEG;IACH,UAAU,CAAC,MAAM,EAAE,SAAS,GAAG,YAAY;IAqB3C;;OAEG;IACH,SAAS,IAAI,WAAW;IAuBxB;;OAEG;IACH,MAAM,CAAC,WAAW,CAAC,MAAM,EAAE,WAAW,GAAG,eAAe;CAkCzD;AAED;;;GAGG;AAEH;;;GAGG;AACH,wBAAgB,cAAc,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,SAAS,GAAG,MAAM,CA4BjE;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,SAAS,GAAG,MAAM,CAqChE;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,SAAS,GAAG,MAAM,CA2CrE"}