faiss 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (378) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +88 -97
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +89 -417
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +374 -206
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +467 -364
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +79 -76
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +39 -69
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +56 -33
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +73 -846
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -20
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +30 -52
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +38 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +150 -20
  84. data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
  85. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  86. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  87. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  88. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  89. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  90. data/vendor/faiss/faiss/MetricType.h +14 -7
  91. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  92. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  93. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  94. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  95. data/vendor/faiss/faiss/build.cpp +23 -0
  96. data/vendor/faiss/faiss/build.h +15 -0
  97. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  98. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  101. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
  102. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  103. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  104. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
  105. data/vendor/faiss/faiss/factory_tools.cpp +9 -0
  106. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  107. data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
  108. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +15 -16
  109. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +5 -4
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  113. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  114. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  115. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  116. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  117. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
  120. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  121. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +58 -0
  122. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  123. data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
  124. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +111 -0
  125. data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
  126. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  127. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  128. data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
  129. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  130. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  135. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  136. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  137. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  138. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  139. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  140. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  141. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  142. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  143. data/vendor/faiss/faiss/impl/HNSW.cpp +639 -507
  144. data/vendor/faiss/faiss/impl/HNSW.h +61 -44
  145. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  146. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  147. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  148. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  149. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  150. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  151. data/vendor/faiss/faiss/impl/NSG.cpp +53 -32
  152. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  153. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  154. data/vendor/faiss/faiss/impl/Panorama.h +269 -87
  155. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  156. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  157. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  158. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  159. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  160. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  161. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +55 -25
  162. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  163. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  164. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  165. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +302 -283
  166. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  167. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  168. data/vendor/faiss/faiss/impl/ResultHandler.h +100 -75
  169. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +318 -7
  170. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +77 -1
  171. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  172. data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
  173. data/vendor/faiss/faiss/impl/VisitedTable.h +70 -28
  174. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  175. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  176. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  177. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  178. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  179. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  180. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  182. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  183. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  184. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  185. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  186. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  187. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  188. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  189. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  190. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  191. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  192. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  193. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  194. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  196. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  197. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +270 -0
  198. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  199. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  200. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  201. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  202. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  203. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  204. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  205. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  206. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  207. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  208. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  209. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  210. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  211. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +83 -0
  212. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +113 -0
  213. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +150 -0
  214. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +142 -0
  215. data/vendor/faiss/faiss/impl/index_read.cpp +1227 -79
  216. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  217. data/vendor/faiss/faiss/impl/index_write.cpp +96 -13
  218. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  219. data/vendor/faiss/faiss/impl/io_macros.h +58 -16
  220. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  221. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  222. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  223. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  224. data/vendor/faiss/faiss/impl/platform_macros.h +15 -4
  225. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  226. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  228. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  229. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +23 -0
  230. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +23 -0
  231. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +23 -0
  232. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  233. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  234. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +45 -107
  235. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  236. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +274 -5
  237. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +10 -7
  238. data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
  239. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +70 -0
  240. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  241. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +9 -2
  244. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +419 -19
  245. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  246. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  247. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +387 -2
  248. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
  249. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
  250. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +341 -2
  251. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +425 -3
  252. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +290 -2
  253. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +337 -0
  254. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  255. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  256. data/vendor/faiss/faiss/impl/simd_dispatch.h +157 -66
  257. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  258. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  259. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  260. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  261. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  262. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  263. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  264. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  265. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  266. data/vendor/faiss/faiss/index_factory.cpp +90 -18
  267. data/vendor/faiss/faiss/index_io.h +40 -0
  268. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  269. data/vendor/faiss/faiss/invlists/DirectMap.cpp +28 -15
  270. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  271. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +170 -86
  272. data/vendor/faiss/faiss/invlists/InvertedLists.h +88 -25
  273. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  274. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  275. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  276. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  277. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  278. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  279. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  280. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  281. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  282. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  283. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  284. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +142 -21
  285. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +33 -7
  286. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
  287. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
  288. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +77 -27
  289. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +10 -4
  290. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  291. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  292. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  293. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  294. data/vendor/faiss/faiss/utils/bf16.h +34 -0
  295. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  296. data/vendor/faiss/faiss/utils/distances.h +20 -1
  297. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  298. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  299. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  300. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  301. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  302. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  303. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  304. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -178
  305. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  306. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  307. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  308. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  309. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  310. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  311. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +16 -0
  312. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  313. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
  314. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  315. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +210 -0
  316. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
  317. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  318. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  319. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  320. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  321. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  322. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  323. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  324. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -989
  325. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  326. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  327. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  328. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  329. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  330. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  331. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  332. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  333. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  335. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  336. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  337. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  338. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  339. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  340. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  341. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  342. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  343. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  344. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  345. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  346. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1031 -0
  347. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  348. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  349. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
  350. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  351. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  352. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  353. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  354. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  355. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  356. data/vendor/faiss/faiss/utils/simd_levels.cpp +29 -7
  357. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  358. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  359. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  360. data/vendor/faiss/faiss/utils/utils.h +3 -3
  361. metadata +129 -34
  362. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  363. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  364. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  365. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  366. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  367. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  368. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  369. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  370. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  371. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  372. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  373. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  374. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  375. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  376. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  377. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  378. /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -25,12 +25,224 @@
25
25
 
26
26
  namespace faiss {
27
27
 
28
+ namespace {
29
+
30
+ // Gaussian Lloyd-Max optimal quantizer centroids and boundaries for N(0,1).
31
+ // clang-format off
32
+ const float kLloydMaxCentroids1[] = {
33
+ -0.797884560802865f, 0.797884560802865f
34
+ };
35
+ const float kLloydMaxBoundaries1[] = {
36
+ 0.000000000000000f
37
+ };
38
+ const float kLloydMaxCentroids2[] = {
39
+ -1.510417608499078f, -0.452780034636484f,
40
+ 0.452780034636483f, 1.510417608499078f
41
+ };
42
+ const float kLloydMaxBoundaries2[] = {
43
+ -0.981598821567781f, 0.000000000000000f, 0.981598821567781f
44
+ };
45
+ const float kLloydMaxCentroids3[] = {
46
+ -2.151945704536914f, -1.343909278504930f,
47
+ -0.756005281205826f, -0.245094178944203f,
48
+ 0.245094178944203f, 0.756005281205825f,
49
+ 1.343909278504930f, 2.151945704536914f
50
+ };
51
+ const float kLloydMaxBoundaries3[] = {
52
+ -1.747927491520922f, -1.049957279855378f,
53
+ -0.500549730075014f, 0.000000000000000f,
54
+ 0.500549730075014f, 1.049957279855378f,
55
+ 1.747927491520922f
56
+ };
57
+ const float kLloydMaxCentroids4[] = {
58
+ -2.732589570994957f, -2.069017226531159f,
59
+ -1.618046386021649f, -1.256231197346957f,
60
+ -0.942340456486774f, -0.656759118532318f,
61
+ -0.388048299490198f, -0.128395029851116f,
62
+ 0.128395029851116f, 0.388048299490198f,
63
+ 0.656759118532318f, 0.942340456486773f,
64
+ 1.256231197346959f, 1.618046386021649f,
65
+ 2.069017226531160f, 2.732589570994943f
66
+ };
67
+ const float kLloydMaxBoundaries4[] = {
68
+ -2.400803398763058f, -1.843531806276404f,
69
+ -1.437138791684303f, -1.099285826916865f,
70
+ -0.799549787509546f, -0.522403709011258f,
71
+ -0.258221664670657f, 0.000000000000000f,
72
+ 0.258221664670657f, 0.522403709011258f,
73
+ 0.799549787509546f, 1.099285826916866f,
74
+ 1.437138791684304f, 1.843531806276404f,
75
+ 2.400803398763051f
76
+ };
77
+ const float kLloydMaxCentroids8[] = {
78
+ -4.2734901319f, -3.8270895246f, -3.5457169520f, -3.3354593381f,
79
+ -3.1655721017f, -3.0219515320f, -2.8969009924f, -2.7857394515f,
80
+ -2.6853990170f, -2.5937556343f, -2.5092755166f, -2.4308135619f,
81
+ -2.3574913691f, -2.2886197969f, -2.2236478246f, -2.1621276457f,
82
+ -2.1036901632f, -2.0480273642f, -1.9948793740f, -1.9440247677f,
83
+ -1.8952732015f, -1.8484597247f, -1.8034403315f, -1.7600884415f,
84
+ -1.7182920846f, -1.6779516274f, -1.6389779215f, -1.6012907825f,
85
+ -1.5648177311f, -1.5294929453f, -1.4952563823f, -1.4620530375f,
86
+ -1.4298323186f, -1.3985475108f, -1.3681553217f, -1.3386154890f,
87
+ -1.3098904444f, -1.2819450217f, -1.2547462051f, -1.2282629097f,
88
+ -1.2024657910f, -1.1773270781f, -1.1528204287f, -1.1289208010f,
89
+ -1.1056043421f, -1.0828482901f, -1.0606308873f, -1.0389313043f,
90
+ -1.0177295729f, -0.9970065268f, -0.9767437492f, -0.9569235264f,
91
+ -0.9375288069f, -0.9185431646f, -0.8999507663f, -0.8817363426f,
92
+ -0.8638851621f, -0.8463830081f, -0.8292161569f, -0.8123713596f,
93
+ -0.7958358242f, -0.7795971999f, -0.7636435625f, -0.7479634007f,
94
+ -0.7325456038f, -0.7173794494f, -0.7024545929f, -0.6877610560f,
95
+ -0.6732892172f, -0.6590298016f, -0.6449738716f, -0.6311128174f,
96
+ -0.6174383481f, -0.6039424829f, -0.5906175419f, -0.5774561379f,
97
+ -0.5644511676f, -0.5515958029f, -0.5388834832f, -0.5263079060f,
98
+ -0.5138630194f, -0.5015430136f, -0.4893423125f, -0.4772555660f,
99
+ -0.4652776416f, -0.4534036165f, -0.4416287701f, -0.4299485757f,
100
+ -0.4183586932f, -0.4068549615f, -0.3954333909f, -0.3840901561f,
101
+ -0.3728215889f, -0.3616241712f, -0.3504945283f, -0.3394294221f,
102
+ -0.3284257446f, -0.3174805116f, -0.3065908567f, -0.2957540250f,
103
+ -0.2849673675f, -0.2742283355f, -0.2635344752f, -0.2528834222f,
104
+ -0.2422728967f, -0.2317006985f, -0.2211647022f, -0.2106628526f,
105
+ -0.2001931607f, -0.1897536989f, -0.1793425974f, -0.1689580400f,
106
+ -0.1585982605f, -0.1482615390f, -0.1379461985f, -0.1276506012f,
107
+ -0.1173731457f, -0.1071122637f, -0.0968664166f, -0.0866340933f,
108
+ -0.0764138065f, -0.0662040909f, -0.0560034994f, -0.0458106014f,
109
+ -0.0356239797f, -0.0254422284f, -0.0152639496f, -0.0050877521f,
110
+ 0.0050877521f, 0.0152639496f, 0.0254422284f, 0.0356239797f,
111
+ 0.0458106014f, 0.0560034994f, 0.0662040909f, 0.0764138065f,
112
+ 0.0866340933f, 0.0968664166f, 0.1071122637f, 0.1173731457f,
113
+ 0.1276506012f, 0.1379461985f, 0.1482615390f, 0.1585982605f,
114
+ 0.1689580400f, 0.1793425974f, 0.1897536989f, 0.2001931607f,
115
+ 0.2106628526f, 0.2211647022f, 0.2317006985f, 0.2422728967f,
116
+ 0.2528834222f, 0.2635344752f, 0.2742283355f, 0.2849673675f,
117
+ 0.2957540250f, 0.3065908567f, 0.3174805116f, 0.3284257446f,
118
+ 0.3394294221f, 0.3504945283f, 0.3616241712f, 0.3728215889f,
119
+ 0.3840901561f, 0.3954333909f, 0.4068549615f, 0.4183586932f,
120
+ 0.4299485757f, 0.4416287701f, 0.4534036165f, 0.4652776416f,
121
+ 0.4772555660f, 0.4893423125f, 0.5015430136f, 0.5138630194f,
122
+ 0.5263079060f, 0.5388834832f, 0.5515958029f, 0.5644511676f,
123
+ 0.5774561379f, 0.5906175419f, 0.6039424829f, 0.6174383481f,
124
+ 0.6311128174f, 0.6449738716f, 0.6590298016f, 0.6732892172f,
125
+ 0.6877610560f, 0.7024545929f, 0.7173794494f, 0.7325456038f,
126
+ 0.7479634007f, 0.7636435625f, 0.7795971999f, 0.7958358242f,
127
+ 0.8123713596f, 0.8292161569f, 0.8463830081f, 0.8638851621f,
128
+ 0.8817363426f, 0.8999507663f, 0.9185431646f, 0.9375288069f,
129
+ 0.9569235264f, 0.9767437492f, 0.9970065268f, 1.0177295729f,
130
+ 1.0389313043f, 1.0606308873f, 1.0828482901f, 1.1056043421f,
131
+ 1.1289208010f, 1.1528204287f, 1.1773270781f, 1.2024657910f,
132
+ 1.2282629097f, 1.2547462051f, 1.2819450217f, 1.3098904444f,
133
+ 1.3386154890f, 1.3681553217f, 1.3985475108f, 1.4298323186f,
134
+ 1.4620530375f, 1.4952563823f, 1.5294929453f, 1.5648177311f,
135
+ 1.6012907825f, 1.6389779215f, 1.6779516274f, 1.7182920846f,
136
+ 1.7600884415f, 1.8034403315f, 1.8484597247f, 1.8952732015f,
137
+ 1.9440247677f, 1.9948793740f, 2.0480273642f, 2.1036901632f,
138
+ 2.1621276457f, 2.2236478246f, 2.2886197969f, 2.3574913691f,
139
+ 2.4308135619f, 2.5092755166f, 2.5937556343f, 2.6853990170f,
140
+ 2.7857394515f, 2.8969009924f, 3.0219515320f, 3.1655721017f,
141
+ 3.3354593381f, 3.5457169520f, 3.8270895246f, 4.2734901319f
142
+ };
143
+ const float kLloydMaxBoundaries8[] = {
144
+ -4.0502898282f, -3.6864032383f, -3.4405881450f, -3.2505157199f,
145
+ -3.0937618168f, -2.9594262622f, -2.8413202220f, -2.7355692343f,
146
+ -2.6395773257f, -2.5515155755f, -2.4700445392f, -2.3941524655f,
147
+ -2.3230555830f, -2.2561338107f, -2.1928877352f, -2.1329089044f,
148
+ -2.0758587637f, -2.0214533691f, -1.9694520708f, -1.9196489846f,
149
+ -1.8718664631f, -1.8259500281f, -1.7817643865f, -1.7391902630f,
150
+ -1.6981218560f, -1.6584647744f, -1.6201343520f, -1.5830542568f,
151
+ -1.5471553382f, -1.5123746638f, -1.4786547099f, -1.4459426781f,
152
+ -1.4141899147f, -1.3833514163f, -1.3533854053f, -1.3242529667f,
153
+ -1.2959177331f, -1.2683456134f, -1.2415045574f, -1.2153643503f,
154
+ -1.1898964346f, -1.1650737534f, -1.1408706148f, -1.1172625715f,
155
+ -1.0942263161f, -1.0717395887f, -1.0497810958f, -1.0283304386f,
156
+ -1.0073680499f, -0.9868751380f, -0.9668336378f, -0.9472261667f,
157
+ -0.9280359858f, -0.9092469654f, -0.8908435544f, -0.8728107524f,
158
+ -0.8551340851f, -0.8377995825f, -0.8207937582f, -0.8041035919f,
159
+ -0.7877165121f, -0.7716203812f, -0.7558034816f, -0.7402545023f,
160
+ -0.7249625266f, -0.7099170212f, -0.6951078244f, -0.6805251366f,
161
+ -0.6661595094f, -0.6520018366f, -0.6380433445f, -0.6242755828f,
162
+ -0.6106904155f, -0.5972800124f, -0.5840368399f, -0.5709536527f,
163
+ -0.5580234853f, -0.5452396431f, -0.5325956946f, -0.5200854627f,
164
+ -0.5077030165f, -0.4954426631f, -0.4832989393f, -0.4712666038f,
165
+ -0.4593406291f, -0.4475161933f, -0.4357886729f, -0.4241536345f,
166
+ -0.4126068274f, -0.4011441762f, -0.3897617735f, -0.3784558725f,
167
+ -0.3672228800f, -0.3560593498f, -0.3449619752f, -0.3339275834f,
168
+ -0.3229531281f, -0.3120356842f, -0.3011724408f, -0.2903606962f,
169
+ -0.2795978515f, -0.2688814053f, -0.2582089487f, -0.2475781595f,
170
+ -0.2369867976f, -0.2264327004f, -0.2159137774f, -0.2054280067f,
171
+ -0.1949734298f, -0.1845481481f, -0.1741503187f, -0.1637781502f,
172
+ -0.1534298998f, -0.1431038688f, -0.1327983999f, -0.1225118735f,
173
+ -0.1122427047f, -0.1019893401f, -0.0917502549f, -0.0815239499f,
174
+ -0.0713089487f, -0.0611037951f, -0.0509070504f, -0.0407172906f,
175
+ -0.0305331041f, -0.0203530890f, -0.0101758509f, 0.0000000000f,
176
+ 0.0101758509f, 0.0203530890f, 0.0305331041f, 0.0407172906f,
177
+ 0.0509070504f, 0.0611037951f, 0.0713089487f, 0.0815239499f,
178
+ 0.0917502549f, 0.1019893401f, 0.1122427047f, 0.1225118735f,
179
+ 0.1327983999f, 0.1431038688f, 0.1534298998f, 0.1637781502f,
180
+ 0.1741503187f, 0.1845481481f, 0.1949734298f, 0.2054280067f,
181
+ 0.2159137774f, 0.2264327004f, 0.2369867976f, 0.2475781595f,
182
+ 0.2582089487f, 0.2688814053f, 0.2795978515f, 0.2903606962f,
183
+ 0.3011724408f, 0.3120356842f, 0.3229531281f, 0.3339275834f,
184
+ 0.3449619752f, 0.3560593498f, 0.3672228800f, 0.3784558725f,
185
+ 0.3897617735f, 0.4011441762f, 0.4126068274f, 0.4241536345f,
186
+ 0.4357886729f, 0.4475161933f, 0.4593406291f, 0.4712666038f,
187
+ 0.4832989393f, 0.4954426631f, 0.5077030165f, 0.5200854627f,
188
+ 0.5325956946f, 0.5452396431f, 0.5580234853f, 0.5709536527f,
189
+ 0.5840368399f, 0.5972800124f, 0.6106904155f, 0.6242755828f,
190
+ 0.6380433445f, 0.6520018366f, 0.6661595094f, 0.6805251366f,
191
+ 0.6951078244f, 0.7099170212f, 0.7249625266f, 0.7402545023f,
192
+ 0.7558034816f, 0.7716203812f, 0.7877165121f, 0.8041035919f,
193
+ 0.8207937582f, 0.8377995825f, 0.8551340851f, 0.8728107524f,
194
+ 0.8908435544f, 0.9092469654f, 0.9280359858f, 0.9472261667f,
195
+ 0.9668336378f, 0.9868751380f, 1.0073680499f, 1.0283304386f,
196
+ 1.0497810958f, 1.0717395887f, 1.0942263161f, 1.1172625715f,
197
+ 1.1408706148f, 1.1650737534f, 1.1898964346f, 1.2153643503f,
198
+ 1.2415045574f, 1.2683456134f, 1.2959177331f, 1.3242529667f,
199
+ 1.3533854053f, 1.3833514163f, 1.4141899147f, 1.4459426781f,
200
+ 1.4786547099f, 1.5123746638f, 1.5471553382f, 1.5830542568f,
201
+ 1.6201343520f, 1.6584647744f, 1.6981218560f, 1.7391902630f,
202
+ 1.7817643865f, 1.8259500281f, 1.8718664631f, 1.9196489846f,
203
+ 1.9694520708f, 2.0214533691f, 2.0758587637f, 2.1329089044f,
204
+ 2.1928877352f, 2.2561338107f, 2.3230555830f, 2.3941524655f,
205
+ 2.4700445392f, 2.5515155755f, 2.6395773257f, 2.7355692343f,
206
+ 2.8413202220f, 2.9594262622f, 3.0937618168f, 3.2505157199f,
207
+ 3.4405881450f, 3.6864032383f, 4.0502898282f
208
+ };
209
+ // clang-format on
210
+
211
+ struct LloydMaxTable { // pairs the two precomputed lookup arrays used for one bit width
212
+ const float* centroids; // centroid array; nullptr in kLloydMaxTables slots that have no table
213
+ const float* boundaries; // boundary array; nullptr in kLloydMaxTables slots that have no table
214
+ };
215
+
216
+ const LloydMaxTable kLloydMaxTables[] = { // indexed directly by bit width (populate_lloyd_max_trained uses kLloydMaxTables[mse_bits])
217
+ {nullptr, nullptr}, // 0 (no table; keeps array index equal to bit width)
218
+ {kLloydMaxCentroids1, kLloydMaxBoundaries1}, // 1
219
+ {kLloydMaxCentroids2, kLloydMaxBoundaries2}, // 2
220
+ {kLloydMaxCentroids3, kLloydMaxBoundaries3}, // 3
221
+ {kLloydMaxCentroids4, kLloydMaxBoundaries4}, // 4
222
+ {nullptr, nullptr}, // 5 (unused; rejected by the nullptr check in populate_lloyd_max_trained)
223
+ {nullptr, nullptr}, // 6 (unused; rejected by the nullptr check in populate_lloyd_max_trained)
224
+ {nullptr, nullptr}, // 7 (unused; rejected by the nullptr check in populate_lloyd_max_trained)
225
+ {kLloydMaxCentroids8, kLloydMaxBoundaries8}, // 8
226
+ };
227
+
228
+ void populate_lloyd_max_trained(size_t mse_bits, std::vector<float>& trained) { // overwrite `trained` with the precomputed centroids followed by the boundaries for `mse_bits`
229
+ FAISS_THROW_IF_NOT(mse_bits >= 1 && mse_bits <= 8); // guard: index must stay inside kLloydMaxTables (slots 1..8)
230
+ FAISS_THROW_IF_NOT(kLloydMaxTables[mse_bits].centroids != nullptr); // guard: slots 5..7 hold nullptr and have no table
231
+ size_t k = size_t(1) << mse_bits; // k = 2^mse_bits centroids are copied
232
+ const auto& t = kLloydMaxTables[mse_bits];
233
+ trained.resize(k + (k - 1)); // layout: k centroids, then k - 1 boundaries
234
+ std::copy(t.centroids, t.centroids + k, trained.begin()); // trained[0 .. k-1] = centroids
235
+ std::copy(t.boundaries, t.boundaries + k - 1, trained.begin() + k); // trained[k .. 2k-2] = boundaries
236
+ }
237
+
238
+ } // namespace
239
+
28
240
  /*******************************************************************
29
241
  * ScalarQuantizer implementation
30
242
  ********************************************************************/
31
243
 
32
- ScalarQuantizer::ScalarQuantizer(size_t d, QuantizerType qtype)
33
- : Quantizer(d), qtype(qtype) {
244
+ ScalarQuantizer::ScalarQuantizer(size_t d_in, QuantizerType qtype_in) // parameters carry an _in suffix, so they no longer share names with the d / qtype members
245
+ : Quantizer(d_in), qtype(qtype_in) { // dimension goes to the Quantizer base, quantizer type is stored in this class
34
246
  set_derived_sizes(); // fills code_size and bits from qtype and d
35
247
  }
36
248
 
@@ -38,15 +250,29 @@ ScalarQuantizer::ScalarQuantizer() {}
38
250
 
39
251
  void ScalarQuantizer::set_derived_sizes() {
40
252
  switch (qtype) {
253
+ case QT_1bit_tqmse:
254
+ code_size = (d + 7) / 8;
255
+ bits = 1;
256
+ break;
257
+ case QT_2bit_tqmse:
258
+ code_size = (d * 2 + 7) / 8;
259
+ bits = 2;
260
+ break;
261
+ case QT_3bit_tqmse:
262
+ code_size = (d * 3 + 7) / 8;
263
+ bits = 3;
264
+ break;
41
265
  case QT_8bit:
42
266
  case QT_8bit_uniform:
43
267
  case QT_8bit_direct:
44
268
  case QT_8bit_direct_signed:
269
+ case QT_8bit_tqmse:
45
270
  code_size = d;
46
271
  bits = 8;
47
272
  break;
48
273
  case QT_4bit:
49
274
  case QT_4bit_uniform:
275
+ case QT_4bit_tqmse:
50
276
  code_size = (d + 1) / 2;
51
277
  bits = 4;
52
278
  break;
@@ -62,6 +288,28 @@ void ScalarQuantizer::set_derived_sizes() {
62
288
  code_size = d * 2;
63
289
  bits = 16;
64
290
  break;
291
+ case QT_0bit:
292
+ code_size = 0;
293
+ bits = 0;
294
+ break;
295
+ case QT_2bit_tq:
296
+ case QT_3bit_tq:
297
+ case QT_4bit_tq:
298
+ case QT_5bit_tq: {
299
+ size_t nb_bits = (qtype == QT_2bit_tq) ? 2
300
+ : (qtype == QT_3bit_tq) ? 3
301
+ : (qtype == QT_4bit_tq) ? 4
302
+ : (qtype == QT_5bit_tq) ? 5
303
+ : 0;
304
+ FAISS_THROW_IF_NOT_MSG(nb_bits > 0, "unexpected TurboQ qtype");
305
+ size_t mse_bits = nb_bits - 1;
306
+ size_t mse_bytes = mse_bits * ((d + 7) / 8);
307
+ size_t qjl_bytes = (d + 7) / 8;
308
+ code_size = mse_bytes + qjl_bytes +
309
+ sizeof(scalar_quantizer::SQTurboQFactors);
310
+ bits = nb_bits;
311
+ break;
312
+ }
65
313
  default:
66
314
  break;
67
315
  }
@@ -71,6 +319,10 @@ void ScalarQuantizer::train(size_t n, const float* x) {
71
319
  using scalar_quantizer::train_NonUniform;
72
320
  using scalar_quantizer::train_Uniform;
73
321
 
322
+ if (qtype == QT_0bit) {
323
+ return; // nothing to train for centroid-only mode
324
+ }
325
+
74
326
  int bit_per_dim = qtype == QT_4bit_uniform ? 4
75
327
  : qtype == QT_4bit ? 4
76
328
  : qtype == QT_6bit ? 6
@@ -81,6 +333,8 @@ void ScalarQuantizer::train(size_t n, const float* x) {
81
333
  switch (qtype) {
82
334
  case QT_4bit_uniform:
83
335
  case QT_8bit_uniform:
336
+ FAISS_THROW_IF_NOT(n > 0);
337
+ FAISS_THROW_IF_NOT(x != nullptr);
84
338
  train_Uniform(
85
339
  rangestat,
86
340
  rangestat_arg,
@@ -92,6 +346,8 @@ void ScalarQuantizer::train(size_t n, const float* x) {
92
346
  case QT_4bit:
93
347
  case QT_8bit:
94
348
  case QT_6bit:
349
+ FAISS_THROW_IF_NOT(n > 0);
350
+ FAISS_THROW_IF_NOT(x != nullptr);
95
351
  train_NonUniform(
96
352
  rangestat,
97
353
  rangestat_arg,
@@ -107,13 +363,61 @@ void ScalarQuantizer::train(size_t n, const float* x) {
107
363
  case QT_8bit_direct_signed:
108
364
  // no training necessary
109
365
  break;
366
+ case QT_1bit_tqmse:
367
+ populate_lloyd_max_trained(1, trained);
368
+ break;
369
+ case QT_2bit_tqmse:
370
+ populate_lloyd_max_trained(2, trained);
371
+ break;
372
+ case QT_3bit_tqmse:
373
+ populate_lloyd_max_trained(3, trained);
374
+ break;
375
+ case QT_4bit_tqmse:
376
+ populate_lloyd_max_trained(4, trained);
377
+ break;
378
+ case QT_8bit_tqmse:
379
+ populate_lloyd_max_trained(8, trained);
380
+ break;
381
+ case QT_2bit_tq:
382
+ case QT_3bit_tq:
383
+ case QT_4bit_tq:
384
+ case QT_5bit_tq: {
385
+ size_t mse_bits = bits - 1;
386
+ populate_lloyd_max_trained(mse_bits, trained);
387
+ // Pack seed and qjl_type at end of trained for dispatch
388
+ float seed_f[2];
389
+ TurboQuantRefine::pack_seed(turboq_refine.seed, seed_f);
390
+ trained.push_back(seed_f[0]);
391
+ trained.push_back(seed_f[1]);
392
+ trained.push_back(static_cast<float>(turboq_refine.qjl_type));
393
+ turboq_refine.init_projection(d);
394
+ break;
395
+ }
110
396
  default:
111
397
  break;
112
398
  }
113
399
  }
114
400
 
401
+ void ScalarQuantizer::TurboQuantRefine::init_projection(size_t d) { // builds the seed-derived projection state for dimension d: fwht_signs or rr_matrix, depending on use_fwht()
402
+ if (use_fwht()) { // taken when qjl_type == 0
403
+ padded_d = 1;
404
+ while (padded_d < d) { // round up: padded_d ends as the smallest power of two >= d (1 when d is 0 or 1)
405
+ padded_d <<= 1;
406
+ }
407
+ fwht_signs.resize(padded_d); // one sign per padded dimension
408
+ RandomGenerator rng(seed); // seeded from the `seed` member
409
+ for (size_t i = 0; i < padded_d; i++) {
410
+ fwht_signs[i] = (rng.rand_int(2) == 0) ? 1.0f : -1.0f; // each entry is +1 or -1, chosen by the generator
411
+ }
412
+ } else {
413
+ rr_matrix.resize(d * d); // storage for a d x d matrix
414
+ float_randn(rr_matrix.data(), d * d, seed); // fills all d * d entries from `seed`; presumably normal-distributed values -- confirm against float_randn
415
+ matrix_qr(static_cast<int>(d), static_cast<int>(d), rr_matrix.data()); // NOTE(review): presumably replaces the matrix in place with its orthonormal QR factor -- confirm against matrix_qr
416
+ }
417
+ }
418
+
115
419
  ScalarQuantizer::SQuantizer* ScalarQuantizer::select_quantizer() const {
116
- return with_simd_level([&]<SIMDLevel SL>() -> SQuantizer* {
420
+ return with_simd_level_spr([&]<SIMDLevel SL>() -> SQuantizer* {
117
421
  if constexpr (SL != SIMDLevel::NONE) {
118
422
  auto* q = scalar_quantizer::sq_select_quantizer<SL>(
119
423
  qtype, d, trained);
@@ -128,20 +432,27 @@ ScalarQuantizer::SQuantizer* ScalarQuantizer::select_quantizer() const {
128
432
 
129
433
  void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n) // encodes n vectors read from x into codes
130
434
  const {
435
+ if (code_size == 0) {
436
+ return; // QT_0bit: nothing to encode
437
+ }
131
438
  std::unique_ptr<SQuantizer> squant(select_quantizer()); // takes ownership of the quantizer returned by select_quantizer()
132
439
 
133
440
  memset(codes, 0, code_size * n); // clear all n * code_size output bytes before encoding
134
441
  #pragma omp parallel for
135
- for (int64_t i = 0; i < n; i++) {
442
+ for (int64_t i = 0; i < static_cast<int64_t>(n); i++) { // n is cast so both sides of the comparison are signed
136
443
  squant->encode_vector(x + i * d, codes + i * code_size); // vector i: input stride is d floats, output stride is code_size bytes
137
444
  }
138
445
  }
139
446
 
140
447
  void ScalarQuantizer::decode(const uint8_t* codes, float* x, size_t n) const { // decodes n codes into n * d floats written to x
448
+ if (code_size == 0) {
449
+ memset(x, 0, sizeof(float) * d * n); // all n * d output floats are set to zero bytes
450
+ return; // QT_0bit: no per-vector data, zero-fill
451
+ }
141
452
  std::unique_ptr<SQuantizer> squant(select_quantizer()); // takes ownership of the quantizer returned by select_quantizer()
142
453
 
143
454
  #pragma omp parallel for
144
- for (int64_t i = 0; i < n; i++) {
455
+ for (int64_t i = 0; i < static_cast<int64_t>(n); i++) { // n is cast so both sides of the comparison are signed
145
456
  squant->decode_vector(codes + i * code_size, x + i * d); // code i: input stride is code_size bytes, output stride is d floats
146
457
  }
147
458
  }
@@ -149,7 +460,7 @@ void ScalarQuantizer::decode(const uint8_t* codes, float* x, size_t n) const {
149
460
  ScalarQuantizer::SQDistanceComputer* ScalarQuantizer::get_distance_computer(
150
461
  MetricType metric) const {
151
462
  FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT);
152
- return with_simd_level([&]<SIMDLevel SL>() -> SQDistanceComputer* {
463
+ return with_simd_level_spr([&]<SIMDLevel SL>() -> SQDistanceComputer* {
153
464
  if constexpr (SL != SIMDLevel::NONE) {
154
465
  auto* dc = scalar_quantizer::sq_select_distance_computer<SL>(
155
466
  metric, qtype, d, trained);
@@ -168,7 +479,7 @@ InvertedListScanner* ScalarQuantizer::select_InvertedListScanner(
168
479
  bool store_pairs,
169
480
  const IDSelector* sel,
170
481
  bool by_residual) const {
171
- return with_simd_level([&]<SIMDLevel SL>() -> InvertedListScanner* {
482
+ return with_simd_level_spr([&]<SIMDLevel SL>() -> InvertedListScanner* {
172
483
  if constexpr (SL != SIMDLevel::NONE) {
173
484
  auto* s = scalar_quantizer::sq_select_InvertedListScanner<SL>(
174
485
  qtype,
@@ -7,6 +7,8 @@
7
7
 
8
8
  #pragma once
9
9
 
10
+ #include <cstring>
11
+
10
12
  #include <faiss/impl/AuxIndexStructures.h>
11
13
  #include <faiss/impl/DistanceComputer.h>
12
14
  #include <faiss/impl/Quantizer.h>
@@ -33,6 +35,17 @@ struct ScalarQuantizer : Quantizer {
33
35
  QT_bf16,
34
36
  QT_8bit_direct_signed, ///< fast indexing of signed int8s ranging from
35
37
  ///< [-128 to 127]
38
+ QT_0bit, ///< 0 bits per component, centroid-only distance (for IVF)
39
+ QT_1bit_tqmse, ///< TurboQuant MSE-optimized, 1 bit per component
40
+ QT_2bit_tqmse, ///< TurboQuant MSE-optimized, 2 bits per component
41
+ QT_3bit_tqmse, ///< TurboQuant MSE-optimized, 3 bits per component
42
+ QT_4bit_tqmse, ///< TurboQuant MSE-optimized, 4 bits per component
43
+ QT_8bit_tqmse, ///< TurboQuant MSE-optimized, 8 bits per component
44
+ QT_2bit_tq, ///< Full TurboQuant (1-bit MSE + 1-bit QJL + factors)
45
+ QT_3bit_tq, ///< Full TurboQuant (2-bit MSE + 1-bit QJL + factors)
46
+ QT_4bit_tq, ///< Full TurboQuant (3-bit MSE + 1-bit QJL + factors)
47
+ QT_5bit_tq, ///< Full TurboQuant (4-bit MSE + 1-bit QJL + factors)
48
+ QT_count
36
49
  };
37
50
 
38
51
  QuantizerType qtype = QT_8bit;
@@ -58,7 +71,7 @@ struct ScalarQuantizer : Quantizer {
58
71
  /// trained values (including the range)
59
72
  std::vector<float> trained;
60
73
 
61
- ScalarQuantizer(size_t d, QuantizerType qtype);
74
+ ScalarQuantizer(size_t d_in, QuantizerType qtype_in);
62
75
  ScalarQuantizer();
63
76
 
64
77
  /// updates internal values based on qtype and d
@@ -100,11 +113,74 @@ struct ScalarQuantizer : Quantizer {
100
113
 
101
114
  virtual float query_to_code(const uint8_t* code) const = 0;
102
115
 
116
+ /// Compute four query-to-code distances in one call. Default loops
117
+ /// query_to_code four times; per-SIMD specializations may batch the
118
+ /// inner dim loop across the four codes to amortize query state and
119
+ /// expose ILP across independent accumulators.
120
+ virtual void query_to_codes_batch_4(
121
+ const uint8_t* code_0,
122
+ const uint8_t* code_1,
123
+ const uint8_t* code_2,
124
+ const uint8_t* code_3,
125
+ float& dis0,
126
+ float& dis1,
127
+ float& dis2,
128
+ float& dis3) const { // results are returned through dis0..dis3; disN receives the distance for code_N
129
+ dis0 = query_to_code(code_0);
130
+ dis1 = query_to_code(code_1);
131
+ dis2 = query_to_code(code_2);
132
+ dis3 = query_to_code(code_3);
133
+ }
134
+
103
135
  float distance_to_code(const uint8_t* code) final {
104
136
  return query_to_code(code);
105
137
  }
106
138
  };
107
139
 
140
+ /// TurboQuant full (QT_*_tq) refinement state, isolated from the
141
+ /// main ScalarQuantizer to avoid polluting it with TQ-specific data.
142
+ struct TurboQuantRefine {
143
+ static bool is_turboq_full(QuantizerType qt) { // true for QT_2bit_tq, QT_3bit_tq, QT_4bit_tq, QT_5bit_tq
144
+ return qt >= QT_2bit_tq && qt <= QT_5bit_tq; // range test: relies on these four enumerators being declared consecutively
145
+ }
146
+
147
+ static void pack_seed(uint64_t seed, float out[2]) { // stores the 64-bit seed in two float slots
148
+ static_assert(sizeof(uint64_t) == 2 * sizeof(float));
149
+ std::memcpy(out, &seed, sizeof(uint64_t)); // raw byte copy: the floats carry the seed's bit pattern, not a numeric conversion
150
+ }
151
+
152
+ static uint64_t unpack_seed(float lo, float hi) { // inverse of pack_seed: lo is out[0], hi is out[1]
153
+ float tmp[2] = {lo, hi};
154
+ uint64_t s;
155
+ static_assert(sizeof(uint64_t) == 2 * sizeof(float));
156
+ std::memcpy(&s, tmp, sizeof(uint64_t)); // raw byte copy back into the integer
157
+ return s;
158
+ }
159
+
160
+ uint8_t qjl_type = 0; // 0 selects the FWHT path (see use_fwht); any other value selects the rr_matrix path in init_projection
161
+ uint64_t seed = 42; // seeds the random generation in init_projection
162
+ size_t padded_d = 0; // set by init_projection on the FWHT path: smallest power of two >= d
163
+ std::vector<float> fwht_signs; // filled by init_projection on the FWHT path: padded_d entries, each +1 or -1
164
+ std::vector<float> rr_matrix; // filled by init_projection on the non-FWHT path: d * d entries
165
+ size_t nb_bits_lo = 0; // NOTE(review): not referenced in the visible code -- purpose to be confirmed
166
+ size_t n_hi_dims = 0; // NOTE(review): not referenced in the visible code -- purpose to be confirmed
167
+
168
+ void init_projection(size_t d); // (re)builds fwht_signs / padded_d or rr_matrix from seed for dimension d
169
+ bool use_fwht() const {
170
+ return qjl_type == 0;
171
+ }
172
+
173
+ struct DistanceComputer : SQDistanceComputer { // abstract extension of SQDistanceComputer; all three hooks are pure virtual
174
+ virtual void configure(uint8_t qb, bool int_qjl) = 0; // NOTE(review): parameter semantics are not visible here -- confirm against implementations
175
+ virtual void set_prescreen_threshold(
176
+ const float* t,
177
+ bool minimize) = 0; // NOTE(review): lifetime requirements of t are not visible here -- confirm against implementations
178
+ virtual void clear_prescreen_threshold() = 0;
179
+ };
180
+ };
181
+
182
+ TurboQuantRefine turboq_refine;
183
+
108
184
  SQDistanceComputer* get_distance_computer(
109
185
  MetricType metric = METRIC_L2) const;
110
186
 
@@ -19,8 +19,8 @@ ThreadedIndex<IndexT>::ThreadedIndex(bool threaded)
19
19
  : ThreadedIndex(0, threaded) {}
20
20
 
21
21
  template <typename IndexT>
22
- ThreadedIndex<IndexT>::ThreadedIndex(int d, bool threaded)
23
- : IndexT(d), isThreaded_(threaded) {}
22
+ ThreadedIndex<IndexT>::ThreadedIndex(int d_in, bool threaded) // only the first parameter's name changed (d -> d_in)
23
+ : IndexT(d_in), isThreaded_(threaded) {} // forwards the dimension to the IndexT base and records the threading flag
24
24
 
25
25
  template <typename IndexT>
26
26
  ThreadedIndex<IndexT>::~ThreadedIndex() {
@@ -122,11 +122,12 @@ void ThreadedIndex<IndexT>::runOnIndex(std::function<void(int, IndexT*)> f) {
122
122
  if (isThreaded_) {
123
123
  std::vector<std::future<bool>> v;
124
124
 
125
- for (int i = 0; i < this->indices_.size(); ++i) {
125
+ for (size_t i = 0; i < this->indices_.size(); ++i) {
126
126
  auto& p = this->indices_[i];
127
127
  auto indexPtr = p.first;
128
+ int idx = static_cast<int>(i);
128
129
  v.emplace_back(
129
- p.second->add([f, i, indexPtr]() { f(i, indexPtr); }));
130
+ p.second->add([f, idx, indexPtr]() { f(idx, indexPtr); }));
130
131
  }
131
132
 
132
133
  waitAndHandleFutures(v);
@@ -135,13 +136,14 @@ void ThreadedIndex<IndexT>::runOnIndex(std::function<void(int, IndexT*)> f) {
135
136
  // while letting everything else run to completion
136
137
  std::vector<std::pair<int, std::exception_ptr>> exceptions;
137
138
 
138
- for (int i = 0; i < this->indices_.size(); ++i) {
139
+ for (size_t i = 0; i < this->indices_.size(); ++i) {
139
140
  auto& p = this->indices_[i];
140
141
  try {
141
- f(i, p.first);
142
+ f(static_cast<int>(i), p.first);
142
143
  } catch (...) {
143
144
  exceptions.emplace_back(
144
- std::make_pair(i, std::current_exception()));
145
+ std::make_pair(
146
+ static_cast<int>(i), std::current_exception()));
145
147
  }
146
148
  }
147
149
 
@@ -164,10 +166,10 @@ void ThreadedIndex<IndexT>::reset() {
164
166
  }
165
167
 
166
168
  template <typename IndexT>
167
- void ThreadedIndex<IndexT>::onAfterAddIndex(IndexT* index) {}
169
+ void ThreadedIndex<IndexT>::onAfterAddIndex(IndexT* /* index */) {} // empty default body; the parameter name is commented out because the body never uses it
168
170
 
169
171
  template <typename IndexT>
170
- void ThreadedIndex<IndexT>::onAfterRemoveIndex(IndexT* index) {}
172
+ void ThreadedIndex<IndexT>::onAfterRemoveIndex(IndexT* /* index */) {} // empty default body; the parameter name is commented out because the body never uses it
171
173
 
172
174
  template <typename IndexT>
173
175
  void ThreadedIndex<IndexT>::waitAndHandleFutures(
@@ -176,14 +178,15 @@ void ThreadedIndex<IndexT>::waitAndHandleFutures(
176
178
  // exceptions that are generated
177
179
  std::vector<std::pair<int, std::exception_ptr>> exceptions;
178
180
 
179
- for (int i = 0; i < v.size(); ++i) {
181
+ for (size_t i = 0; i < v.size(); ++i) {
180
182
  auto& fut = v[i];
181
183
 
182
184
  try {
183
185
  fut.get();
184
186
  } catch (...) {
185
187
  exceptions.emplace_back(
186
- std::make_pair(i, std::current_exception()));
188
+ std::make_pair(
189
+ static_cast<int>(i), std::current_exception()));
187
190
  }
188
191
  }
189
192
 
@@ -18,19 +18,19 @@ namespace faiss {
18
18
  // A size of ~1M seems to be the threshold where the hash set wins.
19
19
  size_t visited_table_hashset_threshold = 500000;
20
20
 
21
- VisitedTable::VisitedTable(size_t size, std::optional<bool> use_hashset)
22
- : visno(use_hashset.value_or(size >= visited_table_hashset_threshold)
23
- ? 0
24
- : 1) {
25
- if (visno != 0) {
26
- visited.resize(size, 0);
21
+ std::unique_ptr<VisitedTable> VisitedTable::create( // factory: returns either a VisitedTableSet or a VisitedTableVector
22
+ size_t size,
23
+ std::optional<bool> use_hashset) {
24
+ bool use_set =
25
+ use_hashset.value_or(size >= visited_table_hashset_threshold); // an explicit use_hashset wins; otherwise the set is chosen when size reaches the threshold
26
+ if (use_set) {
27
+ return std::make_unique<VisitedTableSet>(); // set variant is constructed without a size
27
28
  }
29
+ return std::make_unique<VisitedTableVector>(size); // vector variant receives size at construction
28
30
  }
29
31
 
30
- void VisitedTable::advance() {
31
- if (visno == 0) {
32
- visited_set.clear();
33
- } else if (visno < 254) {
32
+ void VisitedTableVector::advance() {
33
+ if (visno < 254) {
34
34
  // 254 rather than 255 because sometimes we use visno and visno+1
35
35
  ++visno;
36
36
  } else {