faiss 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  84. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  85. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  86. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  87. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  88. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  89. data/vendor/faiss/faiss/MetricType.h +14 -7
  90. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  91. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  92. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  93. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  94. data/vendor/faiss/faiss/build.cpp +23 -0
  95. data/vendor/faiss/faiss/build.h +15 -0
  96. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  101. data/vendor/faiss/faiss/factory_tools.cpp +5 -0
  102. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  109. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  110. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  111. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  112. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  113. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  114. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  115. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  116. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  117. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  120. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  121. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  122. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  123. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  124. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  125. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  126. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  127. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  128. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  129. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  130. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  131. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  132. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  133. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  134. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  135. data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
  136. data/vendor/faiss/faiss/impl/HNSW.h +13 -34
  137. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  138. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  139. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  141. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  142. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  143. data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
  144. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  145. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  146. data/vendor/faiss/faiss/impl/Panorama.h +258 -87
  147. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  148. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  149. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  150. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  151. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  152. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  153. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
  154. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  155. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  156. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  157. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
  158. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  159. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  160. data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
  161. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
  162. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
  163. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  164. data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
  165. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  166. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  167. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  168. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  169. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  170. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  171. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  172. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  173. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  174. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  175. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  176. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  177. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  178. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  179. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  180. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  182. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  183. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  184. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  185. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  186. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  187. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  188. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  189. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  190. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  191. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  192. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  193. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  194. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  196. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  197. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  198. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  199. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  200. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  201. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  202. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  203. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  204. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  205. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  206. data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
  207. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  208. data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
  209. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  210. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  211. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  212. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  213. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  214. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  215. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  216. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  217. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  218. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  219. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  220. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  221. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  222. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  223. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  224. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  225. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
  226. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
  228. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
  229. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  230. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  231. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  232. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  233. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
  234. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
  235. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  236. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  237. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
  238. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
  239. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
  240. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
  241. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  244. data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
  245. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  246. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  247. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  248. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  249. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  250. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  251. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  252. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  253. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  254. data/vendor/faiss/faiss/index_factory.cpp +86 -18
  255. data/vendor/faiss/faiss/index_io.h +24 -0
  256. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  257. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  258. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  259. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  260. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  261. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  262. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  263. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  264. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  265. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  266. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  267. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  268. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  269. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  270. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  271. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  272. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  273. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
  274. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  275. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
  276. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
  277. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  278. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  279. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  280. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  281. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  282. data/vendor/faiss/faiss/utils/distances.h +20 -1
  283. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  284. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  285. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  286. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  287. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  288. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  289. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  290. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
  291. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  292. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  293. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  294. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  295. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  296. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  297. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  298. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  299. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  300. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  301. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  302. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  303. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  304. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  305. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  306. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  307. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  308. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  309. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  310. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  311. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  312. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  313. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  314. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  315. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  316. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  317. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  318. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  319. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  320. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  321. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  322. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  323. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  324. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  325. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  326. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  327. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  328. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  329. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  330. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  331. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  332. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  333. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  339. data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
  340. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  341. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  342. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  343. data/vendor/faiss/faiss/utils/utils.h +3 -3
  344. metadata +119 -34
  345. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  346. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  347. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  348. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  349. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  350. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  351. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  352. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  353. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  354. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  355. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  356. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  357. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  358. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  359. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  360. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  361. data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -22,6 +22,190 @@ static float sqr(float x) {
22
22
  return x * x;
23
23
  }
24
24
 
25
+ constexpr size_t kTurboQuantMaxBits = 8;
26
+ // TurboQuant builds a 1-D optimal scalar quantizer analytically. We approximate
27
+ // the target density on a uniform grid over [-1, 1]; the grid is kept dense
28
+ // enough both in absolute terms and per output centroid.
29
+ constexpr size_t kTurboQuantGridMin = 1 << 15;
30
+ constexpr size_t kTurboQuantGridPerCentroid = 512;
31
+ constexpr int kTurboQuantMaxIter = 100;
32
+ constexpr double kTurboQuantTol = 1e-8;
33
+
34
+ void build_TurboQuantMSECodebook(
35
+ size_t d,
36
+ size_t nbits,
37
+ std::vector<float>& centroids,
38
+ std::vector<float>& boundaries) {
39
+ FAISS_THROW_IF_NOT_FMT(
40
+ nbits <= kTurboQuantMaxBits,
41
+ "invalid TurboQuant nbits %zu (must be in [0, %zu])",
42
+ nbits,
43
+ kTurboQuantMaxBits);
44
+
45
+ if (nbits == 0) {
46
+ centroids.clear();
47
+ boundaries.clear();
48
+ return;
49
+ }
50
+
51
+ const size_t k = size_t(1) << nbits;
52
+
53
+ if (d == 1) {
54
+ // In 1-D, a unit vector can only be -1 or +1, so the marginal
55
+ // distribution collapses to two atoms. The TurboQuant codebook is
56
+ // therefore a repeated pair of endpoint centroids.
57
+ centroids.resize(k);
58
+ for (size_t i = 0; i < k; i++) {
59
+ centroids[i] = i < k / 2 ? -1.0f : 1.0f;
60
+ }
61
+ boundaries.resize(k - 1);
62
+ for (size_t i = 0; i + 1 < k; i++) {
63
+ boundaries[i] = 0.5f * (centroids[i] + centroids[i + 1]);
64
+ }
65
+ return;
66
+ }
67
+
68
+ // For d > 1, TurboQuant uses the marginal distribution of one coordinate of
69
+ // a random unit vector in R^d. On [-1, 1], this density is proportional to
70
+ // (1 - x^2)^((d - 3) / 2), which is a symmetric beta-law after a change of
71
+ // variables. The code below discretizes that density.
72
+ const size_t ngrid =
73
+ std::max(kTurboQuantGridMin, k * kTurboQuantGridPerCentroid);
74
+ const double step = 2.0 / ngrid;
75
+ const double alpha = 0.5 * (double(d) - 3.0);
76
+
77
+ std::vector<double> xs(ngrid);
78
+ // prefix_w stores the cumulative mass of the discretized density and
79
+ // prefix_wx stores its cumulative first moment, so interval means can be
80
+ // recovered in O(1).
81
+ std::vector<double> prefix_w(ngrid + 1, 0.0);
82
+ std::vector<double> prefix_wx(ngrid + 1, 0.0);
83
+
84
+ for (size_t i = 0; i < ngrid; i++) {
85
+ const double x = -1.0 + (i + 0.5) * step;
86
+ const double one_minus_x2 = std::max(0.0, 1.0 - x * x);
87
+ double w;
88
+ if (alpha == 0.0) { // when d == 3
89
+ w = 1.0;
90
+ } else {
91
+ // (1-x^2)^((d-3)/2)
92
+ w = std::pow(one_minus_x2, alpha);
93
+ }
94
+ if (!std::isfinite(w) || w < 0.0) {
95
+ w = 0.0;
96
+ }
97
+ xs[i] = x;
98
+ prefix_w[i + 1] = prefix_w[i] + w;
99
+ prefix_wx[i + 1] = prefix_wx[i] + w * x;
100
+ }
101
+
102
+ auto range_mean = [&](size_t i0, size_t i1, double fallback) {
103
+ const double w = prefix_w[i1] - prefix_w[i0];
104
+ if (w <= 0.0) {
105
+ return fallback;
106
+ }
107
+ return (prefix_wx[i1] - prefix_wx[i0]) / w;
108
+ };
109
+
110
+ const double total_w = prefix_w.back();
111
+ std::vector<size_t> cuts(k + 1, 0);
112
+ cuts[k] = ngrid;
113
+
114
+ // Initialize with k equal-mass cells under the target density. This gives
115
+ // a stable starting point before the Lloyd refinements below.
116
+ for (size_t i = 1; i < k; i++) {
117
+ const double target = total_w * i / k;
118
+ cuts[i] = std::lower_bound(prefix_w.begin(), prefix_w.end(), target) -
119
+ prefix_w.begin();
120
+ cuts[i] = std::min(cuts[i], ngrid);
121
+ }
122
+
123
+ std::vector<double> centroids_d(k);
124
+ for (size_t i = 0; i < k; i++) {
125
+ const double left = -1.0 + 2.0 * i / k;
126
+ const double right = -1.0 + 2.0 * (i + 1) / k;
127
+ // First estimate of each centroid: the conditional mean of its initial
128
+ // equal-mass cell, with a uniform-cell midpoint as a fallback.
129
+ centroids_d[i] = range_mean(cuts[i], cuts[i + 1], 0.5 * (left + right));
130
+ }
131
+
132
+ std::vector<double> boundaries_d(k > 0 ? k - 1 : 0);
133
+
134
+ // Refine the 1-D codebook with a weighted Lloyd iteration over the
135
+ // discretized marginal density on [-1, 1]:
136
+ // 1. boundaries_d are the Voronoi separators implied by neighboring
137
+ // centroids.
138
+ // 2. cuts map each boundary interval back to a contiguous range of the
139
+ // integration grid xs[].
140
+ // 3. each centroid becomes the weighted mean of the samples currently in
141
+ // its cell, clipped to stay within its neighboring boundaries.
142
+ //
143
+ // The loop stops once the largest centroid update is below kTurboQuantTol.
144
+ for (int iter = 0; iter < kTurboQuantMaxIter; iter++) {
145
+ // Midpoints between adjacent centroids define the current Voronoi
146
+ // partition of [-1, 1].
147
+ for (size_t i = 0; i + 1 < k; i++) {
148
+ boundaries_d[i] = 0.5 * (centroids_d[i] + centroids_d[i + 1]);
149
+ }
150
+
151
+ cuts[0] = 0;
152
+ cuts[k] = ngrid;
153
+ // Reassign the discretized density samples to the Voronoi cell induced
154
+ // by each boundary. Because xs is sorted, the reassignment reduces to
155
+ // finding the first grid point strictly greater than each boundary.
156
+ for (size_t i = 1; i < k; i++) {
157
+ cuts[i] = std::upper_bound(
158
+ xs.begin(), xs.end(), boundaries_d[i - 1]) -
159
+ xs.begin();
160
+ }
161
+
162
+ double max_delta = 0.0;
163
+ for (size_t i = 0; i < k; i++) {
164
+ const double left = i == 0 ? -1.0 : boundaries_d[i - 1];
165
+ const double right = i + 1 == k ? 1.0 : boundaries_d[i];
166
+ // Lloyd update: replace the centroid with the weighted average of
167
+ // the mass assigned to its cell. Empty cells fall back to the cell
168
+ // midpoint, and we clamp to [left, right] to preserve ordering.
169
+ double c = range_mean(cuts[i], cuts[i + 1], 0.5 * (left + right));
170
+ c = std::min(std::max(c, left), right);
171
+ max_delta = std::max(max_delta, std::abs(c - centroids_d[i]));
172
+ centroids_d[i] = c;
173
+ }
174
+
175
+ if (max_delta < kTurboQuantTol) {
176
+ break;
177
+ }
178
+ }
179
+
180
+ std::sort(centroids_d.begin(), centroids_d.end());
181
+
182
+ centroids.resize(k);
183
+ boundaries.resize(k - 1);
184
+ for (size_t i = 0; i < k; i++) {
185
+ centroids[i] = centroids_d[i];
186
+ }
187
+ for (size_t i = 0; i + 1 < k; i++) {
188
+ boundaries[i] = 0.5f * (centroids[i] + centroids[i + 1]);
189
+ }
190
+ }
191
+
192
+ void train_TurboQuantMSE(size_t d, size_t nbits, std::vector<float>& trained) {
193
+ FAISS_THROW_IF_NOT_FMT(
194
+ nbits > 0, "invalid TurboQuant SQ nbits %zu (must be > 0)", nbits);
195
+ std::vector<float> centroids;
196
+ std::vector<float> boundaries;
197
+ build_TurboQuantMSECodebook(d, nbits, centroids, boundaries);
198
+ const size_t k = centroids.size();
199
+
200
+ trained.resize(k + (k - 1));
201
+ for (size_t i = 0; i < k; i++) {
202
+ trained[i] = centroids[i];
203
+ }
204
+ for (size_t i = 0; i + 1 < k; i++) {
205
+ trained[k + i] = boundaries[i];
206
+ }
207
+ }
208
+
25
209
  void train_Uniform(
26
210
  RangeStat rs,
27
211
  float rs_arg,
@@ -37,7 +221,7 @@ void train_Uniform(
37
221
  if (rs == ScalarQuantizer::RS_minmax) {
38
222
  vmin = HUGE_VAL;
39
223
  vmax = -HUGE_VAL;
40
- for (size_t i = 0; i < n; i++) {
224
+ for (idx_t i = 0; i < n; i++) {
41
225
  if (x[i] < vmin) {
42
226
  vmin = x[i];
43
227
  }
@@ -50,7 +234,7 @@ void train_Uniform(
50
234
  vmax += vexp;
51
235
  } else if (rs == ScalarQuantizer::RS_meanstd) {
52
236
  double sum = 0, sum2 = 0;
53
- for (size_t i = 0; i < n; i++) {
237
+ for (idx_t i = 0; i < n; i++) {
54
238
  sum += x[i];
55
239
  sum2 += x[i] * x[i];
56
240
  }
@@ -81,7 +265,7 @@ void train_Uniform(
81
265
  float sx = 0;
82
266
  {
83
267
  vmin = HUGE_VAL, vmax = -HUGE_VAL;
84
- for (size_t i = 0; i < n; i++) {
268
+ for (idx_t i = 0; i < n; i++) {
85
269
  if (x[i] < vmin) {
86
270
  vmin = x[i];
87
271
  }
@@ -161,9 +345,9 @@ void train_NonUniform(
161
345
  if (rs == ScalarQuantizer::RS_minmax) {
162
346
  memcpy(vmin, x, sizeof(*x) * d);
163
347
  memcpy(vmax, x, sizeof(*x) * d);
164
- for (size_t i = 1; i < n; i++) {
348
+ for (idx_t i = 1; i < n; i++) {
165
349
  const float* xi = x + i * d;
166
- for (size_t j = 0; j < d; j++) {
350
+ for (int j = 0; j < d; j++) {
167
351
  if (xi[j] < vmin[j]) {
168
352
  vmin[j] = xi[j];
169
353
  }
@@ -173,7 +357,7 @@ void train_NonUniform(
173
357
  }
174
358
  }
175
359
  float* vdiff = vmax;
176
- for (size_t j = 0; j < d; j++) {
360
+ for (int j = 0; j < d; j++) {
177
361
  float vexp = (vmax[j] - vmin[j]) * rs_arg;
178
362
  vmin[j] -= vexp;
179
363
  vmax[j] += vexp;
@@ -182,9 +366,9 @@ void train_NonUniform(
182
366
  } else {
183
367
  // transpose
184
368
  std::vector<float> xt(n * d);
185
- for (size_t i = 1; i < n; i++) {
369
+ for (idx_t i = 1; i < n; i++) {
186
370
  const float* xi = x + i * d;
187
- for (size_t j = 0; j < d; j++) {
371
+ for (int j = 0; j < d; j++) {
188
372
  xt[j * n + i] = xi[j];
189
373
  }
190
374
  }
@@ -37,6 +37,18 @@ void train_NonUniform(
37
37
  int k,
38
38
  const float* x,
39
39
  std::vector<float>& trained);
40
+
41
+ /** Build the TurboQuant MSE codebook using the beta-distribution-optimal
42
+ * quantizer from the TurboQuant paper. The codebook is analytical
43
+ * (depends only on d and nbits, no training data needed).
44
+ *
45
+ * @param d vector dimensionality (used for beta-distribution shape)
46
+ * @param nbits bits per component (1-8); nbits == 0 is rejected
47
+ * @param trained output: [centroids (k floats), boundaries (k-1 floats)]
48
+ * where k = 2^nbits; resized to 2k - 1 floats in total
49
+ */
50
+ void train_TurboQuantMSE(size_t d, size_t nbits, std::vector<float>& trained);
51
+
40
52
  } // namespace scalar_quantizer
41
53
 
42
54
  } // namespace faiss
@@ -23,86 +23,107 @@
23
23
 
24
24
  namespace faiss {
25
25
 
26
- /*********************** x86 SIMD dispatch cases */
26
+ /** The SIMD levels available for a given function are specified via a
27
+ * bit mask (one bit per SIMDLevel). Here we predefine the most common masks.
28
+ * */
27
29
 
28
- #ifdef COMPILE_SIMD_AVX2
29
- #define DISPATCH_SIMDLevel_AVX2(f, ...) \
30
- case SIMDLevel::AVX2: \
31
- return f<SIMDLevel::AVX2>(__VA_ARGS__)
32
- #else
33
- #define DISPATCH_SIMDLevel_AVX2(f, ...)
34
- #endif
30
+ constexpr int AVAILABLE_SIMD_LEVELS_NONE = (1 << int(SIMDLevel::NONE));
35
31
 
36
- #ifdef COMPILE_SIMD_AVX512
37
- #define DISPATCH_SIMDLevel_AVX512(f, ...) \
38
- case SIMDLevel::AVX512: \
39
- return f<SIMDLevel::AVX512>(__VA_ARGS__)
40
- #else
41
- #define DISPATCH_SIMDLevel_AVX512(f, ...)
42
- #endif
32
+ constexpr int AVAILABLE_SIMD_LEVELS_AVX2_NEON = AVAILABLE_SIMD_LEVELS_NONE |
33
+ (1 << int(SIMDLevel::AVX2)) | (1 << int(SIMDLevel::ARM_NEON));
34
+
35
+ // A0: AVX2_NEON levels + AVX512 + RISCV_RVV
36
+ constexpr int AVAILABLE_SIMD_LEVELS_A0 = AVAILABLE_SIMD_LEVELS_AVX2_NEON |
37
+ (1 << int(SIMDLevel::AVX512)) | (1 << int(SIMDLevel::RISCV_RVV));
38
+
39
+ // A1: A0 levels + ARM_SVE (for functions with dedicated SVE implementations)
40
+ constexpr int AVAILABLE_SIMD_LEVELS_A1 =
41
+ AVAILABLE_SIMD_LEVELS_A0 | (1 << int(SIMDLevel::ARM_SVE));
42
+
43
+ // A2: NONE + AVX2 + ARM_SVE only (for functions with only these
44
+ // implementations)
45
+ constexpr int AVAILABLE_SIMD_LEVELS_A2 = AVAILABLE_SIMD_LEVELS_NONE |
46
+ (1 << int(SIMDLevel::AVX2)) | (1 << int(SIMDLevel::ARM_SVE));
47
+
48
+ constexpr int AVAILABLE_SIMD_LEVELS_ALL = -1;
49
+
50
+ /** The complete dispatching function. It takes into account:
51
+ * - the currently selected SIMD level
52
+ * - the compiled in SIMD levels (given by COMPILE_SIMD_XXX)
53
+ * - the available SIMD implementations for that particular function (given by
54
+ * available_levels)
55
+ */
56
+
57
+ template <int available_levels, typename LambdaType>
58
+ inline auto with_selected_simd_levels(LambdaType&& action) {
59
+ #ifdef FAISS_ENABLE_DD
60
+ switch (SIMDConfig::level) {
61
+ // For x86 -- try from highest to lowest level
43
62
 
44
63
  #ifdef COMPILE_SIMD_AVX512_SPR
45
- #define DISPATCH_SIMDLevel_AVX512_SPR(f, ...) \
46
- case SIMDLevel::AVX512_SPR: \
47
- return f<SIMDLevel::AVX512_SPR>(__VA_ARGS__)
48
- #else
49
- #define DISPATCH_SIMDLevel_AVX512_SPR(f, ...)
64
+ case SIMDLevel::AVX512_SPR:
65
+ if constexpr (
66
+ available_levels & (1 << int(SIMDLevel::AVX512_SPR))) {
67
+ return action.template operator()<SIMDLevel::AVX512_SPR>();
68
+ }
69
+ [[fallthrough]];
50
70
  #endif
51
71
 
52
- /*********************** ARM SIMD dispatch cases */
72
+ #ifdef COMPILE_SIMD_AVX512
73
+ case SIMDLevel::AVX512:
74
+ if constexpr (available_levels & (1 << int(SIMDLevel::AVX512))) {
75
+ return action.template operator()<SIMDLevel::AVX512>();
76
+ }
77
+ [[fallthrough]];
78
+ #endif
53
79
 
54
- #ifdef COMPILE_SIMD_ARM_NEON
55
- #define DISPATCH_SIMDLevel_ARM_NEON(f, ...) \
56
- case SIMDLevel::ARM_NEON: \
57
- return f<SIMDLevel::ARM_NEON>(__VA_ARGS__)
58
- #else
59
- #define DISPATCH_SIMDLevel_ARM_NEON(f, ...)
80
+ #ifdef COMPILE_SIMD_AVX2
81
+ case SIMDLevel::AVX2:
82
+ if constexpr (available_levels & (1 << int(SIMDLevel::AVX2))) {
83
+ return action.template operator()<SIMDLevel::AVX2>();
84
+ }
85
+ [[fallthrough]];
60
86
  #endif
61
87
 
88
+ // For ARM, try from highest to lowest level
62
89
  #ifdef COMPILE_SIMD_ARM_SVE
63
- #define DISPATCH_SIMDLevel_ARM_SVE(f, ...) \
64
- case SIMDLevel::ARM_SVE: \
65
- return f<SIMDLevel::ARM_SVE>(__VA_ARGS__)
66
- #else
67
- #define DISPATCH_SIMDLevel_ARM_SVE(f, ...)
90
+ case SIMDLevel::ARM_SVE:
91
+ if constexpr (available_levels & (1 << int(SIMDLevel::ARM_SVE))) {
92
+ return action.template operator()<SIMDLevel::ARM_SVE>();
93
+ }
94
+ [[fallthrough]];
68
95
  #endif
69
96
 
70
- /*********************** Main dispatch macro */
71
-
72
- #ifdef FAISS_ENABLE_DD
97
+ #ifdef COMPILE_SIMD_ARM_NEON
98
+ case SIMDLevel::ARM_NEON:
99
+ if constexpr (available_levels & (1 << int(SIMDLevel::ARM_NEON))) {
100
+ return action.template operator()<SIMDLevel::ARM_NEON>();
101
+ }
102
+ [[fallthrough]];
103
+ #endif
73
104
 
74
- // DD mode: runtime dispatch based on SIMDConfig::level
75
- #define DISPATCH_SIMDLevel(f, ...) \
76
- switch (SIMDConfig::level) { \
77
- case SIMDLevel::NONE: \
78
- return f<SIMDLevel::NONE>(__VA_ARGS__); \
79
- DISPATCH_SIMDLevel_AVX2(f, __VA_ARGS__); \
80
- DISPATCH_SIMDLevel_AVX512(f, __VA_ARGS__); \
81
- DISPATCH_SIMDLevel_AVX512_SPR(f, __VA_ARGS__); \
82
- DISPATCH_SIMDLevel_ARM_NEON(f, __VA_ARGS__); \
83
- DISPATCH_SIMDLevel_ARM_SVE(f, __VA_ARGS__); \
84
- default: \
85
- FAISS_THROW_MSG("Invalid SIMD level"); \
105
+ #ifdef COMPILE_SIMD_RISCV_RVV
106
+ case SIMDLevel::RISCV_RVV:
107
+ if constexpr (available_levels & (1 << int(SIMDLevel::RISCV_RVV))) {
108
+ return action.template operator()<SIMDLevel::RISCV_RVV>();
109
+ }
110
+ [[fallthrough]];
111
+ #endif
112
+ default:
113
+ return action.template operator()<SIMDLevel::NONE>();
114
+ }
115
+ #else // static dispatch
116
+ // In static mode, SINGLE_SIMD_LEVEL is a constexpr resolved at compile
117
+ // time. If the compiled level is not in the available set, fall through
118
+ // to NONE (mirroring the DD fallthrough behavior). Only SINGLE_SIMD_LEVEL
119
+ // and NONE have compiled specializations.
120
+ if constexpr (available_levels & (1 << int(SINGLE_SIMD_LEVEL))) {
121
+ return action.template operator()<SINGLE_SIMD_LEVEL>();
122
+ } else {
123
+ return action.template operator()<SIMDLevel::NONE>();
86
124
  }
87
-
88
- #else // Static mode
89
-
90
- // Static mode: direct call to compiled-in SIMD level (no runtime switch)
91
- #if defined(COMPILE_SIMD_AVX512_SPR)
92
- #define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::AVX512_SPR>(__VA_ARGS__)
93
- #elif defined(COMPILE_SIMD_AVX512)
94
- #define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::AVX512>(__VA_ARGS__)
95
- #elif defined(COMPILE_SIMD_AVX2)
96
- #define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::AVX2>(__VA_ARGS__)
97
- #elif defined(COMPILE_SIMD_ARM_SVE)
98
- #define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::ARM_SVE>(__VA_ARGS__)
99
- #elif defined(COMPILE_SIMD_ARM_NEON)
100
- #define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::ARM_NEON>(__VA_ARGS__)
101
- #else
102
- #define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::NONE>(__VA_ARGS__)
103
125
  #endif
104
-
105
- #endif // FAISS_ENABLE_DD
126
+ }
106
127
 
107
128
  /**
108
129
  * Dispatch to a lambda with SIMDLevel as a compile-time constant.
@@ -126,6 +147,8 @@ namespace faiss {
126
147
  * });
127
148
  *
128
149
  * The lambda must be a generic lambda with a SIMDLevel template parameter.
150
+ * By default, dispatch considers levels NONE + AVX2 + AVX512 + NEON + RVV
151
+ * (AVAILABLE_SIMD_LEVELS_A0), since these are the most common cases.
129
152
  *
130
153
  * @param action A generic lambda with signature `template<SIMDLevel> T
131
154
  * operator()()`
@@ -133,7 +156,18 @@ namespace faiss {
133
156
  */
134
157
  template <typename LambdaType>
135
158
  inline auto with_simd_level(LambdaType&& action) {
136
- DISPATCH_SIMDLevel(action.template operator());
159
+ return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A0>(
160
+ std::forward<LambdaType>(action));
161
+ }
162
+
163
+ /**
164
+ * Use for functions implemented with simdXintY (256-bit) operations
165
+ * that don't have dedicated AVX512 or SVE implementations.
166
+ */
167
+ template <typename LambdaType>
168
+ inline auto with_simd_level_256bit(LambdaType&& action) { // forwards the lambda to the mask-restricted dispatcher
169
+ return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_AVX2_NEON>( // mask admits NONE, AVX2 and ARM_NEON only
170
+ std::forward<LambdaType>(action));
137
171
  }
138
172
 
139
173
  } // namespace faiss
@@ -0,0 +1,57 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ /** Abstractions for 256-bit and 512-bit SIMD registers.
11
+ *
12
+ * The objective is to separate the different interpretations of the same
13
+ * registers (as a vector of uint8, uint16 or uint32), to provide printing
14
+ * functions.
15
+ *
16
+ * The types are templatized on SIMDLevel. Each platform header provides
17
+ * explicit specializations for the appropriate level. Code without explicit
18
+ * SL context uses SINGLE_SIMD_LEVEL (see simd_levels.h).
19
+ */
20
+
21
+ #include <faiss/utils/simd_levels.h>
22
+
23
+ namespace faiss {
24
+
25
+ // 256-bit primary templates
26
+ template <SIMDLevel SL>
27
+ struct simd256bit_tpl {};
28
+ template <SIMDLevel SL>
29
+ struct simd16uint16_tpl : simd256bit_tpl<SL> {};
30
+ template <SIMDLevel SL>
31
+ struct simd32uint8_tpl : simd256bit_tpl<SL> {};
32
+ template <SIMDLevel SL>
33
+ struct simd8uint32_tpl : simd256bit_tpl<SL> {};
34
+ template <SIMDLevel SL>
35
+ struct simd8float32_tpl : simd256bit_tpl<SL> {};
36
+
37
+ // 512-bit primary templates
38
+ template <SIMDLevel SL>
39
+ struct simd512bit_tpl {};
40
+ template <SIMDLevel SL>
41
+ struct simd32uint16_tpl : simd512bit_tpl<SL> {};
42
+ template <SIMDLevel SL>
43
+ struct simd64uint8_tpl : simd512bit_tpl<SL> {};
44
+ template <SIMDLevel SL>
45
+ struct simd16float32_tpl : simd512bit_tpl<SL> {};
46
+
47
+ } // namespace faiss
48
+
49
+ // NONE specialization — always included.
50
+ // Provides simd16uint16_tpl<NONE> etc. (scalar fallback).
51
+ // On PPC64: uses PPC-optimized scalar code (hand-tuned loop unrolling).
52
+ // Elsewhere: generic scalar implementation.
53
+ #if defined(__PPC64__)
54
+ #include <faiss/impl/simdlib/simdlib_ppc64.h>
55
+ #else
56
+ #include <faiss/impl/simdlib/simdlib_emulated.h>
57
+ #endif