faiss 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (378)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +88 -97
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +89 -417
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +374 -206
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +467 -364
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +79 -76
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +39 -69
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +56 -33
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +73 -846
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -20
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +30 -52
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +38 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +150 -20
  84. data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
  85. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  86. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  87. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  88. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  89. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  90. data/vendor/faiss/faiss/MetricType.h +14 -7
  91. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  92. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  93. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  94. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  95. data/vendor/faiss/faiss/build.cpp +23 -0
  96. data/vendor/faiss/faiss/build.h +15 -0
  97. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  98. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  101. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
  102. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  103. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  104. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
  105. data/vendor/faiss/faiss/factory_tools.cpp +9 -0
  106. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  107. data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
  108. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +15 -16
  109. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +5 -4
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  113. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  114. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  115. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  116. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  117. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
  120. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  121. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +58 -0
  122. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  123. data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
  124. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +111 -0
  125. data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
  126. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  127. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  128. data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
  129. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  130. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  135. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  136. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  137. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  138. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  139. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  140. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  141. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  142. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  143. data/vendor/faiss/faiss/impl/HNSW.cpp +639 -507
  144. data/vendor/faiss/faiss/impl/HNSW.h +61 -44
  145. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  146. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  147. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  148. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  149. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  150. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  151. data/vendor/faiss/faiss/impl/NSG.cpp +53 -32
  152. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  153. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  154. data/vendor/faiss/faiss/impl/Panorama.h +269 -87
  155. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  156. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  157. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  158. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  159. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  160. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  161. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +55 -25
  162. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  163. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  164. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  165. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +302 -283
  166. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  167. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  168. data/vendor/faiss/faiss/impl/ResultHandler.h +100 -75
  169. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +318 -7
  170. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +77 -1
  171. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  172. data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
  173. data/vendor/faiss/faiss/impl/VisitedTable.h +70 -28
  174. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  175. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  176. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  177. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  178. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  179. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  180. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  182. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  183. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  184. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  185. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  186. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  187. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  188. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  189. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  190. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  191. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  192. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  193. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  194. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  196. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  197. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +270 -0
  198. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  199. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  200. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  201. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  202. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  203. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  204. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  205. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  206. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  207. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  208. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  209. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  210. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  211. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +83 -0
  212. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +113 -0
  213. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +150 -0
  214. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +142 -0
  215. data/vendor/faiss/faiss/impl/index_read.cpp +1227 -79
  216. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  217. data/vendor/faiss/faiss/impl/index_write.cpp +96 -13
  218. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  219. data/vendor/faiss/faiss/impl/io_macros.h +58 -16
  220. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  221. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  222. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  223. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  224. data/vendor/faiss/faiss/impl/platform_macros.h +15 -4
  225. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  226. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  228. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  229. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +23 -0
  230. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +23 -0
  231. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +23 -0
  232. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  233. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  234. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +45 -107
  235. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  236. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +274 -5
  237. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +10 -7
  238. data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
  239. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +70 -0
  240. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  241. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +9 -2
  244. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +419 -19
  245. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  246. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  247. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +387 -2
  248. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
  249. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
  250. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +341 -2
  251. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +425 -3
  252. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +290 -2
  253. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +337 -0
  254. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  255. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  256. data/vendor/faiss/faiss/impl/simd_dispatch.h +157 -66
  257. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  258. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  259. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  260. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  261. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  262. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  263. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  264. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  265. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  266. data/vendor/faiss/faiss/index_factory.cpp +90 -18
  267. data/vendor/faiss/faiss/index_io.h +40 -0
  268. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  269. data/vendor/faiss/faiss/invlists/DirectMap.cpp +28 -15
  270. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  271. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +170 -86
  272. data/vendor/faiss/faiss/invlists/InvertedLists.h +88 -25
  273. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  274. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  275. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  276. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  277. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  278. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  279. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  280. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  281. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  282. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  283. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  284. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +142 -21
  285. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +33 -7
  286. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
  287. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
  288. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +77 -27
  289. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +10 -4
  290. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  291. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  292. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  293. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  294. data/vendor/faiss/faiss/utils/bf16.h +34 -0
  295. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  296. data/vendor/faiss/faiss/utils/distances.h +20 -1
  297. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  298. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  299. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  300. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  301. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  302. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  303. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  304. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -178
  305. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  306. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  307. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  308. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  309. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  310. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  311. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +16 -0
  312. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  313. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
  314. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  315. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +210 -0
  316. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
  317. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  318. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  319. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  320. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  321. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  322. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  323. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  324. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -989
  325. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  326. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  327. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  328. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  329. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  330. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  331. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  332. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  333. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  335. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  336. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  337. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  338. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  339. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  340. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  341. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  342. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  343. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  344. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  345. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  346. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1031 -0
  347. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  348. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  349. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
  350. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  351. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  352. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  353. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  354. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  355. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  356. data/vendor/faiss/faiss/utils/simd_levels.cpp +29 -7
  357. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  358. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  359. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  360. data/vendor/faiss/faiss/utils/utils.h +3 -3
  361. metadata +129 -34
  362. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  363. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  364. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  365. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  366. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  367. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  368. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  369. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  370. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  371. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  372. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  373. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  374. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  375. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  376. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  377. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  378. /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -22,6 +22,190 @@ static float sqr(float x) {
22
22
  return x * x;
23
23
  }
24
24
 
25
+ constexpr size_t kTurboQuantMaxBits = 8; // largest nbits accepted by build_TurboQuantMSECodebook
26
+ // TurboQuant builds a 1-D optimal scalar quantizer analytically. We approximate
27
+ // the target density on a uniform grid over [-1, 1]; the grid is kept dense
28
+ // enough both in absolute terms and per output centroid.
29
+ constexpr size_t kTurboQuantGridMin = 1 << 15; // lower bound on the number of grid cells (32768)
30
+ constexpr size_t kTurboQuantGridPerCentroid = 512; // grid cells requested per centroid; the grid uses the larger of the two sizes
31
+ constexpr int kTurboQuantMaxIter = 100; // upper bound on the number of Lloyd iterations
32
+ constexpr double kTurboQuantTol = 1e-8; // iteration stops once the largest centroid update is below this
33
+
34
// Builds the 1-D TurboQuant codebook for a given dimension and bit width.
//
// The codebook has k = 2^nbits centroids. For d > 1 they are obtained by a
// weighted Lloyd iteration on a discretized density proportional to
// (1 - x^2)^((d - 3) / 2) over [-1, 1]; for d == 1 they are the two endpoints
// -1 and +1, each repeated k / 2 times.
//
// On return, `centroids` holds k values and `boundaries` holds the k - 1
// midpoints between consecutive centroids. When nbits == 0 both vectors are
// cleared. The FAISS_THROW_IF_NOT_FMT check fails when nbits exceeds
// kTurboQuantMaxBits.
+ void build_TurboQuantMSECodebook(
35
+ size_t d, // dimension of the unit vectors whose coordinate marginal is quantized
36
+ size_t nbits, // bits per code; must be <= kTurboQuantMaxBits
37
+ std::vector<float>& centroids, // output: k centroids (emptied when nbits == 0)
38
+ std::vector<float>& boundaries) { // output: k - 1 centroid midpoints (emptied when nbits == 0)
39
+ FAISS_THROW_IF_NOT_FMT(
40
+ nbits <= kTurboQuantMaxBits,
41
+ "invalid TurboQuant nbits %zu (must be in [0, %zu])",
42
+ nbits,
43
+ kTurboQuantMaxBits);
44
+
45
+ if (nbits == 0) { // zero bits: no codebook, return empty outputs
46
+ centroids.clear();
47
+ boundaries.clear();
48
+ return;
49
+ }
50
+
51
+ const size_t k = size_t(1) << nbits; // number of centroids, 2^nbits (at least 2 here)
52
+
53
+ if (d == 1) {
54
+ // In 1-D, a unit vector can only be -1 or +1, so the marginal
55
+ // distribution collapses to two atoms. The TurboQuant codebook is
56
+ // therefore a repeated pair of endpoint centroids.
57
+ centroids.resize(k);
58
+ for (size_t i = 0; i < k; i++) {
59
+ centroids[i] = i < k / 2 ? -1.0f : 1.0f; // lower half -1, upper half +1
60
+ }
61
+ boundaries.resize(k - 1);
62
+ for (size_t i = 0; i + 1 < k; i++) {
63
+ boundaries[i] = 0.5f * (centroids[i] + centroids[i + 1]); // evaluates to -1, 0 or +1
64
+ }
65
+ return;
66
+ }
67
+
68
+ // For d > 1, TurboQuant uses the marginal distribution of one coordinate of
69
+ // a random unit vector in R^d. On [-1, 1], this density is proportional to
70
+ // (1 - x^2)^((d - 3) / 2), which is a symmetric beta-law after a change of
71
+ // variables. The code below discretizes that density.
72
+ const size_t ngrid =
73
+ std::max(kTurboQuantGridMin, k * kTurboQuantGridPerCentroid);
74
+ const double step = 2.0 / ngrid; // width of one grid cell on [-1, 1]
75
+ const double alpha = 0.5 * (double(d) - 3.0); // exponent applied to (1 - x^2)
76
+
77
+ std::vector<double> xs(ngrid); // grid sample positions, in increasing order
78
+ // prefix_w stores the cumulative mass of the discretized density and
79
+ // prefix_wx stores its cumulative first moment, so interval means can be
80
+ // recovered in O(1).
81
+ std::vector<double> prefix_w(ngrid + 1, 0.0);
82
+ std::vector<double> prefix_wx(ngrid + 1, 0.0);
83
+
84
+ for (size_t i = 0; i < ngrid; i++) {
85
+ const double x = -1.0 + (i + 0.5) * step; // midpoint of grid cell i
86
+ const double one_minus_x2 = std::max(0.0, 1.0 - x * x); // clamped so the base of pow is never negative
87
+ double w;
88
+ if (alpha == 0.0) { // when d == 3
89
+ w = 1.0;
90
+ } else {
91
+ // (1-x^2)^((d-3)/2)
92
+ w = std::pow(one_minus_x2, alpha);
93
+ }
94
+ if (!std::isfinite(w) || w < 0.0) { // treat non-finite or negative weights as zero mass
95
+ w = 0.0;
96
+ }
97
+ xs[i] = x;
98
+ prefix_w[i + 1] = prefix_w[i] + w;
99
+ prefix_wx[i + 1] = prefix_wx[i] + w * x;
100
+ }
101
+
// Weighted mean of the grid samples with indices in [i0, i1); returns
// `fallback` when that range carries no mass.
102
+ auto range_mean = [&](size_t i0, size_t i1, double fallback) {
103
+ const double w = prefix_w[i1] - prefix_w[i0];
104
+ if (w <= 0.0) {
105
+ return fallback;
106
+ }
107
+ return (prefix_wx[i1] - prefix_wx[i0]) / w;
108
+ };
109
+
110
+ const double total_w = prefix_w.back(); // total mass of the discretized density
111
+ std::vector<size_t> cuts(k + 1, 0); // cell i covers grid indices [cuts[i], cuts[i + 1])
112
+ cuts[k] = ngrid;
113
+
114
+ // Initialize with k equal-mass cells under the target density. This gives
115
+ // a stable starting point before the Lloyd refinements below.
116
+ for (size_t i = 1; i < k; i++) {
117
+ const double target = total_w * i / k;
118
+ cuts[i] = std::lower_bound(prefix_w.begin(), prefix_w.end(), target) -
119
+ prefix_w.begin(); // first prefix index whose cumulative mass is >= target
120
+ cuts[i] = std::min(cuts[i], ngrid); // prefix_w has ngrid + 1 entries; keep the cut a valid grid bound
121
+ }
122
+
123
+ std::vector<double> centroids_d(k); // double-precision working copy of the centroids
124
+ for (size_t i = 0; i < k; i++) {
125
+ const double left = -1.0 + 2.0 * i / k;
126
+ const double right = -1.0 + 2.0 * (i + 1) / k;
127
+ // First estimate of each centroid: the conditional mean of its initial
128
+ // equal-mass cell, with a uniform-cell midpoint as a fallback.
129
+ centroids_d[i] = range_mean(cuts[i], cuts[i + 1], 0.5 * (left + right));
130
+ }
131
+
132
+ std::vector<double> boundaries_d(k > 0 ? k - 1 : 0); // k >= 2 on this path, so the k > 0 guard is purely defensive
133
+
134
+ // Refine the 1-D codebook with a weighted Lloyd iteration over the
135
+ // discretized marginal density on [-1, 1]:
136
+ // 1. boundaries_d are the Voronoi separators implied by neighboring
137
+ // centroids.
138
+ // 2. cuts map each boundary interval back to a contiguous range of the
139
+ // integration grid xs[].
140
+ // 3. each centroid becomes the weighted mean of the samples currently in
141
+ // its cell, clipped to stay within its neighboring boundaries.
142
+ //
143
+ // The loop stops once the largest centroid update is below kTurboQuantTol.
144
+ for (int iter = 0; iter < kTurboQuantMaxIter; iter++) {
145
+ // Midpoints between adjacent centroids define the current Voronoi
146
+ // partition of [-1, 1].
147
+ for (size_t i = 0; i + 1 < k; i++) {
148
+ boundaries_d[i] = 0.5 * (centroids_d[i] + centroids_d[i + 1]);
149
+ }
150
+
151
+ cuts[0] = 0;
152
+ cuts[k] = ngrid;
153
+ // Reassign the discretized density samples to the Voronoi cell induced
154
+ // by each boundary. Because xs is sorted, the reassignment reduces to
155
+ // finding the first grid point strictly greater than each boundary.
156
+ for (size_t i = 1; i < k; i++) {
157
+ cuts[i] = std::upper_bound(
158
+ xs.begin(), xs.end(), boundaries_d[i - 1]) -
159
+ xs.begin();
160
+ }
161
+
162
+ double max_delta = 0.0; // largest absolute centroid change in this iteration
163
+ for (size_t i = 0; i < k; i++) {
164
+ const double left = i == 0 ? -1.0 : boundaries_d[i - 1];
165
+ const double right = i + 1 == k ? 1.0 : boundaries_d[i];
166
+ // Lloyd update: replace the centroid with the weighted average of
167
+ // the mass assigned to its cell. Empty cells fall back to the cell
168
+ // midpoint, and we clamp to [left, right] to preserve ordering.
169
+ double c = range_mean(cuts[i], cuts[i + 1], 0.5 * (left + right));
170
+ c = std::min(std::max(c, left), right);
171
+ max_delta = std::max(max_delta, std::abs(c - centroids_d[i]));
172
+ centroids_d[i] = c;
173
+ }
174
+
175
+ if (max_delta < kTurboQuantTol) {
176
+ break;
177
+ }
178
+ }
179
+
180
+ std::sort(centroids_d.begin(), centroids_d.end()); // NOTE(review): presumably a safeguard, since the clamping above is meant to keep the order already
181
+
182
+ centroids.resize(k);
183
+ boundaries.resize(k - 1);
184
+ for (size_t i = 0; i < k; i++) {
185
+ centroids[i] = centroids_d[i]; // narrows the double working value to float
186
+ }
187
+ for (size_t i = 0; i + 1 < k; i++) {
188
+ boundaries[i] = 0.5f * (centroids[i] + centroids[i + 1]); // recomputed from the float centroids, not copied from boundaries_d
189
+ }
190
+ }
191
+
192
// Fills `trained` with the TurboQuant codebook for dimension d and nbits bits.
//
// The output is a single flat vector of 2 * k - 1 floats, where k is the
// number of centroids returned by build_TurboQuantMSECodebook: the k centroids
// come first, followed by the k - 1 boundaries. The FAISS_THROW_IF_NOT_FMT
// check fails when nbits == 0.
+ void train_TurboQuantMSE(size_t d, size_t nbits, std::vector<float>& trained) {
193
+ FAISS_THROW_IF_NOT_FMT(
194
+ nbits > 0, "invalid TurboQuant SQ nbits %zu (must be > 0)", nbits);
195
+ std::vector<float> centroids;
196
+ std::vector<float> boundaries;
197
+ build_TurboQuantMSECodebook(d, nbits, centroids, boundaries);
198
+ const size_t k = centroids.size(); // number of centroids produced by the builder
199
+
200
+ trained.resize(k + (k - 1)); // layout: k centroids, then k - 1 boundaries
201
+ for (size_t i = 0; i < k; i++) {
202
+ trained[i] = centroids[i];
203
+ }
204
+ for (size_t i = 0; i + 1 < k; i++) {
205
+ trained[k + i] = boundaries[i]; // boundaries start right after the centroids
206
+ }
207
+ }
208
+
25
209
  void train_Uniform(
26
210
  RangeStat rs,
27
211
  float rs_arg,
@@ -37,7 +221,7 @@ void train_Uniform(
37
221
  if (rs == ScalarQuantizer::RS_minmax) {
38
222
  vmin = HUGE_VAL;
39
223
  vmax = -HUGE_VAL;
40
- for (size_t i = 0; i < n; i++) {
224
+ for (idx_t i = 0; i < n; i++) {
41
225
  if (x[i] < vmin) {
42
226
  vmin = x[i];
43
227
  }
@@ -50,7 +234,7 @@ void train_Uniform(
50
234
  vmax += vexp;
51
235
  } else if (rs == ScalarQuantizer::RS_meanstd) {
52
236
  double sum = 0, sum2 = 0;
53
- for (size_t i = 0; i < n; i++) {
237
+ for (idx_t i = 0; i < n; i++) {
54
238
  sum += x[i];
55
239
  sum2 += x[i] * x[i];
56
240
  }
@@ -81,7 +265,7 @@ void train_Uniform(
81
265
  float sx = 0;
82
266
  {
83
267
  vmin = HUGE_VAL, vmax = -HUGE_VAL;
84
- for (size_t i = 0; i < n; i++) {
268
+ for (idx_t i = 0; i < n; i++) {
85
269
  if (x[i] < vmin) {
86
270
  vmin = x[i];
87
271
  }
@@ -161,9 +345,9 @@ void train_NonUniform(
161
345
  if (rs == ScalarQuantizer::RS_minmax) {
162
346
  memcpy(vmin, x, sizeof(*x) * d);
163
347
  memcpy(vmax, x, sizeof(*x) * d);
164
- for (size_t i = 1; i < n; i++) {
348
+ for (idx_t i = 1; i < n; i++) {
165
349
  const float* xi = x + i * d;
166
- for (size_t j = 0; j < d; j++) {
350
+ for (int j = 0; j < d; j++) {
167
351
  if (xi[j] < vmin[j]) {
168
352
  vmin[j] = xi[j];
169
353
  }
@@ -173,7 +357,7 @@ void train_NonUniform(
173
357
  }
174
358
  }
175
359
  float* vdiff = vmax;
176
- for (size_t j = 0; j < d; j++) {
360
+ for (int j = 0; j < d; j++) {
177
361
  float vexp = (vmax[j] - vmin[j]) * rs_arg;
178
362
  vmin[j] -= vexp;
179
363
  vmax[j] += vexp;
@@ -182,9 +366,9 @@ void train_NonUniform(
182
366
  } else {
183
367
  // transpose
184
368
  std::vector<float> xt(n * d);
185
- for (size_t i = 1; i < n; i++) {
369
+ for (idx_t i = 1; i < n; i++) {
186
370
  const float* xi = x + i * d;
187
- for (size_t j = 0; j < d; j++) {
371
+ for (int j = 0; j < d; j++) {
188
372
  xt[j * n + i] = xi[j];
189
373
  }
190
374
  }
@@ -37,6 +37,18 @@ void train_NonUniform(
37
37
  int k,
38
38
  const float* x,
39
39
  std::vector<float>& trained);
40
+
41
+ /** Build the TurboQuant MSE codebook using the beta-distribution-optimal
42
+ * quantizer from the TurboQuant paper. The codebook is analytical
43
+ * (depends only on d and nbits, no training data needed).
44
+ *
45
+ * @param d vector dimensionality (used for beta-distribution shape)
46
+ * @param nbits bits per component (1-8)
47
+ * @param trained output: [centroids (k floats), boundaries (k-1 floats)]
48
+ * where k = 2^nbits
49
+ */
50
+ void train_TurboQuantMSE(size_t d, size_t nbits, std::vector<float>& trained);
51
+
40
52
  } // namespace scalar_quantizer
41
53
 
42
54
  } // namespace faiss
@@ -23,86 +23,145 @@
23
23
 
24
24
  namespace faiss {
25
25
 
26
- /*********************** x86 SIMD dispatch cases */
26
+ /** The set of SIMD levels implemented for a given function is described by
27
+ * a bitmask with one bit per SIMDLevel. The most common masks are predefined here.
28
+ * */
29
+
30
+ constexpr int AVAILABLE_SIMD_LEVELS_NONE = (1 << int(SIMDLevel::NONE));
31
+
32
+ constexpr int AVAILABLE_SIMD_LEVELS_AVX2_NEON = AVAILABLE_SIMD_LEVELS_NONE |
33
+ (1 << int(SIMDLevel::AVX2)) | (1 << int(SIMDLevel::ARM_NEON));
34
+
35
+ // A0: the AVX2_NEON levels + AVX512 + RISCV_RVV
36
+ constexpr int AVAILABLE_SIMD_LEVELS_A0 = AVAILABLE_SIMD_LEVELS_AVX2_NEON |
37
+ (1 << int(SIMDLevel::AVX512)) | (1 << int(SIMDLevel::RISCV_RVV));
38
+
39
+ // A0_SPR: same as A0 + AVX512_SPR (for functions with a dedicated SPR
40
+ // specialization on top of an AVX512 fallback). Currently used by the
41
+ // RaBitQ popcount kernels, which use VPOPCNTDQ on SPR+.
42
+ constexpr int AVAILABLE_SIMD_LEVELS_A0_SPR =
43
+ AVAILABLE_SIMD_LEVELS_A0 | (1 << int(SIMDLevel::AVX512_SPR));
44
+
45
+ // A1: the A0 levels + ARM_SVE (for functions with dedicated SVE implementations)
46
+ constexpr int AVAILABLE_SIMD_LEVELS_A1 =
47
+ AVAILABLE_SIMD_LEVELS_A0 | (1 << int(SIMDLevel::ARM_SVE));
48
+
49
+ // A2: NONE + AVX2 + ARM_SVE only (for functions with only these
50
+ // implementations)
51
+ constexpr int AVAILABLE_SIMD_LEVELS_A2 = AVAILABLE_SIMD_LEVELS_NONE |
52
+ (1 << int(SIMDLevel::AVX2)) | (1 << int(SIMDLevel::ARM_SVE));
53
+
54
+ constexpr int AVAILABLE_SIMD_LEVELS_ALL = -1;
55
+
56
+ constexpr SIMDLevel get_simd_fallback(SIMDLevel level) {
57
+ switch (level) {
58
+ case SIMDLevel::AVX512_SPR:
59
+ return SIMDLevel::AVX512;
60
+ case SIMDLevel::AVX512:
61
+ return SIMDLevel::AVX2;
62
+ case SIMDLevel::ARM_SVE:
63
+ return SIMDLevel::ARM_NEON;
64
+ case SIMDLevel::AVX2:
65
+ case SIMDLevel::ARM_NEON:
66
+ case SIMDLevel::RISCV_RVV:
67
+ return SIMDLevel::NONE;
68
+ default:
69
+ return SIMDLevel::NONE;
70
+ }
71
+ }
27
72
 
28
- #ifdef COMPILE_SIMD_AVX2
29
- #define DISPATCH_SIMDLevel_AVX2(f, ...) \
30
- case SIMDLevel::AVX2: \
31
- return f<SIMDLevel::AVX2>(__VA_ARGS__)
32
- #else
33
- #define DISPATCH_SIMDLevel_AVX2(f, ...)
34
- #endif
73
+ template <int available_levels, SIMDLevel current_level, typename LambdaType>
74
+ inline auto dispatch_with_fallback(LambdaType&& action) {
75
+ if constexpr (available_levels & (1 << int(current_level))) {
76
+ return action.template operator()<current_level>();
77
+ } else if constexpr (current_level != SIMDLevel::NONE) {
78
+ return dispatch_with_fallback<
79
+ available_levels,
80
+ get_simd_fallback(current_level)>(
81
+ std::forward<LambdaType>(action));
82
+ } else {
83
+ return action.template operator()<SIMDLevel::NONE>();
84
+ }
85
+ }
35
86
 
36
- #ifdef COMPILE_SIMD_AVX512
37
- #define DISPATCH_SIMDLevel_AVX512(f, ...) \
38
- case SIMDLevel::AVX512: \
39
- return f<SIMDLevel::AVX512>(__VA_ARGS__)
40
- #else
41
- #define DISPATCH_SIMDLevel_AVX512(f, ...)
42
- #endif
87
+ /** The complete dispatching function. It takes into account:
88
+ * - the currently selected SIMD level
89
+ * - the compiled in SIMD levels (given by COMPILE_SIMD_XXX)
90
+ * - the available SIMD implementations for that particular function (given by
91
+ * available_levels)
92
+ */
93
+
94
+ template <int available_levels, typename LambdaType>
95
+ inline auto with_selected_simd_levels(LambdaType&& action) {
96
+ #ifdef FAISS_ENABLE_DD
97
+ switch (SIMDConfig::level) {
98
+ // For x86 -- try from highest to lowest level
43
99
 
44
100
  #ifdef COMPILE_SIMD_AVX512_SPR
45
- #define DISPATCH_SIMDLevel_AVX512_SPR(f, ...) \
46
- case SIMDLevel::AVX512_SPR: \
47
- return f<SIMDLevel::AVX512_SPR>(__VA_ARGS__)
48
- #else
49
- #define DISPATCH_SIMDLevel_AVX512_SPR(f, ...)
101
+ case SIMDLevel::AVX512_SPR:
102
+ if constexpr (
103
+ available_levels & (1 << int(SIMDLevel::AVX512_SPR))) {
104
+ return action.template operator()<SIMDLevel::AVX512_SPR>();
105
+ }
106
+ [[fallthrough]];
50
107
  #endif
51
108
 
52
- /*********************** ARM SIMD dispatch cases */
109
+ #ifdef COMPILE_SIMD_AVX512
110
+ case SIMDLevel::AVX512:
111
+ if constexpr (available_levels & (1 << int(SIMDLevel::AVX512))) {
112
+ return action.template operator()<SIMDLevel::AVX512>();
113
+ }
114
+ [[fallthrough]];
115
+ #endif
53
116
 
54
- #ifdef COMPILE_SIMD_ARM_NEON
55
- #define DISPATCH_SIMDLevel_ARM_NEON(f, ...) \
56
- case SIMDLevel::ARM_NEON: \
57
- return f<SIMDLevel::ARM_NEON>(__VA_ARGS__)
58
- #else
59
- #define DISPATCH_SIMDLevel_ARM_NEON(f, ...)
117
+ #ifdef COMPILE_SIMD_AVX2
118
+ case SIMDLevel::AVX2:
119
+ if constexpr (available_levels & (1 << int(SIMDLevel::AVX2))) {
120
+ return action.template operator()<SIMDLevel::AVX2>();
121
+ }
122
+ [[fallthrough]];
60
123
  #endif
61
124
 
125
+ // For ARM, try from highest to lowest level
62
126
  #ifdef COMPILE_SIMD_ARM_SVE
63
- #define DISPATCH_SIMDLevel_ARM_SVE(f, ...) \
64
- case SIMDLevel::ARM_SVE: \
65
- return f<SIMDLevel::ARM_SVE>(__VA_ARGS__)
66
- #else
67
- #define DISPATCH_SIMDLevel_ARM_SVE(f, ...)
127
+ case SIMDLevel::ARM_SVE:
128
+ if constexpr (available_levels & (1 << int(SIMDLevel::ARM_SVE))) {
129
+ return action.template operator()<SIMDLevel::ARM_SVE>();
130
+ }
131
+ [[fallthrough]];
68
132
  #endif
69
133
 
70
- /*********************** Main dispatch macro */
71
-
72
- #ifdef FAISS_ENABLE_DD
134
+ #ifdef COMPILE_SIMD_ARM_NEON
135
+ case SIMDLevel::ARM_NEON:
136
+ if constexpr (available_levels & (1 << int(SIMDLevel::ARM_NEON))) {
137
+ return action.template operator()<SIMDLevel::ARM_NEON>();
138
+ }
139
+ [[fallthrough]];
140
+ #endif
73
141
 
74
- // DD mode: runtime dispatch based on SIMDConfig::level
75
- #define DISPATCH_SIMDLevel(f, ...) \
76
- switch (SIMDConfig::level) { \
77
- case SIMDLevel::NONE: \
78
- return f<SIMDLevel::NONE>(__VA_ARGS__); \
79
- DISPATCH_SIMDLevel_AVX2(f, __VA_ARGS__); \
80
- DISPATCH_SIMDLevel_AVX512(f, __VA_ARGS__); \
81
- DISPATCH_SIMDLevel_AVX512_SPR(f, __VA_ARGS__); \
82
- DISPATCH_SIMDLevel_ARM_NEON(f, __VA_ARGS__); \
83
- DISPATCH_SIMDLevel_ARM_SVE(f, __VA_ARGS__); \
84
- default: \
85
- FAISS_THROW_MSG("Invalid SIMD level"); \
142
+ #ifdef COMPILE_SIMD_RISCV_RVV
143
+ case SIMDLevel::RISCV_RVV:
144
+ if constexpr (available_levels & (1 << int(SIMDLevel::RISCV_RVV))) {
145
+ return action.template operator()<SIMDLevel::RISCV_RVV>();
146
+ }
147
+ [[fallthrough]];
148
+ #endif
149
+ default:
150
+ return action.template operator()<SIMDLevel::NONE>();
86
151
  }
87
-
88
- #else // Static mode
89
-
90
- // Static mode: direct call to compiled-in SIMD level (no runtime switch)
91
- #if defined(COMPILE_SIMD_AVX512_SPR)
92
- #define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::AVX512_SPR>(__VA_ARGS__)
93
- #elif defined(COMPILE_SIMD_AVX512)
94
- #define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::AVX512>(__VA_ARGS__)
95
- #elif defined(COMPILE_SIMD_AVX2)
96
- #define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::AVX2>(__VA_ARGS__)
97
- #elif defined(COMPILE_SIMD_ARM_SVE)
98
- #define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::ARM_SVE>(__VA_ARGS__)
99
- #elif defined(COMPILE_SIMD_ARM_NEON)
100
- #define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::ARM_NEON>(__VA_ARGS__)
101
- #else
102
- #define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::NONE>(__VA_ARGS__)
152
+ #else // static dispatch
153
+ // In static mode, SINGLE_SIMD_LEVEL is a constexpr resolved at compile
154
+ // time. We mirror the DD fallthrough behavior at compile time via
155
+ // dispatch_with_fallback, which recursively walks get_simd_fallback:
156
+ // x86: AVX512_SPR -> AVX512 -> AVX2 -> NONE
157
+ // ARM: ARM_SVE -> ARM_NEON -> NONE
158
+ // RISCV: RISCV_RVV -> NONE
159
+ // The first level in the chain that appears in available_levels is
160
+ // selected; if none match, NONE is used unconditionally.
161
+ return dispatch_with_fallback<available_levels, SINGLE_SIMD_LEVEL>(
162
+ std::forward<LambdaType>(action));
103
163
  #endif
104
-
105
- #endif // FAISS_ENABLE_DD
164
+ }
106
165
 
107
166
  /**
108
167
  * Dispatch to a lambda with SIMDLevel as a compile-time constant.
@@ -126,6 +185,8 @@ namespace faiss {
126
185
  * });
127
186
  *
128
187
  * The lambda must be a generic lambda with a SIMDLevel template parameter.
188
+ * By default, the lambda is dispatched over the levels NONE + AVX2 + AVX512 +
189
+ * NEON + RVV, since these are the most common cases.
129
190
  *
130
191
  * @param action A generic lambda with signature `template<SIMDLevel> T
131
192
  * operator()()`
@@ -133,7 +194,37 @@ namespace faiss {
133
194
  */
134
195
  template <typename LambdaType>
135
196
  inline auto with_simd_level(LambdaType&& action) {
136
- DISPATCH_SIMDLevel(action.template operator());
197
+ return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A0>(
198
+ std::forward<LambdaType>(action));
199
+ }
200
+
201
+ /**
202
+ * Use for functions with AVX512_SPR-specific implementations.
203
+ */
204
+ template <typename LambdaType>
205
+ inline auto with_simd_level_spr(LambdaType&& action) {
206
+ return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A0_SPR>(
207
+ std::forward<LambdaType>(action));
208
+ }
209
+
210
+ /**
211
+ * Use for functions implemented with simdXintY (256-bit) operations
212
+ * that don't have dedicated AVX512 or SVE implementations.
213
+ */
214
+ template <typename LambdaType>
215
+ inline auto with_simd_level_256bit(LambdaType&& action) {
216
+ return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_AVX2_NEON>(
217
+ std::forward<LambdaType>(action));
218
+ }
219
+
220
+ /**
221
+ * Use for functions that have A0-level implementations plus an AVX512_SPR
222
+ * specialization (e.g. using VPOPCNTDQ).
223
+ */
224
+ template <typename LambdaType>
225
+ inline auto with_simd_level_a0_spr(LambdaType&& action) {
226
+ return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A0_SPR>(
227
+ std::forward<LambdaType>(action));
137
228
  }
138
229
 
139
230
  } // namespace faiss
@@ -0,0 +1,57 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ /** Abstractions for 256-bit and 512-bit SIMD registers.
11
+ *
12
+ * The objective is to separate the different interpretations of the same
13
+ * registers (as a vector of uint8, uint16 or uint32) and to provide printing
14
+ * functions.
15
+ *
16
+ * The types are templatized on SIMDLevel. Each platform header provides
17
+ * explicit specializations for the appropriate level. Code without explicit
18
+ * SL context uses SINGLE_SIMD_LEVEL (see simd_levels.h).
19
+ */
20
+
21
+ #include <faiss/utils/simd_levels.h>
22
+
23
+ namespace faiss {
24
+
25
+ // 256-bit primary templates
26
+ template <SIMDLevel SL>
27
+ struct simd256bit_tpl {};
28
+ template <SIMDLevel SL>
29
+ struct simd16uint16_tpl : simd256bit_tpl<SL> {};
30
+ template <SIMDLevel SL>
31
+ struct simd32uint8_tpl : simd256bit_tpl<SL> {};
32
+ template <SIMDLevel SL>
33
+ struct simd8uint32_tpl : simd256bit_tpl<SL> {};
34
+ template <SIMDLevel SL>
35
+ struct simd8float32_tpl : simd256bit_tpl<SL> {};
36
+
37
+ // 512-bit primary templates
38
+ template <SIMDLevel SL>
39
+ struct simd512bit_tpl {};
40
+ template <SIMDLevel SL>
41
+ struct simd32uint16_tpl : simd512bit_tpl<SL> {};
42
+ template <SIMDLevel SL>
43
+ struct simd64uint8_tpl : simd512bit_tpl<SL> {};
44
+ template <SIMDLevel SL>
45
+ struct simd16float32_tpl : simd512bit_tpl<SL> {};
46
+
47
+ } // namespace faiss
48
+
49
+ // NONE specialization — always included.
50
+ // Provides simd16uint16_tpl<NONE> etc. (scalar fallback).
51
+ // On PPC64: uses PPC-optimized scalar code (hand-tuned loop unrolling).
52
+ // Elsewhere: generic scalar implementation.
53
+ #if defined(__PPC64__)
54
+ #include <faiss/impl/simdlib/simdlib_ppc64.h>
55
+ #else
56
+ #include <faiss/impl/simdlib/simdlib_emulated.h>
57
+ #endif