faiss 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (378) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +88 -97
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +89 -417
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +374 -206
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +467 -364
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +79 -76
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +39 -69
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +56 -33
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +73 -846
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -20
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +30 -52
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +38 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +150 -20
  84. data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
  85. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  86. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  87. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  88. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  89. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  90. data/vendor/faiss/faiss/MetricType.h +14 -7
  91. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  92. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  93. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  94. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  95. data/vendor/faiss/faiss/build.cpp +23 -0
  96. data/vendor/faiss/faiss/build.h +15 -0
  97. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  98. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  101. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
  102. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  103. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  104. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
  105. data/vendor/faiss/faiss/factory_tools.cpp +9 -0
  106. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  107. data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
  108. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +15 -16
  109. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +5 -4
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  113. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  114. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  115. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  116. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  117. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
  120. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  121. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +58 -0
  122. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  123. data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
  124. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +111 -0
  125. data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
  126. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  127. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  128. data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
  129. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  130. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  135. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  136. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  137. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  138. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  139. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  140. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  141. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  142. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  143. data/vendor/faiss/faiss/impl/HNSW.cpp +639 -507
  144. data/vendor/faiss/faiss/impl/HNSW.h +61 -44
  145. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  146. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  147. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  148. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  149. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  150. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  151. data/vendor/faiss/faiss/impl/NSG.cpp +53 -32
  152. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  153. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  154. data/vendor/faiss/faiss/impl/Panorama.h +269 -87
  155. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  156. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  157. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  158. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  159. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  160. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  161. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +55 -25
  162. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  163. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  164. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  165. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +302 -283
  166. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  167. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  168. data/vendor/faiss/faiss/impl/ResultHandler.h +100 -75
  169. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +318 -7
  170. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +77 -1
  171. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  172. data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
  173. data/vendor/faiss/faiss/impl/VisitedTable.h +70 -28
  174. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  175. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  176. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  177. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  178. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  179. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  180. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  182. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  183. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  184. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  185. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  186. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  187. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  188. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  189. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  190. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  191. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  192. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  193. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  194. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  196. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  197. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +270 -0
  198. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  199. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  200. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  201. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  202. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  203. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  204. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  205. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  206. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  207. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  208. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  209. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  210. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  211. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +83 -0
  212. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +113 -0
  213. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +150 -0
  214. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +142 -0
  215. data/vendor/faiss/faiss/impl/index_read.cpp +1227 -79
  216. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  217. data/vendor/faiss/faiss/impl/index_write.cpp +96 -13
  218. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  219. data/vendor/faiss/faiss/impl/io_macros.h +58 -16
  220. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  221. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  222. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  223. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  224. data/vendor/faiss/faiss/impl/platform_macros.h +15 -4
  225. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  226. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  228. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  229. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +23 -0
  230. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +23 -0
  231. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +23 -0
  232. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  233. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  234. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +45 -107
  235. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  236. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +274 -5
  237. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +10 -7
  238. data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
  239. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +70 -0
  240. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  241. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +9 -2
  244. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +419 -19
  245. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  246. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  247. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +387 -2
  248. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
  249. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
  250. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +341 -2
  251. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +425 -3
  252. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +290 -2
  253. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +337 -0
  254. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  255. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  256. data/vendor/faiss/faiss/impl/simd_dispatch.h +157 -66
  257. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  258. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  259. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  260. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  261. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  262. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  263. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  264. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  265. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  266. data/vendor/faiss/faiss/index_factory.cpp +90 -18
  267. data/vendor/faiss/faiss/index_io.h +40 -0
  268. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  269. data/vendor/faiss/faiss/invlists/DirectMap.cpp +28 -15
  270. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  271. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +170 -86
  272. data/vendor/faiss/faiss/invlists/InvertedLists.h +88 -25
  273. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  274. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  275. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  276. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  277. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  278. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  279. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  280. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  281. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  282. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  283. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  284. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +142 -21
  285. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +33 -7
  286. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
  287. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
  288. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +77 -27
  289. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +10 -4
  290. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  291. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  292. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  293. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  294. data/vendor/faiss/faiss/utils/bf16.h +34 -0
  295. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  296. data/vendor/faiss/faiss/utils/distances.h +20 -1
  297. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  298. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  299. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  300. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  301. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  302. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  303. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  304. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -178
  305. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  306. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  307. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  308. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  309. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  310. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  311. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +16 -0
  312. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  313. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
  314. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  315. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +210 -0
  316. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
  317. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  318. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  319. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  320. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  321. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  322. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  323. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  324. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -989
  325. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  326. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  327. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  328. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  329. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  330. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  331. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  332. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  333. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  335. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  336. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  337. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  338. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  339. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  340. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  341. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  342. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  343. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  344. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  345. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  346. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1031 -0
  347. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  348. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  349. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
  350. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  351. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  352. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  353. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  354. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  355. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  356. data/vendor/faiss/faiss/utils/simd_levels.cpp +29 -7
  357. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  358. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  359. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  360. data/vendor/faiss/faiss/utils/utils.h +3 -3
  361. metadata +129 -34
  362. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  363. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  364. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  365. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  366. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  367. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  368. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  369. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  370. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  371. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  372. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  373. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  374. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  375. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  376. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  377. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  378. /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -0,0 +1,54 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/impl/hnsw/LockVector.h>
9
+
10
+ #include <cstdlib>
11
+
12
+ #include <utility>
13
+
14
+ namespace faiss {
15
+
16
+ LockVector::LockVector(LockVector&& other) noexcept
17
+ : data_(other.data_), size_(other.size_), capacity_(other.capacity_) {
18
+ other.data_ = nullptr;
19
+ other.size_ = 0;
20
+ other.capacity_ = 0;
21
+ }
22
+
23
+ void LockVector::prepare(size_t new_size) {
24
+ if (new_size <= size_) {
25
+ return;
26
+ }
27
+ if (new_size > capacity_) {
28
+ // Ensure geometric capacity growth.
29
+ size_t new_cap = std::max(new_size, capacity_ * 2);
30
+ // Just destroy old and init fresh; omp_lock_t is not copyable.
31
+ clear();
32
+ data_ = static_cast<omp_lock_t*>(malloc(new_cap * sizeof(omp_lock_t)));
33
+ FAISS_THROW_IF_NOT(data_ != nullptr);
34
+ capacity_ = new_cap;
35
+ }
36
+ for (size_t i = size_; i < new_size; i++) {
37
+ omp_init_lock(&data_[i]);
38
+ }
39
+ size_ = new_size;
40
+ }
41
+
42
+ void LockVector::clear() {
43
+ if (data_) {
44
+ for (size_t i = 0; i < size_; i++) {
45
+ omp_destroy_lock(&data_[i]);
46
+ }
47
+ free(data_);
48
+ data_ = nullptr;
49
+ }
50
+ size_ = 0;
51
+ capacity_ = 0;
52
+ }
53
+
54
+ } // namespace faiss
@@ -0,0 +1,64 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <omp.h>
11
+
12
+ #include <faiss/impl/FaissAssert.h>
13
+
14
+ namespace faiss {
15
+
16
+ /// Contiguous, growable array of locks with geometric growth.
17
+ class LockVector {
18
+ public:
19
+ LockVector() = default;
20
+ explicit LockVector(size_t n) {
21
+ prepare(n);
22
+ }
23
+ // Copy ctor for clone(), initialized as empty.
24
+ LockVector(const LockVector&) : LockVector() {}
25
+ LockVector(LockVector&& other) noexcept;
26
+
27
+ ~LockVector() {
28
+ clear();
29
+ }
30
+
31
+ LockVector& operator=(const LockVector&) = delete;
32
+ LockVector& operator=(LockVector&& other) = delete;
33
+
34
+ size_t size() const {
35
+ return size_;
36
+ }
37
+
38
+ // Ensure size is at least 'new_size'. No locks may be held.
39
+ void prepare(size_t new_size);
40
+ // Release all locks and free memory. No locks may be held.
41
+ void clear();
42
+
43
+ void lock(size_t i) {
44
+ FAISS_CHECK_RANGE_DEBUG(i, 0, size_);
45
+ omp_set_lock(&data_[i]);
46
+ }
47
+
48
+ void unlock(size_t i) {
49
+ FAISS_CHECK_RANGE_DEBUG(i, 0, size_);
50
+ omp_unset_lock(&data_[i]);
51
+ }
52
+
53
+ bool try_lock(size_t i) {
54
+ FAISS_CHECK_RANGE_DEBUG(i, 0, size_);
55
+ return omp_test_lock(&data_[i]);
56
+ }
57
+
58
+ private:
59
+ omp_lock_t* data_ = nullptr;
60
+ size_t size_ = 0;
61
+ size_t capacity_ = 0;
62
+ };
63
+
64
+ } // namespace faiss
@@ -0,0 +1,83 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/impl/hnsw/MinimaxHeap.h>
9
+
10
+ #include <faiss/impl/simd_dispatch.h>
11
+
12
+ namespace faiss {
13
+
14
+ // Runtime-dispatched pop_min (NONE + AVX2 + AVX512 only).
15
+ constexpr int MINIMAX_HEAP_SIMD_LEVELS = (1 << int(SIMDLevel::NONE)) |
16
+ (1 << int(SIMDLevel::AVX2)) | (1 << int(SIMDLevel::AVX512));
17
+
18
+ template <class HC_>
19
+ int MinimaxHeapT<HC_>::pop_min(float* vmin_out) {
20
+ return with_selected_simd_levels<MINIMAX_HEAP_SIMD_LEVELS>(
21
+ [&]<SIMDLevel SL>() {
22
+ return pop_min_tpl<HC_, SL>(this, vmin_out);
23
+ });
24
+ }
25
+
26
+ // Primary-template scalar implementation. Used directly when SL==NONE
27
+ template <class HC>
28
+ int pop_min_simd_none(MinimaxHeapT<HC>* heap, float* vmin_out) {
29
+ int k = heap->k;
30
+ int* ids = heap->ids.data();
31
+ float* dis = heap->dis.data();
32
+ assert(k > 0);
33
+ // Returns the "best" entry. This is an O(n) operation.
34
+ int i = k - 1;
35
+ while (i >= 0) {
36
+ if (ids[i] != -1) {
37
+ break;
38
+ }
39
+ i--;
40
+ }
41
+ if (i == -1) {
42
+ return -1;
43
+ }
44
+ int imin = i;
45
+ float vmin = dis[i];
46
+ i--;
47
+ while (i >= 0) {
48
+ // HC::cmp(vmin, dis[i]) → "dis[i] is better than vmin".
49
+ if (ids[i] != -1 && HC::cmp(vmin, dis[i])) {
50
+ vmin = dis[i];
51
+ imin = i;
52
+ }
53
+ i--;
54
+ }
55
+ if (vmin_out) {
56
+ *vmin_out = vmin;
57
+ }
58
+ int ret = ids[imin];
59
+ ids[imin] = -1;
60
+ --heap->nvalid;
61
+ return ret;
62
+ }
63
+
64
+ // declare for min and max heap at simd level NONE
65
+ template <>
66
+ int pop_min_tpl<CMin<float, int32_t>, SIMDLevel::NONE>(
67
+ MinimaxHeapT<CMin<float, int32_t>>* heap,
68
+ float* vmin_out) {
69
+ return pop_min_simd_none(heap, vmin_out);
70
+ }
71
+
72
+ template <>
73
+ int pop_min_tpl<CMax<float, int32_t>, SIMDLevel::NONE>(
74
+ MinimaxHeapT<CMax<float, int32_t>>* heap,
75
+ float* vmin_out) {
76
+ return pop_min_simd_none(heap, vmin_out);
77
+ }
78
+
79
+ // Explicit instantiations of pop_min for the two HC variants
80
+ template int MinimaxHeapT<CMax<float, int32_t>>::pop_min(float*);
81
+ template int MinimaxHeapT<CMin<float, int32_t>>::pop_min(float*);
82
+
83
+ } // namespace faiss
@@ -0,0 +1,113 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <cassert>
11
+ #include <cmath>
12
+ #include <cstdint>
13
+ #include <vector>
14
+
15
+ #include <faiss/utils/Heap.h>
16
+ #include <faiss/utils/ordered_key_value.h>
17
+ #include <faiss/utils/simd_levels.h>
18
+
19
+ namespace faiss {
20
+
21
+ /** Heap structure that allows fast access and updates.
22
+ *
23
+ * Templated on the comparator HC_ so that the same data structure can
24
+ * service both distance-style searches (HC_ = CMax<float, int32_t>, smaller
25
+ * is better) and similarity-style searches (HC_ = CMin<float, int32_t>,
26
+ * larger is better). For the distance variant the underlying heap is a
27
+ * max-heap and "pop_min" returns the closest element; for similarity the
28
+ * underlying heap is a min-heap and "pop_min" returns the most similar
29
+ * element.
30
+ */
31
+ template <class HC_ = CMax<float, int32_t>>
32
+ struct MinimaxHeapT {
33
+ using HC = HC_;
34
+ using storage_idx_t = int32_t;
35
+
36
+ int n;
37
+ int k;
38
+ int nvalid;
39
+
40
+ std::vector<storage_idx_t> ids;
41
+ std::vector<float> dis;
42
+
43
+ explicit MinimaxHeapT(int n_in)
44
+ : n(n_in), k(0), nvalid(0), ids(n_in), dis(n_in) {}
45
+
46
+ void push(storage_idx_t i, float v) {
47
+ // Treat NaN distances as the "worst" value so heap ordering is
48
+ // preserved (insertion is then guaranteed to fall through the
49
+ // not-better-than-top early-reject branch when the heap is full).
50
+ if (std::isnan(v)) {
51
+ v = HC::neutral();
52
+ }
53
+ if (k == n) {
54
+ // top of the heap is the "worst" entry under HC. If the new
55
+ // value is not strictly better than the worst, drop it.
56
+ // HC::cmp(top, v) means "v is better than top" for both CMax
57
+ // (cmp = a > b → top > v → v < top) and CMin (cmp = a < b →
58
+ // top < v → v > top).
59
+ if (!HC::cmp(dis[0], v)) {
60
+ return;
61
+ }
62
+ if (ids[0] != -1) {
63
+ --nvalid;
64
+ }
65
+ faiss::heap_pop<HC>(k--, dis.data(), ids.data());
66
+ }
67
+ faiss::heap_push<HC>(++k, dis.data(), ids.data(), v, i);
68
+ ++nvalid;
69
+ }
70
+
71
+ float max() const {
72
+ return dis[0];
73
+ }
74
+
75
+ int size() const {
76
+ return nvalid;
77
+ }
78
+
79
+ void clear() {
80
+ nvalid = k = 0;
81
+ }
82
+
83
+ /// Runtime-dispatched best-element extraction (NONE + AVX2 + AVX512).
84
+ int pop_min(float* vmin_out = nullptr);
85
+
86
+ int count_below(float thresh) {
87
+ int n_below = 0;
88
+ for (int i = 0; i < k; i++) {
89
+ // Count entries that are strictly "better than" thresh.
90
+ // HC::cmp(thresh, dis[i]) → for CMax: thresh > dis[i]
91
+ // (i.e., dis[i] < thresh, the historical L2 semantics);
92
+ // for CMin: thresh < dis[i] (similarity above threshold).
93
+ if (HC::cmp(thresh, dis[i])) {
94
+ n_below++;
95
+ }
96
+ }
97
+ return n_below;
98
+ }
99
+ };
100
+
101
+ // Default `MinimaxHeap` keeps the historical max-heap semantics (smaller
102
+ // distance is better). The CMin instantiation is used when the owning
103
+ // HNSW has `is_similarity = true`. The alias itself is declared once,
104
+ // alongside the forward declaration in HNSW.h, to avoid duplicate
105
+ // `using` declarations that SWIG treats as redundant.
106
+
107
+ // Forward declarations of the SIMD specializations. The actual bodies live
108
+ // in the SIMD-specific translation units (avx2.cpp, avx512.cpp) and are
109
+ // resolved at link time.
110
+ template <class HC_, SIMDLevel SL>
111
+ int pop_min_tpl(MinimaxHeapT<HC_>* heap, float* vmin_out);
112
+
113
+ } // namespace faiss
@@ -0,0 +1,150 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #ifdef COMPILE_SIMD_AVX2
9
+
10
+ #include <faiss/impl/hnsw/MinimaxHeap.h>
11
+
12
+ #include <immintrin.h>
13
+ #include <cassert>
14
+ #include <limits>
15
+ #include <type_traits>
16
+
17
+ namespace faiss {
18
+
19
+ namespace {
20
+
21
+ /// Templated AVX2 implementation of "pop best" for both CMax (returns
22
+ /// the smallest distance) and CMin (returns the largest similarity).
23
+ /// The only differences between the two flavors are: (1) the initial
24
+ /// "worst possible" value, (2) the running-best update comparison
25
+ /// (`_CMP_LT_OS` vs `_CMP_GT_OS`), and (3) the tiebreaker direction.
26
+ template <class HC>
27
+ int pop_best_avx2(MinimaxHeapT<HC>& heap, float* vmin_out) {
28
+ using storage_idx_t = typename MinimaxHeapT<HC>::storage_idx_t;
29
+ static_assert(
30
+ std::is_same<storage_idx_t, int32_t>::value,
31
+ "This code expects storage_idx_t to be int32_t");
32
+ assert(heap.k > 0);
33
+
34
+ // For CMax (distance) the "best" candidate is the smallest value, so
35
+ // we initialize the running best to +inf. For CMin (similarity) the
36
+ // best is the largest value, so we initialize to -inf.
37
+ constexpr float worst_v = HC::is_max
38
+ ? std::numeric_limits<float>::infinity()
39
+ : -std::numeric_limits<float>::infinity();
40
+
41
+ int32_t best_idx = -1;
42
+ float best_dis = worst_v;
43
+
44
+ size_t iii = 0;
45
+
46
+ __m256i best_indices = _mm256_setr_epi32(-1, -1, -1, -1, -1, -1, -1, -1);
47
+ __m256 best_distances = _mm256_set1_ps(worst_v);
48
+ __m256i current_indices = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
49
+ __m256i offset = _mm256_set1_epi32(8);
50
+
51
+ // Track the rightmost index whose distance equals the running best.
52
+ // -1 index values are filtered out via m1mask.
53
+ const size_t k8 = (heap.k / 8) * 8;
54
+ for (; iii < k8; iii += 8) {
55
+ __m256i indices =
56
+ _mm256_loadu_si256((const __m256i*)(heap.ids.data() + iii));
57
+ __m256 distances = _mm256_loadu_ps(heap.dis.data() + iii);
58
+
59
+ // Mask out -1 indices (invalid entries).
60
+ __m256i m1mask = _mm256_cmpgt_epi32(_mm256_setzero_si256(), indices);
61
+
62
+ // dmask is "true where best is already (strictly) better than the
63
+ // candidate" — entries the candidate should NOT update. For CMax,
64
+ // best < candidate means we keep best (we want the smallest);
65
+ // for CMin we keep best when best > candidate (we want the largest).
66
+ __m256i dmask;
67
+ if constexpr (HC::is_max) {
68
+ dmask = _mm256_castps_si256(
69
+ _mm256_cmp_ps(best_distances, distances, _CMP_LT_OS));
70
+ } else {
71
+ dmask = _mm256_castps_si256(
72
+ _mm256_cmp_ps(best_distances, distances, _CMP_GT_OS));
73
+ }
74
+ __m256 finalmask = _mm256_castsi256_ps(_mm256_or_si256(m1mask, dmask));
75
+
76
+ const __m256i best_indices_new = _mm256_castps_si256(_mm256_blendv_ps(
77
+ _mm256_castsi256_ps(current_indices),
78
+ _mm256_castsi256_ps(best_indices),
79
+ finalmask));
80
+
81
+ const __m256 best_distances_new =
82
+ _mm256_blendv_ps(distances, best_distances, finalmask);
83
+
84
+ best_indices = best_indices_new;
85
+ best_distances = best_distances_new;
86
+
87
+ current_indices = _mm256_add_epi32(current_indices, offset);
88
+ }
89
+
90
+ // Vectorizing the horizontal reduction is doable but not practical.
91
+ int32_t vidx8[8];
92
+ float vdis8[8];
93
+ _mm256_storeu_ps(vdis8, best_distances);
94
+ _mm256_storeu_si256((__m256i*)vidx8, best_indices);
95
+
96
+ for (size_t j = 0; j < 8; j++) {
97
+ const bool strictly_better =
98
+ HC::is_max ? (best_dis > vdis8[j]) : (best_dis < vdis8[j]);
99
+ if (strictly_better || (best_dis == vdis8[j] && best_idx < vidx8[j])) {
100
+ best_idx = vidx8[j];
101
+ best_dis = vdis8[j];
102
+ }
103
+ }
104
+
105
+ // Tail (under 8 entries). Vectorizing is doable but not practical.
106
+ for (; iii < static_cast<size_t>(heap.k); iii++) {
107
+ if (heap.ids[iii] == -1) {
108
+ continue;
109
+ }
110
+ const bool weakly_better = HC::is_max ? (best_dis >= heap.dis[iii])
111
+ : (best_dis <= heap.dis[iii]);
112
+ if (weakly_better) {
113
+ best_dis = heap.dis[iii];
114
+ best_idx = iii;
115
+ }
116
+ }
117
+
118
+ if (best_idx == -1) {
119
+ return -1;
120
+ }
121
+
122
+ if (vmin_out) {
123
+ *vmin_out = best_dis;
124
+ }
125
+ int ret = heap.ids[best_idx];
126
+ heap.ids[best_idx] = -1;
127
+ --heap.nvalid;
128
+ return ret;
129
+ }
130
+
131
+ } // namespace
132
+
133
+ // Explicit specializations for AVX2
134
+ template <>
135
+ int pop_min_tpl<CMax<float, int32_t>, SIMDLevel::AVX2>(
136
+ MinimaxHeapT<CMax<float, int32_t>>* heap,
137
+ float* vmin_out) {
138
+ return pop_best_avx2<CMax<float, int32_t>>(*heap, vmin_out);
139
+ }
140
+
141
+ template <>
142
+ int pop_min_tpl<CMin<float, int32_t>, SIMDLevel::AVX2>(
143
+ MinimaxHeapT<CMin<float, int32_t>>* heap,
144
+ float* vmin_out) {
145
+ return pop_best_avx2<CMin<float, int32_t>>(*heap, vmin_out);
146
+ }
147
+
148
+ } // namespace faiss
149
+
150
+ #endif // COMPILE_SIMD_AVX2
@@ -0,0 +1,142 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #ifdef COMPILE_SIMD_AVX512
9
+
10
+ #include <faiss/impl/hnsw/MinimaxHeap.h>
11
+
12
+ #include <immintrin.h>
13
+ #include <cassert>
14
+ #include <limits>
15
+ #include <type_traits>
16
+
17
+ namespace faiss {
18
+
19
+ namespace {
20
+
21
+ /// Templated AVX512 implementation of "pop best" for both CMax (returns
22
+ /// the smallest distance) and CMin (returns the largest similarity).
23
+ template <class HC>
24
+ int pop_best_avx512(MinimaxHeapT<HC>& heap, float* vmin_out) {
25
+ using storage_idx_t = typename MinimaxHeapT<HC>::storage_idx_t;
26
+ static_assert(
27
+ std::is_same<storage_idx_t, int32_t>::value,
28
+ "This code expects storage_idx_t to be int32_t");
29
+ assert(heap.k > 0);
30
+
31
+ constexpr float worst_v = HC::is_max
32
+ ? std::numeric_limits<float>::infinity()
33
+ : -std::numeric_limits<float>::infinity();
34
+
35
+ int32_t best_idx = -1;
36
+ float best_dis = worst_v;
37
+
38
+ __m512i best_indices = _mm512_set1_epi32(-1);
39
+ __m512 best_distances = _mm512_set1_ps(worst_v);
40
+ __m512i current_indices = _mm512_setr_epi32(
41
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42
+ __m512i offset = _mm512_set1_epi32(16);
43
+
44
+ auto best_vs_cand_mask = [](__m512 best_d, __m512 cand_d) -> __mmask16 {
45
+ // Returns the mask of lanes where the current best is already
46
+ // (strictly) better than the candidate.
47
+ if constexpr (HC::is_max) {
48
+ return _mm512_cmp_ps_mask(best_d, cand_d, _CMP_LT_OS);
49
+ } else {
50
+ return _mm512_cmp_ps_mask(best_d, cand_d, _CMP_GT_OS);
51
+ }
52
+ };
53
+
54
+ const size_t k16 = (heap.k / 16) * 16;
55
+ for (size_t iii = 0; iii < k16; iii += 16) {
56
+ __m512i indices =
57
+ _mm512_loadu_si512((const __m512i*)(heap.ids.data() + iii));
58
+ __m512 distances = _mm512_loadu_ps(heap.dis.data() + iii);
59
+
60
+ __mmask16 m1mask =
61
+ _mm512_cmpgt_epi32_mask(_mm512_setzero_si512(), indices);
62
+ __mmask16 dmask = best_vs_cand_mask(best_distances, distances);
63
+ __mmask16 finalmask = m1mask | dmask;
64
+
65
+ const __m512i best_indices_new = _mm512_mask_blend_epi32(
66
+ finalmask, current_indices, best_indices);
67
+ const __m512 best_distances_new =
68
+ _mm512_mask_blend_ps(finalmask, distances, best_distances);
69
+
70
+ best_indices = best_indices_new;
71
+ best_distances = best_distances_new;
72
+
73
+ current_indices = _mm512_add_epi32(current_indices, offset);
74
+ }
75
+
76
+ // Leftovers.
77
+ if (k16 != static_cast<size_t>(heap.k)) {
78
+ const __mmask16 kmask = (1 << (heap.k - k16)) - 1;
79
+
80
+ __m512i indices = _mm512_mask_loadu_epi32(
81
+ _mm512_set1_epi32(-1), kmask, heap.ids.data() + k16);
82
+ __m512 distances = _mm512_maskz_loadu_ps(kmask, heap.dis.data() + k16);
83
+
84
+ __mmask16 m1mask =
85
+ _mm512_cmpgt_epi32_mask(_mm512_setzero_si512(), indices);
86
+ __mmask16 dmask = best_vs_cand_mask(best_distances, distances);
87
+ __mmask16 finalmask = m1mask | dmask;
88
+
89
+ const __m512i best_indices_new = _mm512_mask_blend_epi32(
90
+ finalmask, current_indices, best_indices);
91
+ const __m512 best_distances_new =
92
+ _mm512_mask_blend_ps(finalmask, distances, best_distances);
93
+
94
+ best_indices = best_indices_new;
95
+ best_distances = best_distances_new;
96
+ }
97
+
98
+ // Horizontal best: min for CMax (distance), max for CMin (similarity).
99
+ if constexpr (HC::is_max) {
100
+ best_dis = _mm512_reduce_min_ps(best_distances);
101
+ } else {
102
+ best_dis = _mm512_reduce_max_ps(best_distances);
103
+ }
104
+ // Tiebreak by picking the rightmost (largest) index among lanes
105
+ // matching the best distance, matching the original behavior.
106
+ __mmask16 best_lane_mask =
107
+ _mm512_cmpeq_ps_mask(best_distances, _mm512_set1_ps(best_dis));
108
+ best_idx = _mm512_mask_reduce_max_epi32(best_lane_mask, best_indices);
109
+
110
+ if (best_idx == -1) {
111
+ return -1;
112
+ }
113
+
114
+ if (vmin_out) {
115
+ *vmin_out = best_dis;
116
+ }
117
+ int ret = heap.ids[best_idx];
118
+ heap.ids[best_idx] = -1;
119
+ --heap.nvalid;
120
+ return ret;
121
+ }
122
+
123
+ } // namespace
124
+
125
+ // Explicit specializations for AVX512
126
+ template <>
127
+ int pop_min_tpl<CMax<float, int32_t>, SIMDLevel::AVX512>(
128
+ MinimaxHeapT<CMax<float, int32_t>>* heap,
129
+ float* vmin_out) {
130
+ return pop_best_avx512<CMax<float, int32_t>>(*heap, vmin_out);
131
+ }
132
+
133
+ template <>
134
+ int pop_min_tpl<CMin<float, int32_t>, SIMDLevel::AVX512>(
135
+ MinimaxHeapT<CMin<float, int32_t>>* heap,
136
+ float* vmin_out) {
137
+ return pop_best_avx512<CMin<float, int32_t>>(*heap, vmin_out);
138
+ }
139
+
140
+ } // namespace faiss
141
+
142
+ #endif // COMPILE_SIMD_AVX512