faiss 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  84. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  85. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  86. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  87. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  88. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  89. data/vendor/faiss/faiss/MetricType.h +14 -7
  90. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  91. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  92. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  93. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  94. data/vendor/faiss/faiss/build.cpp +23 -0
  95. data/vendor/faiss/faiss/build.h +15 -0
  96. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  101. data/vendor/faiss/faiss/factory_tools.cpp +5 -0
  102. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  109. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  110. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  111. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  112. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  113. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  114. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  115. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  116. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  117. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  120. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  121. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  122. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  123. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  124. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  125. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  126. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  127. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  128. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  129. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  130. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  131. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  132. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  133. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  134. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  135. data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
  136. data/vendor/faiss/faiss/impl/HNSW.h +13 -34
  137. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  138. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  139. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  141. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  142. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  143. data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
  144. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  145. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  146. data/vendor/faiss/faiss/impl/Panorama.h +258 -87
  147. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  148. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  149. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  150. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  151. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  152. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  153. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
  154. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  155. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  156. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  157. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
  158. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  159. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  160. data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
  161. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
  162. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
  163. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  164. data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
  165. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  166. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  167. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  168. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  169. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  170. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  171. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  172. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  173. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  174. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  175. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  176. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  177. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  178. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  179. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  180. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  182. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  183. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  184. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  185. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  186. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  187. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  188. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  189. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  190. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  191. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  192. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  193. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  194. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  196. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  197. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  198. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  199. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  200. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  201. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  202. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  203. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  204. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  205. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  206. data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
  207. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  208. data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
  209. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  210. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  211. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  212. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  213. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  214. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  215. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  216. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  217. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  218. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  219. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  220. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  221. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  222. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  223. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  224. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  225. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
  226. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
  228. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
  229. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  230. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  231. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  232. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  233. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
  234. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
  235. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  236. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  237. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
  238. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
  239. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
  240. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
  241. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  244. data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
  245. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  246. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  247. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  248. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  249. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  250. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  251. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  252. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  253. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  254. data/vendor/faiss/faiss/index_factory.cpp +86 -18
  255. data/vendor/faiss/faiss/index_io.h +24 -0
  256. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  257. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  258. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  259. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  260. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  261. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  262. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  263. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  264. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  265. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  266. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  267. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  268. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  269. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  270. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  271. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  272. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  273. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
  274. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  275. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
  276. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
  277. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  278. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  279. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  280. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  281. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  282. data/vendor/faiss/faiss/utils/distances.h +20 -1
  283. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  284. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  285. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  286. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  287. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  288. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  289. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  290. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
  291. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  292. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  293. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  294. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  295. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  296. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  297. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  298. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  299. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  300. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  301. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  302. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  303. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  304. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  305. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  306. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  307. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  308. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  309. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  310. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  311. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  312. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  313. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  314. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  315. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  316. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  317. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  318. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  319. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  320. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  321. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  322. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  323. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  324. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  325. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  326. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  327. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  328. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  329. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  330. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  331. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  332. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  333. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  339. data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
  340. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  341. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  342. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  343. data/vendor/faiss/faiss/utils/utils.h +3 -3
  344. metadata +119 -34
  345. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  346. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  347. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  348. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  349. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  350. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  351. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  352. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  353. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  354. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  355. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  356. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  357. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  358. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  359. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  360. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  361. /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -0,0 +1,169 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // Definitions of the SIMDLevel-templatized accum_and_*_tab functions.
9
+ // Only included by per-ISA .cpp files (avx2.cpp, neon.cpp).
10
+ // Do NOT include this from common translation units.
11
+ //
12
+ // Common TUs include rq_beam_search_tab.h (declarations only).
13
+
14
+ #pragma once
15
+
16
+ #include <cstddef>
17
+ #include <cstdint>
18
+
19
+ #include <faiss/impl/approx_topk/rq_beam_search_tab.h>
20
+ #include <faiss/impl/simdlib/simdlib.h>
21
+
22
+ namespace faiss {
23
+
24
+ template <size_t M, size_t NK, SIMDLevel SL>
25
+ void accum_and_store_tab(
26
+ const size_t m_offset,
27
+ const float* const __restrict codebook_cross_norms,
28
+ const uint64_t* const __restrict codebook_offsets,
29
+ const int32_t* const __restrict codes_i,
30
+ const size_t b,
31
+ const size_t ldc,
32
+ const size_t K,
33
+ float* const __restrict output) {
34
+ using simd_float = simd8float32_tpl<SL>;
35
+
36
+ const float* cbs[M];
37
+ for (size_t ij = 0; ij < M; ij++) {
38
+ const size_t code = static_cast<size_t>(codes_i[b * m_offset + ij]);
39
+ cbs[ij] = &codebook_cross_norms[(codebook_offsets[ij] + code) * ldc];
40
+ }
41
+
42
+ const size_t K8 = (K / (8 * NK)) * (8 * NK);
43
+
44
+ for (size_t kk = 0; kk < K8; kk += 8 * NK) {
45
+ simd_float regs[NK];
46
+ for (size_t ik = 0; ik < NK; ik++) {
47
+ regs[ik] = simd_float(cbs[0] + kk + ik * 8);
48
+ }
49
+
50
+ for (size_t ij = 1; ij < M; ij++) {
51
+ for (size_t ik = 0; ik < NK; ik++) {
52
+ regs[ik] += simd_float(cbs[ij] + kk + ik * 8);
53
+ }
54
+ }
55
+
56
+ for (size_t ik = 0; ik < NK; ik++) {
57
+ regs[ik].storeu(output + kk + ik * 8);
58
+ }
59
+ }
60
+
61
+ for (size_t kk = K8; kk < K; kk++) {
62
+ float reg = cbs[0][kk];
63
+ for (size_t ij = 1; ij < M; ij++) {
64
+ reg += cbs[ij][kk];
65
+ }
66
+ output[kk] = reg;
67
+ }
68
+ }
69
+
70
+ template <size_t M, size_t NK, SIMDLevel SL>
71
+ void accum_and_add_tab(
72
+ const size_t m_offset,
73
+ const float* const __restrict codebook_cross_norms,
74
+ const uint64_t* const __restrict codebook_offsets,
75
+ const int32_t* const __restrict codes_i,
76
+ const size_t b,
77
+ const size_t ldc,
78
+ const size_t K,
79
+ float* const __restrict output) {
80
+ using simd_float = simd8float32_tpl<SL>;
81
+
82
+ const float* cbs[M];
83
+ for (size_t ij = 0; ij < M; ij++) {
84
+ const size_t code = static_cast<size_t>(codes_i[b * m_offset + ij]);
85
+ cbs[ij] = &codebook_cross_norms[(codebook_offsets[ij] + code) * ldc];
86
+ }
87
+
88
+ const size_t K8 = (K / (8 * NK)) * (8 * NK);
89
+
90
+ for (size_t kk = 0; kk < K8; kk += 8 * NK) {
91
+ simd_float regs[NK];
92
+ for (size_t ik = 0; ik < NK; ik++) {
93
+ regs[ik] = simd_float(cbs[0] + kk + ik * 8);
94
+ }
95
+
96
+ for (size_t ij = 1; ij < M; ij++) {
97
+ for (size_t ik = 0; ik < NK; ik++) {
98
+ regs[ik] += simd_float(cbs[ij] + kk + ik * 8);
99
+ }
100
+ }
101
+
102
+ for (size_t ik = 0; ik < NK; ik++) {
103
+ simd_float existing(output + kk + ik * 8);
104
+ existing += regs[ik];
105
+ existing.storeu(output + kk + ik * 8);
106
+ }
107
+ }
108
+
109
+ for (size_t kk = K8; kk < K; kk++) {
110
+ float reg = cbs[0][kk];
111
+ for (size_t ij = 1; ij < M; ij++) {
112
+ reg += cbs[ij][kk];
113
+ }
114
+ output[kk] += reg;
115
+ }
116
+ }
117
+
118
+ template <size_t M, size_t NK, SIMDLevel SL>
119
+ void accum_and_finalize_tab(
120
+ const float* const __restrict codebook_cross_norms,
121
+ const uint64_t* const __restrict codebook_offsets,
122
+ const int32_t* const __restrict codes_i,
123
+ const size_t b,
124
+ const size_t ldc,
125
+ const size_t K,
126
+ const float* const __restrict distances_i,
127
+ const float* const __restrict cd_common,
128
+ float* const __restrict output) {
129
+ using simd_float = simd8float32_tpl<SL>;
130
+
131
+ const float* cbs[M];
132
+ for (size_t ij = 0; ij < M; ij++) {
133
+ const size_t code = static_cast<size_t>(codes_i[b * M + ij]);
134
+ cbs[ij] = &codebook_cross_norms[(codebook_offsets[ij] + code) * ldc];
135
+ }
136
+
137
+ const size_t K8 = (K / (8 * NK)) * (8 * NK);
138
+
139
+ for (size_t kk = 0; kk < K8; kk += 8 * NK) {
140
+ simd_float regs[NK];
141
+ for (size_t ik = 0; ik < NK; ik++) {
142
+ regs[ik] = simd_float(cbs[0] + kk + ik * 8);
143
+ }
144
+
145
+ for (size_t ij = 1; ij < M; ij++) {
146
+ for (size_t ik = 0; ik < NK; ik++) {
147
+ regs[ik] += simd_float(cbs[ij] + kk + ik * 8);
148
+ }
149
+ }
150
+
151
+ simd_float two(2.0f);
152
+ for (size_t ik = 0; ik < NK; ik++) {
153
+ simd_float common_v(cd_common + kk + ik * 8);
154
+ common_v = fmadd(two, regs[ik], common_v);
155
+ common_v += simd_float(distances_i[b]);
156
+ common_v.storeu(output + b * K + kk + ik * 8);
157
+ }
158
+ }
159
+
160
+ for (size_t kk = K8; kk < K; kk++) {
161
+ float reg = cbs[0][kk];
162
+ for (size_t ij = 1; ij < M; ij++) {
163
+ reg += cbs[ij][kk];
164
+ }
165
+ output[b * K + kk] = distances_i[b] + cd_common[kk] + 2 * reg;
166
+ }
167
+ }
168
+
169
+ } // namespace faiss
@@ -0,0 +1,117 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ /// @file rq_beam_search_tab.h
9
+ /// @brief Declarations for SIMDLevel-templatized codebook accumulation
10
+ /// functions.
11
+ ///
12
+ /// These functions accumulate codebook cross-norm tables for beam search
13
+ /// encoding in the Residual Quantizer. They compute the distance
14
+ /// contributions from previously encoded codebooks using SIMD-accelerated
15
+ /// register accumulation.
16
+ ///
17
+ /// Definitions are in rq_beam_search_tab-inl.h (only included by per-ISA
18
+ /// .cpp files). The common TU only sees these declarations, so no extern
19
+ /// template suppression is needed — the linker resolves to the explicit
20
+ /// instantiations in avx2.cpp / neon.cpp.
21
+
22
+ #pragma once
23
+
24
+ #include <cstddef>
25
+ #include <cstdint>
26
+
27
+ #include <faiss/utils/simd_levels.h>
28
+
29
+ namespace faiss {
30
+
31
+ /// Accumulate cross-norms for M codebooks and store the result.
32
+ ///
33
+ /// Loads M codebook rows (selected by codes_i) and sums them using
34
+ /// NK×8-wide SIMD chunks, writing the result to output. Used to
35
+ /// initialize the temporary buffer in the m≥8 path.
36
+ ///
37
+ /// @tparam M number of codebook rows to accumulate
38
+ /// @tparam NK number of 8-float SIMD chunks per loop iteration
39
+ /// @tparam SL SIMD level (AVX2, ARM_NEON, etc.)
40
+ /// @param m_offset stride between beam entries in codes_i
41
+ /// @param codebook_cross_norms cross-norm table, shape (total_codes, ldc)
42
+ /// @param codebook_offsets per-codebook offset into cross-norm table
43
+ /// @param codes_i code indices for the current query
44
+ /// @param b beam index
45
+ /// @param ldc leading dimension of cross-norm table (≥ K)
46
+ /// @param K number of centroids in the current codebook
47
+ /// @param output output buffer, size K (overwritten)
48
+ template <size_t M, size_t NK, SIMDLevel SL>
49
+ void accum_and_store_tab(
50
+ size_t m_offset,
51
+ const float* __restrict codebook_cross_norms,
52
+ const uint64_t* __restrict codebook_offsets,
53
+ const int32_t* __restrict codes_i,
54
+ size_t b,
55
+ size_t ldc,
56
+ size_t K,
57
+ float* __restrict output);
58
+
59
+ /// Accumulate cross-norms for M codebooks and add to existing output.
60
+ ///
61
+ /// Like accum_and_store_tab, but adds the accumulated result to the
62
+ /// existing values in output (output[k] += sum). Used for subsequent
63
+ /// chunks of 8 codebooks in the m≥8 path.
64
+ ///
65
+ /// @tparam M number of codebook rows to accumulate
66
+ /// @tparam NK number of 8-float SIMD chunks per loop iteration
67
+ /// @tparam SL SIMD level (AVX2, ARM_NEON, etc.)
68
+ /// @param m_offset stride between beam entries in codes_i
69
+ /// @param codebook_cross_norms cross-norm table
70
+ /// @param codebook_offsets per-codebook offset
71
+ /// @param codes_i code indices
72
+ /// @param b beam index
73
+ /// @param ldc leading dimension of cross-norm table
74
+ /// @param K number of centroids
75
+ /// @param output output buffer, size K (accumulated into)
76
+ template <size_t M, size_t NK, SIMDLevel SL>
77
+ void accum_and_add_tab(
78
+ size_t m_offset,
79
+ const float* __restrict codebook_cross_norms,
80
+ const uint64_t* __restrict codebook_offsets,
81
+ const int32_t* __restrict codes_i,
82
+ size_t b,
83
+ size_t ldc,
84
+ size_t K,
85
+ float* __restrict output);
86
+
87
+ /// Accumulate cross-norms for M codebooks and finalize distances.
88
+ ///
89
+ /// Accumulates M codebook rows, then computes the final centroid distance:
90
+ /// output[b*K + k] = distances_i[b] + cd_common[k] + 2 * sum[k]
91
+ /// Used for m=1..7 where the entire accumulation fits in registers.
92
+ ///
93
+ /// @tparam M number of codebook rows to accumulate (equals m)
94
+ /// @tparam NK number of 8-float SIMD chunks per loop iteration
95
+ /// @tparam SL SIMD level (AVX2, ARM_NEON, etc.)
96
+ /// @param codebook_cross_norms cross-norm table
97
+ /// @param codebook_offsets per-codebook offset
98
+ /// @param codes_i code indices (stride is M)
99
+ /// @param b beam index
100
+ /// @param ldc leading dimension of cross-norm table
101
+ /// @param K number of centroids
102
+ /// @param distances_i per-beam input distances, size beam_size
103
+ /// @param cd_common common distance term, size K
104
+ /// @param output output centroid distances; the K values for beam b are written starting at output[b * K]
105
+ template <size_t M, size_t NK, SIMDLevel SL>
106
+ void accum_and_finalize_tab(
107
+ const float* __restrict codebook_cross_norms,
108
+ const uint64_t* __restrict codebook_offsets,
109
+ const int32_t* __restrict codes_i,
110
+ size_t b,
111
+ size_t ldc,
112
+ size_t K,
113
+ const float* __restrict distances_i,
114
+ const float* __restrict cd_common,
115
+ float* __restrict output);
116
+
117
+ } // namespace faiss
@@ -0,0 +1,146 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // Out-of-line definition of HeapWithBucketsCMaxFloat::bs_addn using
9
+ // simdlib types. Only included by per-ISA .cpp files (avx2.cpp, neon.cpp).
10
+ // Do NOT include this from common translation units.
11
+
12
+ #pragma once
13
+
14
+ #include <cstdint>
15
+ #include <limits>
16
+
17
+ #include <faiss/impl/approx_topk/approx_topk.h>
18
+ #include <faiss/impl/simdlib/simdlib.h>
19
+ #include <faiss/utils/Heap.h>
20
+ #include <faiss/utils/simd_levels.h>
21
+
22
+ namespace faiss {
23
+
24
+ // Element-wise max of two simd8float32 vectors, implemented via
25
+ // cmplt_min_max_fast (which computes both min and max).
26
+ template <SIMDLevel SL>
27
+ inline simd8float32_tpl<SL> simd8float32_max(
28
+ simd8float32_tpl<SL> a,
29
+ simd8float32_tpl<SL> b) {
30
+ simd8float32_tpl<SL> min_val, max_val;
31
+ simd8uint32_tpl<SL> dummy(0u), dmin, dmax;
32
+ cmplt_min_max_fast(a, dummy, b, dummy, min_val, dmin, max_val, dmax);
33
+ return max_val;
34
+ }
35
+
36
+ template <uint32_t NBUCKETS, uint32_t N, SIMDLevel SL>
37
+ void HeapWithBucketsCMaxFloat<NBUCKETS, N, SL>::bs_addn(
38
+ const uint32_t beam_size,
39
+ const uint32_t n_per_beam,
40
+ const float* const __restrict distances,
41
+ const uint32_t k,
42
+ float* const __restrict bh_val,
43
+ int32_t* const __restrict bh_ids) {
44
+ using C = CMax<float, int>;
45
+ using simd_float = simd8float32_tpl<SL>;
46
+ using simd_uint = simd8uint32_tpl<SL>;
47
+
48
+ for (uint32_t beam_index = 0; beam_index < beam_size; beam_index++) {
49
+ simd_float min_distances_i[NBUCKETS / 8][N];
50
+ simd_uint min_indices_i[NBUCKETS / 8][N];
51
+
52
+ for (uint32_t j = 0; j < NBUCKETS / 8; j++) {
53
+ for (uint32_t p = 0; p < N; p++) {
54
+ min_distances_i[j][p] =
55
+ simd_float(std::numeric_limits<float>::max());
56
+ min_indices_i[j][p] = simd_uint(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u);
57
+ }
58
+ }
59
+
60
+ simd_uint current_indices(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u);
61
+ simd_uint indices_delta(NBUCKETS);
62
+
63
+ const uint32_t nb = (n_per_beam / NBUCKETS) * NBUCKETS;
64
+
65
+ // put the data into buckets
66
+ for (uint32_t ip = 0; ip < nb; ip += NBUCKETS) {
67
+ for (uint32_t j = 0; j < NBUCKETS / 8; j++) {
68
+ const simd_float distances_reg(
69
+ distances + j * 8 + ip + n_per_beam * beam_index);
70
+
71
+ simd_float distance_candidate = distances_reg;
72
+ simd_uint indices_candidate = current_indices;
73
+
74
+ for (uint32_t p = 0; p < N; p++) {
75
+ // Use cmplt_min_max_fast for comparison, min values,
76
+ // min indices, and max indices.
77
+ simd_float min_d_new, max_d_unused;
78
+ simd_uint min_idx_new, max_idx_new;
79
+ cmplt_min_max_fast(
80
+ distance_candidate,
81
+ indices_candidate,
82
+ min_distances_i[j][p],
83
+ min_indices_i[j][p],
84
+ min_d_new,
85
+ min_idx_new,
86
+ max_d_unused,
87
+ max_idx_new);
88
+
89
+ // The max distance uses distances_reg (the original
90
+ // input), NOT distance_candidate. This is a deliberate
91
+ // approximation that breaks the data dependency chain.
92
+ simd_float max_d_new = simd8float32_max<SL>(
93
+ min_distances_i[j][p], distances_reg);
94
+
95
+ distance_candidate = max_d_new;
96
+ indices_candidate = max_idx_new;
97
+
98
+ min_distances_i[j][p] = min_d_new;
99
+ min_indices_i[j][p] = min_idx_new;
100
+ }
101
+ }
102
+
103
+ current_indices = current_indices + indices_delta;
104
+ }
105
+
106
+ // fix the indices
107
+ for (uint32_t j = 0; j < NBUCKETS / 8; j++) {
108
+ const simd_uint offset(n_per_beam * beam_index + j * 8);
109
+ for (uint32_t p = 0; p < N; p++) {
110
+ min_indices_i[j][p] = min_indices_i[j][p] + offset;
111
+ }
112
+ }
113
+
114
+ // merge every bucket into the regular heap
115
+ for (uint32_t p = 0; p < N; p++) {
116
+ for (uint32_t j = 0; j < NBUCKETS / 8; j++) {
117
+ uint32_t min_indices_scalar[8];
118
+ float min_distances_scalar[8];
119
+
120
+ min_indices_i[j][p].storeu(min_indices_scalar);
121
+ min_distances_i[j][p].storeu(min_distances_scalar);
122
+
123
+ for (size_t j8 = 0; j8 < 8; j8++) {
124
+ const auto value = min_distances_scalar[j8];
125
+ const auto index =
126
+ static_cast<int32_t>(min_indices_scalar[j8]);
127
+ if (C::cmp2(bh_val[0], value, bh_ids[0], index)) {
128
+ heap_replace_top<C>(k, bh_val, bh_ids, value, index);
129
+ }
130
+ }
131
+ }
132
+ }
133
+
134
+ // process leftovers
135
+ for (uint32_t ip = nb; ip < n_per_beam; ip++) {
136
+ const int32_t index = ip + n_per_beam * beam_index;
137
+ const float value = distances[index];
138
+
139
+ if (C::cmp(bh_val[0], value)) {
140
+ heap_replace_top<C>(k, bh_val, bh_ids, value, index);
141
+ }
142
+ }
143
+ }
144
+ }
145
+
146
+ } // namespace faiss
@@ -0,0 +1,73 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ /*
9
+ * Per-ISA implementation of Hamming distance computation for
10
+ * IndexBinaryHNSW. Included once per SIMD TU with THE_SIMD_LEVEL
11
+ * set to the desired SIMDLevel.
12
+ */
13
+
14
+ #pragma once
15
+
16
+ #ifndef THE_SIMD_LEVEL
17
+ #error "THE_SIMD_LEVEL must be defined before including this file"
18
+ #endif
19
+
20
+ // The including TU (or the per-ISA hamming_computer-*.h it pulls in first)
21
+ // is responsible for providing the HammingComputer*_tpl<SL> specializations;
22
+ // this header only needs the forward declarations and with_HammingComputer<SL>
23
+ // dispatcher from hamming_computer.h.
24
+ #include <faiss/utils/hamming_distance/hamming_computer.h>
25
+
26
+ #include <faiss/IndexBinaryFlat.h>
27
+ #include <faiss/impl/DistanceComputer.h>
28
+ #include <faiss/impl/binary_hamming/dispatch.h>
29
+ #include <faiss/utils/hamming.h>
30
+
31
+ namespace faiss {
32
+
33
+ namespace {
34
+
35
+ template <class HammingComputer>
36
+ struct FlatHammingDis : DistanceComputer {
37
+ const int code_size;
38
+ const uint8_t* b;
39
+ HammingComputer hc;
40
+
41
+ float operator()(idx_t i) override {
42
+ return hc.hamming(b + i * code_size);
43
+ }
44
+
45
+ float symmetric_dis(idx_t i, idx_t j) override {
46
+ return HammingComputerDefault_tpl<THE_SIMD_LEVEL>(
47
+ b + j * code_size, code_size)
48
+ .hamming(b + i * code_size);
49
+ }
50
+
51
+ explicit FlatHammingDis(const IndexBinaryFlat& storage)
52
+ : code_size(storage.code_size), b(storage.xb.data()), hc() {}
53
+
54
+ // NOTE: Pointers are cast from float in order to reuse the floating-point
55
+ // DistanceComputer.
56
+ void set_query(const float* x) override {
57
+ hc.set((uint8_t*)x, code_size);
58
+ }
59
+ };
60
+
61
+ } // anonymous namespace
62
+
63
+ template <>
64
+ DistanceComputer* make_binary_hnsw_distance_computer_fixSL<THE_SIMD_LEVEL>(
65
+ int code_size,
66
+ IndexBinaryFlat* flat_storage) {
67
+ return with_HammingComputer<THE_SIMD_LEVEL>(
68
+ code_size, [&]<class HammingComputer>() -> DistanceComputer* {
69
+ return new FlatHammingDis<HammingComputer>(*flat_storage);
70
+ });
71
+ }
72
+
73
+ } // namespace faiss