faiss 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  84. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  85. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  86. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  87. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  88. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  89. data/vendor/faiss/faiss/MetricType.h +14 -7
  90. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  91. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  92. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  93. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  94. data/vendor/faiss/faiss/build.cpp +23 -0
  95. data/vendor/faiss/faiss/build.h +15 -0
  96. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  101. data/vendor/faiss/faiss/factory_tools.cpp +5 -0
  102. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  109. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  110. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  111. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  112. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  113. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  114. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  115. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  116. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  117. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  120. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  121. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  122. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  123. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  124. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  125. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  126. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  127. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  128. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  129. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  130. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  131. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  132. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  133. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  134. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  135. data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
  136. data/vendor/faiss/faiss/impl/HNSW.h +13 -34
  137. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  138. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  139. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  141. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  142. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  143. data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
  144. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  145. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  146. data/vendor/faiss/faiss/impl/Panorama.h +258 -87
  147. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  148. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  149. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  150. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  151. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  152. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  153. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
  154. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  155. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  156. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  157. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
  158. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  159. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  160. data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
  161. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
  162. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
  163. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  164. data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
  165. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  166. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  167. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  168. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  169. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  170. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  171. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  172. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  173. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  174. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  175. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  176. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  177. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  178. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  179. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  180. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  182. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  183. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  184. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  185. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  186. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  187. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  188. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  189. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  190. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  191. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  192. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  193. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  194. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  196. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  197. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  198. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  199. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  200. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  201. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  202. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  203. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  204. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  205. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  206. data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
  207. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  208. data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
  209. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  210. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  211. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  212. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  213. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  214. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  215. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  216. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  217. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  218. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  219. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  220. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  221. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  222. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  223. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  224. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  225. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
  226. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
  228. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
  229. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  230. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  231. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  232. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  233. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
  234. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
  235. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  236. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  237. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
  238. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
  239. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
  240. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
  241. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  244. data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
  245. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  246. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  247. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  248. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  249. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  250. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  251. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  252. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  253. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  254. data/vendor/faiss/faiss/index_factory.cpp +86 -18
  255. data/vendor/faiss/faiss/index_io.h +24 -0
  256. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  257. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  258. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  259. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  260. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  261. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  262. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  263. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  264. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  265. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  266. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  267. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  268. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  269. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  270. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  271. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  272. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  273. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
  274. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  275. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
  276. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
  277. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  278. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  279. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  280. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  281. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  282. data/vendor/faiss/faiss/utils/distances.h +20 -1
  283. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  284. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  285. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  286. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  287. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  288. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  289. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  290. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
  291. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  292. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  293. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  294. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  295. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  296. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  297. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  298. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  299. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  300. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  301. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  302. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  303. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  304. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  305. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  306. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  307. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  308. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  309. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  310. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  311. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  312. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  313. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  314. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  315. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  316. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  317. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  318. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  319. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  320. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  321. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  322. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  323. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  324. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  325. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  326. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  327. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  328. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  329. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  330. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  331. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  332. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  333. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  339. data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
  340. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  341. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  342. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  343. data/vendor/faiss/faiss/utils/utils.h +3 -3
  344. metadata +119 -34
  345. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  346. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  347. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  348. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  349. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  350. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  351. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  352. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  353. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  354. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  355. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  356. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  357. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  358. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  359. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  360. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  361. /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -6,8 +6,11 @@
6
6
  */
7
7
 
8
8
  #include <faiss/impl/FaissAssert.h>
9
- #include <faiss/impl/pq4_fast_scan.h>
10
- #include <faiss/impl/simd_result_handlers.h>
9
+ #include <faiss/impl/fast_scan/LookupTableScaler.h>
10
+ #include <faiss/impl/fast_scan/decompose_qbs.h>
11
+ #include <faiss/impl/fast_scan/fast_scan.h>
12
+ #include <faiss/impl/fast_scan/simd_result_handlers.h>
13
+ #include <faiss/impl/simd_dispatch.h>
11
14
 
12
15
  #include <array>
13
16
 
@@ -350,4 +353,168 @@ int pq4_pack_LUT_qbs_q_map(
350
353
  return i0;
351
354
  }
352
355
 
356
/** Add up the 4-bit fields of qbs.
 *
 * The value is consumed one nibble at a time, lowest nibble first, until
 * nothing but zero bits remain; each nibble is added to the running total.
 *
 * @param qbs  packed value whose nibbles are summed
 * @return     sum of all nibbles of qbs (0 when qbs == 0)
 */
int pq4_qbs_to_nq(int qbs) {
    int total = 0;
    for (int rest = qbs; rest != 0; rest >>= 4) {
        total += rest & 15;
    }
    return total;
}
366
+
367
+ int pq4_preferred_qbs(int n) {
368
+ // from timings in P141901742, P141902828
369
+ static int map[12] = {
370
+ 0, 1, 2, 3, 0x13, 0x23, 0x33, 0x223, 0x233, 0x333, 0x2233, 0x2333};
371
+ if (n <= 11) {
372
+ return map[n];
373
+ } else if (n <= 24) {
374
+ // override qbs: all first stages with 3 steps
375
+ // then 1 stage with the rest
376
+ int nbit = 4 * (n / 3); // nbits with only 3s
377
+ int qbs = 0x33333333 & ((1 << nbit) - 1);
378
+ qbs |= (n % 3) << nbit;
379
+ return qbs;
380
+ } else {
381
+ FAISS_THROW_FMT("number of queries %d too large", n);
382
+ }
383
+ }
384
+
385
+ } // namespace faiss
386
+
387
+ /***************************************************************
388
+ * FastScanCodeScanner: NONE specialization + dispatch wrapper.
389
+ *
390
+ * The NONE specialization provides the scalar fallback.
391
+ * Per-SIMD specializations (AVX2, AVX512, ARM_NEON) are in
392
+ * impl-avx2.cpp, impl-avx512.cpp, impl-neon.cpp respectively.
393
+ ***************************************************************/
394
+
395
+ #define THE_LEVEL_TO_DISPATCH SIMDLevel::NONE
396
+ #include <faiss/impl/fast_scan/dispatching.h> // IWYU pragma: keep
397
+ #include <faiss/impl/fast_scan/rabitq_dispatching.h> // IWYU pragma: keep
398
+ #undef THE_LEVEL_TO_DISPATCH
399
+
400
+ namespace faiss {
401
+
402
+ using namespace simd_result_handlers;
403
+
404
+ /***************************************************************
405
+ * accumulate_to_mem: NONE specialization + runtime dispatch.
406
+ ***************************************************************/
407
+
408
// Explicit specialization of accumulate_to_mem_impl for SIMDLevel::NONE.
// Wraps `accu` and `ntotal2` in a StoreResultHandler, uses a DummyScaler,
// and forwards everything to accumulate<SIMDLevel::NONE>.
// NOTE(review): the last argument, 32 * nsq / 2, looks like the byte size of
// one packed block of 32 codes -- confirm against accumulate()'s signature.
+ template <>
409
+ void accumulate_to_mem_impl<SIMDLevel::NONE>(
410
+ int nq,
411
+ size_t ntotal2,
412
+ int nsq,
413
+ const uint8_t* codes,
414
+ const uint8_t* LUT,
415
+ uint16_t* accu) {
416
+ StoreResultHandler<SIMDLevel::NONE> handler(accu, ntotal2);
417
+ DummyScaler<SIMDLevel::NONE> scaler;
418
+ accumulate<SIMDLevel::NONE>(
419
+ nq, ntotal2, nsq, codes, LUT, handler, scaler, 32 * nsq / 2);
420
+ }
421
+
422
// Dispatching entry point for accumulate_to_mem_impl.
// Requires ntotal2 to be a multiple of 32 (FAISS_THROW_IF_NOT fails
// otherwise), then calls accumulate_to_mem_impl<SL> with the same arguments,
// where SL is the SIMDLevel template argument supplied by with_simd_level.
+ void accumulate_to_mem(
423
+ int nq,
424
+ size_t ntotal2,
425
+ int nsq,
426
+ const uint8_t* codes,
427
+ const uint8_t* LUT,
428
+ uint16_t* accu) {
429
+ FAISS_THROW_IF_NOT(ntotal2 % 32 == 0);
430
+ with_simd_level([&]<SIMDLevel SL>() {
431
+ accumulate_to_mem_impl<SL>(nq, ntotal2, nsq, codes, LUT, accu);
432
+ });
433
+ }
434
+
435
+ } // namespace faiss
436
+
437
+ namespace faiss {
438
+
439
// Dispatch wrapper: returns whatever make_fast_scan_scanner_impl<SL> returns
// for the SIMDLevel SL supplied by with_simd_level. All arguments are
// forwarded unchanged and in the same order; no validation is done here.
+ std::unique_ptr<FastScanCodeScanner> make_fast_scan_knn_scanner(
440
+ bool is_max,
441
+ int impl,
442
+ size_t nq,
443
+ size_t ntotal,
444
+ int64_t k,
445
+ float* distances,
446
+ int64_t* ids,
447
+ const IDSelector* sel,
448
+ bool with_id_map) {
449
+ return with_simd_level([&]<SIMDLevel SL>() {
450
+ return make_fast_scan_scanner_impl<SL>(
451
+ is_max, impl, nq, ntotal, k, distances, ids, sel, with_id_map);
452
+ });
453
+ }
454
+
455
// Dispatch wrapper: returns whatever make_range_scanner_impl<SL> returns for
// the SIMDLevel SL supplied by with_simd_level. `rres` is captured by
// reference and passed through together with the other arguments.
+ std::unique_ptr<FastScanCodeScanner> make_range_scanner(
456
+ bool is_max,
457
+ RangeSearchResult& rres,
458
+ float radius,
459
+ size_t ntotal,
460
+ const IDSelector* sel) {
461
+ return with_simd_level([&]<SIMDLevel SL>() {
462
+ return make_range_scanner_impl<SL>(is_max, rres, radius, ntotal, sel);
463
+ });
464
+ }
465
+
466
// Dispatch wrapper: returns whatever make_partial_range_scanner_impl<SL>
// returns for the SIMDLevel SL supplied by with_simd_level. `pres`, `q0`
// and `q1` are passed through unchanged.
// NOTE(review): q0/q1 presumably delimit a query range -- confirm in the impl.
+ std::unique_ptr<FastScanCodeScanner> make_partial_range_scanner(
467
+ bool is_max,
468
+ RangeSearchPartialResult& pres,
469
+ float radius,
470
+ size_t ntotal,
471
+ size_t q0,
472
+ size_t q1,
473
+ const IDSelector* sel) {
474
+ return with_simd_level([&]<SIMDLevel SL>() {
475
+ return make_partial_range_scanner_impl<SL>(
476
+ is_max, pres, radius, ntotal, q0, q1, sel);
477
+ });
478
+ }
479
+
480
// Dispatch wrapper: returns whatever rabitq_make_knn_scanner_impl<SL> returns
// for the SIMDLevel SL supplied by with_simd_level. Arguments are forwarded
// unchanged; `context` is taken and passed on by const reference, and
// `index` comes first in the parameter list.
+ std::unique_ptr<FastScanCodeScanner> rabitq_make_knn_scanner(
481
+ const IndexRaBitQFastScan* index,
482
+ bool is_max,
483
+ size_t nq,
484
+ int64_t k,
485
+ float* distances,
486
+ int64_t* ids,
487
+ const IDSelector* sel,
488
+ const FastScanDistancePostProcessing& context,
489
+ bool is_multi_bit) {
490
+ return with_simd_level([&]<SIMDLevel SL>() {
491
+ return rabitq_make_knn_scanner_impl<SL>(
492
+ index,
493
+ is_max,
494
+ nq,
495
+ k,
496
+ distances,
497
+ ids,
498
+ sel,
499
+ context,
500
+ is_multi_bit);
501
+ });
502
+ }
503
+
504
// Dispatch wrapper: returns whatever rabitq_ivf_make_knn_scanner_impl<SL>
// returns for the SIMDLevel SL supplied by with_simd_level. Arguments are
// forwarded unchanged. Here `is_max` precedes `index`, `k` is a size_t and
// `context` is a pointer.
+ std::unique_ptr<FastScanCodeScanner> rabitq_ivf_make_knn_scanner(
505
+ bool is_max,
506
+ const IndexIVFRaBitQFastScan* index,
507
+ size_t nq,
508
+ size_t k,
509
+ float* distances,
510
+ int64_t* ids,
511
+ const IDSelector* sel,
512
+ const FastScanDistancePostProcessing* context,
513
+ bool multi_bit) {
514
+ return with_simd_level([&]<SIMDLevel SL>() {
515
+ return rabitq_ivf_make_knn_scanner_impl<SL>(
516
+ is_max, index, nq, k, distances, ids, sel, context, multi_bit);
517
+ });
518
+ }
519
+
353
520
  } // namespace faiss
@@ -0,0 +1,341 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <cstdint>
11
+ #include <cstdlib>
12
+ #include <memory>
13
+
14
+ #include <faiss/impl/CodePacker.h>
15
+ #include <faiss/utils/simd_levels.h>
16
+
17
+ /** PQ4 SIMD packing and accumulation functions
18
+ *
19
+ * The basic kernel accumulates nq query vectors with bbs = nb * 2 * 16 vectors
20
+ * and produces an output matrix for that. It is interesting for nq * nb <= 4,
21
+ * otherwise register spilling becomes too large.
22
+ *
23
+ * The implementation of these functions is spread over 3 cpp files to reduce
24
+ * parallel compile times. Templates are instantiated explicitly.
25
+ */
26
+
27
+ namespace faiss {
28
+
29
+ struct IDSelector;
30
+ struct RangeSearchResult;
31
+ struct RangeSearchPartialResult;
32
+ struct SIMDResultHandlerToFloat;
33
+
34
+ /** Pack codes for consumption by the SIMD kernels.
35
+ * The unused bytes are set to 0.
36
+ *
37
+ * @param codes input codes, size (ntotal, ceil(M / 2))
38
+ * @param ntotal number of input codes
39
+ * @param nb output number of codes (ntotal rounded up to a multiple of
40
+ * bbs)
41
+ * @param nsq number of sub-quantizers (=M rounded up to a multiple of 2)
42
+ * @param bbs size of database blocks (multiple of 32)
43
+ * @param blocks output array, size nb * nsq / 2.
44
+ * @param code_stride optional stride between consecutive codes (0 = use
45
+ * default (M + 1) / 2)
46
+ */
47
+ void pq4_pack_codes(
48
+ const uint8_t* codes,
49
+ size_t ntotal,
50
+ size_t M,
51
+ size_t nb,
52
+ size_t bbs,
53
+ size_t nsq,
54
+ uint8_t* blocks,
55
+ size_t code_stride = 0);
56
+
57
+ /** Same as pq4_pack_codes but writes only a given range of the output,
58
+ * leaving the rest untouched. Assumes allocated entries are 0 on input.
59
+ *
60
+ * @param codes input codes, size (i1 - i0, ceil(M / 2))
61
+ * @param i0 first output code to write
62
+ * @param i1 end of the output range (exclusive): i1 - i0 codes are written
63
+ * @param blocks output array, size at least ceil(i1 / bbs) * bbs * nsq / 2
64
+ * @param code_stride optional stride between consecutive codes (0 = use
65
+ * default (M + 1) / 2)
66
+ * @param block_stride stride in bytes between consecutive blocks.
67
+ */
68
+ void pq4_pack_codes_range(
69
+ const uint8_t* codes,
70
+ size_t M,
71
+ size_t i0,
72
+ size_t i1,
73
+ size_t bbs,
74
+ size_t nsq,
75
+ uint8_t* blocks,
76
+ size_t code_stride,
77
+ size_t block_stride);
78
+
79
+ /** get a single element from a packed codes table
80
+ *
81
+ * @param vector_id vector id
82
+ * @param sq subquantizer (< nsq)
83
+ */
84
+ uint8_t pq4_get_packed_element(
85
+ const uint8_t* data,
86
+ size_t bbs,
87
+ size_t nsq,
88
+ size_t vector_id,
89
+ size_t sq);
90
+
91
+ /** set a single element "code" into a packed codes table
92
+ *
93
+ * @param vector_id vector id
94
+ * @param sq subquantizer (< nsq)
95
+ */
96
+ void pq4_set_packed_element(
97
+ uint8_t* data,
98
+ uint8_t code,
99
+ size_t bbs,
100
+ size_t nsq,
101
+ size_t vector_id,
102
+ size_t sq);
103
+
104
+ /** CodePacker API for the PQ4 fast-scan */
105
+ struct CodePackerPQ4 : CodePacker {
106
+ size_t nsq; // presumably the number of sub-quantizers, as for the nsq parameters of the pq4_* functions — TODO confirm
107
+
108
+ CodePackerPQ4(size_t nsq, size_t bbs);
109
+
110
+ CodePacker* clone() const final; // returns a raw pointer; NOTE(review): ownership presumably passes to the caller — confirm against CodePacker.h
111
+
112
+ void pack_1(const uint8_t* flat_code, size_t offset, uint8_t* block)
113
+ const final; // `final` without `virtual`: overrides a virtual declared in CodePacker
114
+ void unpack_1(const uint8_t* block, size_t offset, uint8_t* flat_code)
115
+ const final; // counterpart of pack_1 with source and destination swapped (block -> flat_code)
116
+ };
117
+
118
+ /** Pack Look-up table for consumption by the kernel.
119
+ *
120
+ * @param nq number of queries
121
+ * @param nsq number of sub-quantizers (multiple of 2)
122
+ * @param src input array, size (nq, 16)
123
+ * @param dest output array, size (nq, 16)
124
+ */
125
+ void pq4_pack_LUT(int nq, int nsq, const uint8_t* src, uint8_t* dest);
126
+
127
+ /** Compute the number of queries from a base-16 decomposition (qbs). */
128
+ int pq4_qbs_to_nq(int qbs);
129
+
130
+ /** Return the preferred decomposition in blocks for a given number of queries. */
131
+ int pq4_preferred_qbs(int nq);
132
+
133
+ /** Pack Look-up table for consumption by the kernel.
134
+ *
135
+ * @param fqbs 4-bit encoded number of query blocks, the total number of
136
+ * queries handled (nq) is deduced from it
137
+ * @param nsq number of sub-quantizers (multiple of 2)
138
+ * @param src input array, size (nq, 16)
139
+ * @param dest output array, size (nq, 16)
140
+ * @return nq
141
+ */
142
+ int pq4_pack_LUT_qbs(int fqbs, int nsq, const uint8_t* src, uint8_t* dest);
143
+
144
+ /** Same as pq4_pack_LUT_qbs, except the source vectors are remapped with q_map
145
+ */
146
+ int pq4_pack_LUT_qbs_q_map(
147
+ int qbs,
148
+ int nsq,
149
+ const uint8_t* src,
150
+ const int* q_map,
151
+ uint8_t* dest);
152
+
153
+ /** Wrapper using simple StoreResultHandler
154
+ * and DummyScaler
155
+ *
156
+ * @param nq number of queries
157
+ * @param ntotal2 number of database elements (multiple of 32)
158
+ * @param nsq number of sub-quantizers (multiple of 2)
159
+ * @param codes packed codes array
160
+ * @param LUT packed look-up table
161
+ * @param accu array to store the results
162
+ */
163
+ void accumulate_to_mem(
164
+ int nq,
165
+ size_t ntotal2,
166
+ int nsq,
167
+ const uint8_t* codes,
168
+ const uint8_t* LUT,
169
+ uint16_t* accu);
170
+
171
+ /// Per-SIMD specialization of accumulate_to_mem (defined in per-SIMD TUs)
172
+ template <SIMDLevel SL>
173
+ void accumulate_to_mem_impl(
174
+ int nq,
175
+ size_t ntotal2,
176
+ int nsq,
177
+ const uint8_t* codes,
178
+ const uint8_t* LUT,
179
+ uint16_t* accu);
180
+
181
+ /***************************************************************
182
+ * FastScanCodeScanner: virtual base that bundles handler + kernel
183
+ * behind the SIMD dispatch boundary. Per-SIMD TUs instantiate this
184
+ * with the correct SIMDLevel so that handler and kernel share the
185
+ * same SIMD types.
186
+ ***************************************************************/
187
+
188
+ struct FastScanCodeScanner {
189
+ virtual ~FastScanCodeScanner() = default; // virtual: the factories in this header return scanners as std::unique_ptr<FastScanCodeScanner>, so they are destroyed through the base type
190
+
191
+ /// Access the underlying result handler (for begin/end/normalizer calls)
192
+ virtual SIMDResultHandlerToFloat* handler() = 0; // SIMDResultHandlerToFloat is only forward-declared in this header
193
+
194
+ /// Run the search_1 accumulation loop (bbs > 32, multi-BB kernel)
195
+ virtual void accumulate_loop(
196
+ int nq,
197
+ size_t nb,
198
+ int bbs,
199
+ int nsq,
200
+ const uint8_t* codes,
201
+ const uint8_t* LUT,
202
+ int pq2x4_scale, // NOTE(review): semantics not documented in this header — confirm against the kernel
203
+ size_t block_stride) = 0; // presumably the stride in bytes between consecutive blocks, as in pq4_pack_codes_range — TODO confirm
204
+
205
+ /// Run the QBS accumulation loop (bbs == 32)
206
+ virtual void accumulate_loop_qbs(
207
+ int qbs, // presumably the same 4-bit encoded query-block decomposition that pq4_pack_LUT_qbs takes — TODO confirm
208
+ size_t nb,
209
+ int nsq,
210
+ const uint8_t* codes,
211
+ const uint8_t* LUT,
212
+ int pq2x4_scale, // NOTE(review): semantics not documented in this header — confirm against the kernel
213
+ size_t block_stride) = 0; // no bbs parameter: this entry point is for bbs == 32 only
214
+ };
215
+
216
+ /// Per-SIMD factory: explicitly specialized in each per-SIMD TU
217
+ /// (impl-avx2.cpp, impl-avx512.cpp, impl-neon.cpp, fast_scan.cpp for NONE).
218
+ /// Not called directly — use make_fast_scan_knn_scanner() instead.
219
+ template <SIMDLevel SL>
220
+ std::unique_ptr<FastScanCodeScanner> make_fast_scan_scanner_impl(
221
+ bool is_max,
222
+ int impl,
223
+ size_t nq,
224
+ size_t ntotal,
225
+ int64_t k,
226
+ float* distances,
227
+ int64_t* ids,
228
+ const IDSelector* sel,
229
+ bool with_id_map);
230
+
231
+ /// Runtime dispatch wrapper: selects the best available SIMD level
232
+ /// (via with_simd_level) and delegates to the corresponding
233
+ /// make_fast_scan_scanner_impl<SL> specialization.
234
+ std::unique_ptr<FastScanCodeScanner> make_fast_scan_knn_scanner(
235
+ bool is_max,
236
+ int impl,
237
+ size_t nq,
238
+ size_t ntotal,
239
+ int64_t k,
240
+ float* distances,
241
+ int64_t* ids,
242
+ const IDSelector* sel,
243
+ bool with_id_map = false);
244
+
245
+ /// Per-SIMD range scanner factories (defined in per-SIMD TUs via dispatching.h)
246
+ template <SIMDLevel SL>
247
+ std::unique_ptr<FastScanCodeScanner> make_range_scanner_impl(
248
+ bool is_max,
249
+ RangeSearchResult& rres,
250
+ float radius,
251
+ size_t ntotal,
252
+ const IDSelector* sel);
253
+
254
+ template <SIMDLevel SL>
255
+ std::unique_ptr<FastScanCodeScanner> make_partial_range_scanner_impl(
256
+ bool is_max,
257
+ RangeSearchPartialResult& pres,
258
+ float radius,
259
+ size_t ntotal,
260
+ size_t q0,
261
+ size_t q1,
262
+ const IDSelector* sel);
263
+
264
+ /// Runtime dispatch: range search scanner.
265
+ std::unique_ptr<FastScanCodeScanner> make_range_scanner(
266
+ bool is_max,
267
+ RangeSearchResult& rres,
268
+ float radius,
269
+ size_t ntotal,
270
+ const IDSelector* sel);
271
+
272
+ /// Runtime dispatch: partial range search scanner (per-thread).
273
+ std::unique_ptr<FastScanCodeScanner> make_partial_range_scanner(
274
+ bool is_max,
275
+ RangeSearchPartialResult& pres,
276
+ float radius,
277
+ size_t ntotal,
278
+ size_t q0,
279
+ size_t q1,
280
+ const IDSelector* sel);
281
+
282
+ /***************************************************************
283
+ * RaBitQ scanner factory: per-SIMD specializations live in
284
+ * rabitq_dispatching.h, included by each per-SIMD TU.
285
+ ***************************************************************/
286
+
287
+ struct IndexRaBitQFastScan;
288
+ struct IndexIVFRaBitQFastScan;
289
+ struct FastScanDistancePostProcessing;
290
+
291
+ /// Per-SIMD factory (primary template; specializations in rabitq_dispatching.h)
292
+ template <SIMDLevel SL>
293
+ std::unique_ptr<FastScanCodeScanner> rabitq_make_knn_scanner_impl(
294
+ const IndexRaBitQFastScan* index,
295
+ bool is_max,
296
+ size_t nq,
297
+ int64_t k,
298
+ float* distances,
299
+ int64_t* ids,
300
+ const IDSelector* sel,
301
+ const FastScanDistancePostProcessing& context,
302
+ bool is_multi_bit);
303
+
304
+ /// Runtime dispatch wrapper for rabitq_make_knn_scanner_impl
305
+ std::unique_ptr<FastScanCodeScanner> rabitq_make_knn_scanner(
306
+ const IndexRaBitQFastScan* index,
307
+ bool is_max,
308
+ size_t nq,
309
+ int64_t k,
310
+ float* distances,
311
+ int64_t* ids,
312
+ const IDSelector* sel,
313
+ const FastScanDistancePostProcessing& context,
314
+ bool is_multi_bit);
315
+
316
+ /// Per-SIMD IVF RaBitQ scanner factory.
317
+ template <SIMDLevel SL>
318
+ std::unique_ptr<FastScanCodeScanner> rabitq_ivf_make_knn_scanner_impl(
319
+ bool is_max,
320
+ const IndexIVFRaBitQFastScan* index,
321
+ size_t nq,
322
+ size_t k,
323
+ float* distances,
324
+ int64_t* ids,
325
+ const IDSelector* sel,
326
+ const FastScanDistancePostProcessing* context,
327
+ bool multi_bit);
328
+
329
+ /// Runtime dispatch wrapper for IVF RaBitQ scanner.
330
+ std::unique_ptr<FastScanCodeScanner> rabitq_ivf_make_knn_scanner(
331
+ bool is_max,
332
+ const IndexIVFRaBitQFastScan* index,
333
+ size_t nq,
334
+ size_t k,
335
+ float* distances,
336
+ int64_t* ids,
337
+ const IDSelector* sel,
338
+ const FastScanDistancePostProcessing* context,
339
+ bool multi_bit);
340
+
341
+ } // namespace faiss
@@ -0,0 +1,36 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #ifdef COMPILE_SIMD_AVX2
9
+
10
+ #define THE_LEVEL_TO_DISPATCH SIMDLevel::AVX2
11
+ #include <faiss/impl/fast_scan/dispatching.h> // IWYU pragma: keep
12
+ #include <faiss/impl/fast_scan/rabitq_dispatching.h> // IWYU pragma: keep
13
+
14
+ #include <faiss/impl/fast_scan/decompose_qbs.h>
15
+
16
+ namespace faiss {
17
+
18
+ using namespace simd_result_handlers;
19
+
20
+ template <> // explicit specialization for the AVX2 level
21
+ void accumulate_to_mem_impl<SIMDLevel::AVX2>(
22
+ int nq,
23
+ size_t ntotal2,
24
+ int nsq,
25
+ const uint8_t* codes,
26
+ const uint8_t* LUT,
27
+ uint16_t* accu) {
28
+ StoreResultHandler<SIMDLevel::AVX2> handler(accu, ntotal2); // handler is constructed over the caller's accu buffer
29
+ DummyScaler<SIMDLevel::AVX2> scaler;
30
+ accumulate<SIMDLevel::AVX2>(
31
+ nq, ntotal2, nsq, codes, LUT, handler, scaler, 32 * nsq / 2); // last argument: presumably the byte stride of one 32-vector block (nsq / 2 bytes per vector) — TODO confirm
32
+ }
33
+
34
+ } // namespace faiss
35
+
36
+ #endif // COMPILE_SIMD_AVX2
@@ -0,0 +1,40 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #ifdef COMPILE_SIMD_AVX512
9
+
10
+ #define THE_LEVEL_TO_DISPATCH SIMDLevel::AVX512
11
+ #include <faiss/impl/fast_scan/dispatching.h> // IWYU pragma: keep
12
+ #include <faiss/impl/fast_scan/rabitq_dispatching.h> // IWYU pragma: keep
13
+
14
+ #include <faiss/impl/fast_scan/decompose_qbs.h>
15
+
16
+ namespace faiss {
17
+
18
+ using namespace simd_result_handlers;
19
+
20
+ template <> // explicit specialization for the AVX512 level
21
+ void accumulate_to_mem_impl<SIMDLevel::AVX512>(
22
+ int nq,
23
+ size_t ntotal2,
24
+ int nsq,
25
+ const uint8_t* codes,
26
+ const uint8_t* LUT,
27
+ uint16_t* accu) {
28
+ // Use AVX2-level handler (256-bit StoreResultHandler) since the 512-bit
29
+ // kernels reduce to AVX2-level simd16uint16 via FixedStorage512.
30
+ StoreResultHandler<SIMDLevel::AVX2> handler(accu, ntotal2); // deliberately AVX2 while scaler and kernel below are AVX512
31
+ DummyScaler<SIMDLevel::AVX512> scaler;
32
+ // kernel_accumulate_block in decompose_qbs.h selects pq4_kernel_qbs_512
33
+ // via #ifdef __AVX512F__ (which is set for this TU).
34
+ accumulate<SIMDLevel::AVX512>(
35
+ nq, ntotal2, nsq, codes, LUT, handler, scaler, 32 * nsq / 2); // last argument: presumably the byte stride of one 32-vector block (nsq / 2 bytes per vector) — TODO confirm
36
+ }
37
+
38
+ } // namespace faiss
39
+
40
+ #endif // COMPILE_SIMD_AVX512