faiss 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  84. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  85. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  86. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  87. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  88. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  89. data/vendor/faiss/faiss/MetricType.h +14 -7
  90. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  91. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  92. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  93. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  94. data/vendor/faiss/faiss/build.cpp +23 -0
  95. data/vendor/faiss/faiss/build.h +15 -0
  96. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  101. data/vendor/faiss/faiss/factory_tools.cpp +5 -0
  102. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  109. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  110. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  111. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  112. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  113. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  114. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  115. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  116. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  117. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  120. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  121. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  122. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  123. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  124. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  125. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  126. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  127. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  128. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  129. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  130. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  131. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  132. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  133. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  134. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  135. data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
  136. data/vendor/faiss/faiss/impl/HNSW.h +13 -34
  137. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  138. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  139. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  141. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  142. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  143. data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
  144. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  145. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  146. data/vendor/faiss/faiss/impl/Panorama.h +258 -87
  147. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  148. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  149. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  150. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  151. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  152. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  153. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
  154. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  155. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  156. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  157. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
  158. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  159. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  160. data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
  161. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
  162. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
  163. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  164. data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
  165. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  166. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  167. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  168. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  169. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  170. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  171. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  172. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  173. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  174. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  175. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  176. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  177. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  178. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  179. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  180. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  182. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  183. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  184. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  185. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  186. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  187. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  188. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  189. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  190. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  191. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  192. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  193. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  194. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  196. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  197. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  198. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  199. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  200. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  201. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  202. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  203. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  204. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  205. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  206. data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
  207. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  208. data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
  209. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  210. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  211. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  212. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  213. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  214. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  215. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  216. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  217. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  218. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  219. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  220. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  221. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  222. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  223. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  224. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  225. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
  226. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
  228. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
  229. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  230. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  231. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  232. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  233. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
  234. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
  235. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  236. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  237. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
  238. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
  239. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
  240. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
  241. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  244. data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
  245. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  246. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  247. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  248. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  249. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  250. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  251. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  252. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  253. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  254. data/vendor/faiss/faiss/index_factory.cpp +86 -18
  255. data/vendor/faiss/faiss/index_io.h +24 -0
  256. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  257. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  258. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  259. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  260. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  261. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  262. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  263. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  264. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  265. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  266. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  267. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  268. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  269. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  270. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  271. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  272. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  273. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
  274. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  275. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
  276. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
  277. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  278. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  279. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  280. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  281. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  282. data/vendor/faiss/faiss/utils/distances.h +20 -1
  283. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  284. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  285. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  286. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  287. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  288. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  289. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  290. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
  291. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  292. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  293. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  294. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  295. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  296. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  297. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  298. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  299. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  300. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  301. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  302. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  303. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  304. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  305. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  306. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  307. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  308. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  309. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  310. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  311. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  312. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  313. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  314. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  315. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  316. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  317. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  318. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  319. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  320. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  321. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  322. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  323. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  324. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  325. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  326. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  327. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  328. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  329. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  330. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  331. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  332. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  333. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  339. data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
  340. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  341. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  342. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  343. data/vendor/faiss/faiss/utils/utils.h +3 -3
  344. metadata +119 -34
  345. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  346. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  347. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  348. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  349. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  350. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  351. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  352. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  353. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  354. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  355. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  356. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  357. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  358. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  359. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  360. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  361. /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -9,10 +9,11 @@
9
9
 
10
10
  #include <algorithm>
11
11
  #include <type_traits>
12
+ #include <utility>
12
13
  #include <vector>
13
14
 
15
+ #include <faiss/impl/simdlib/simdlib_dispatch.h>
14
16
  #include <faiss/utils/Heap.h>
15
- #include <faiss/utils/simdlib.h>
16
17
 
17
18
  #include <faiss/impl/FaissAssert.h>
18
19
  #include <faiss/impl/IDSelector.h>
@@ -26,49 +27,109 @@
26
27
 
27
28
  namespace faiss {
28
29
 
29
- struct SIMDResultHandler {
30
+ namespace {
31
+
32
+ // a helper that checks whether a ResultHandler has a .sel member
33
+ template <typename T, typename = void>
34
+ struct has_sel_member : std::false_type {};
35
+ template <typename T>
36
+ struct has_sel_member<T, std::void_t<decltype(T::sel)>> : std::true_type {};
37
+ template <typename T>
38
+ inline constexpr bool has_sel_member_v = has_sel_member<T>::value;
39
+
40
+ /// Check if all vectors in a block are filtered out by the IDSelector.
41
+ /// Returns true if the block should be skipped (all vectors filtered).
42
+ /// Requires set_block_origin() to have been called before this.
43
+ /// Compiles to nothing (returns false) when ResultHandler has no sel member.
44
+ template <class ResultHandler>
45
+ inline bool whether_all_vectors_filtered_out(
46
+ ResultHandler& res,
47
+ size_t block_size) {
48
+ if constexpr (!has_sel_member_v<ResultHandler>) {
49
+ return false;
50
+ }
51
+ if (res.sel != nullptr) {
52
+ for (size_t jj = 0; jj < block_size; jj++) {
53
+ if (res.sel->is_member(res.adjust_id(0, jj))) {
54
+ return false;
55
+ }
56
+ }
57
+ return true;
58
+ }
59
+ return false;
60
+ }
61
+
62
+ /// Loop over blocks of Step vectors, advancing codes by block_stride each
63
+ /// iteration. Calls set_block_origin(0, j0) and skips blocks where all
64
+ /// vectors are filtered out. The body lambda receives j0.
65
+ template <size_t Step, class ResultHandler, class Body>
66
+ inline void for_each_block(
67
+ size_t ntotal,
68
+ const uint8_t*& codes,
69
+ size_t block_stride,
70
+ ResultHandler& res,
71
+ Body&& body) {
72
+ for (size_t j0 = 0; j0 < ntotal; j0 += Step, codes += block_stride) {
73
+ res.set_block_origin(0, j0);
74
+ if constexpr (has_sel_member_v<ResultHandler>) {
75
+ if (whether_all_vectors_filtered_out(
76
+ res, std::min<size_t>(Step, res.ntotal - j0))) {
77
+ continue;
78
+ }
79
+ }
80
+ body(j0);
81
+ }
82
+ }
83
+
84
+ } // namespace
85
+
86
+ /* Result handler that will return float results eventually.
87
+ * Non-template base so it can serve as a polymorphic interface
88
+ * (e.g. FastScanCodeScanner::handler() returns SIMDResultHandlerToFloat*). */
89
+ struct SIMDResultHandlerToFloat {
30
90
  // used to dispatch templates
31
91
  bool is_CMax = false;
32
92
  uint8_t sizeof_ids = 0;
33
93
  bool with_fields = false;
34
94
 
35
- /** called when 32 distances are computed and provided in two
36
- * simd16uint16. (q, b) indicate which entry it is in the block. */
37
- virtual void handle(
38
- size_t q,
39
- size_t b,
40
- simd16uint16 d0,
41
- simd16uint16 d1) = 0;
42
-
43
- /// set the sub-matrix that is being computed
44
- virtual void set_block_origin(size_t i0, size_t j0) = 0;
45
-
46
- virtual ~SIMDResultHandler() {}
47
- };
95
+ // the number of elements that were successfully processed up to date,
96
+ // for example, hitting the threshold for the range search.
97
+ // the variable is used to track whether an early stop condition
98
+ // should be hit due to having zero search progress.
99
+ size_t in_range_num = 0;
48
100
 
49
- /* Result handler that will return float results eventually */
50
- struct SIMDResultHandlerToFloat : SIMDResultHandler {
51
- size_t nq; // number of queries
52
- size_t ntotal; // ignore excess elements after ntotal
101
+ size_t nq = 0; // number of queries
102
+ size_t ntotal = 0; // ignore excess elements after ntotal
53
103
 
54
104
  /// these fields are used mainly for the IVF variants (with_id_map=true)
55
105
  const idx_t* id_map = nullptr; // map offset in invlist to vector id
56
106
  const int* q_map = nullptr; // map q to global query
107
+ const uint8_t* list_codes_ptr = nullptr; // raw block data for current list
57
108
  const uint16_t* dbias =
58
109
  nullptr; // table of biases to add to each query (for IVF L2 search)
59
110
  const float* normalizers = nullptr; // size 2 * nq, to convert
60
111
 
112
+ size_t scan_cnt = 0; // scanned vector number (except filtered)
113
+
114
+ SIMDResultHandlerToFloat() = default;
61
115
  SIMDResultHandlerToFloat(size_t nq, size_t ntotal)
62
116
  : nq(nq), ntotal(ntotal) {}
63
117
 
64
118
  virtual void begin(const float* norms) {
65
119
  normalizers = norms;
120
+ scan_cnt = 0;
66
121
  }
67
122
 
68
123
  // called at end of search to convert int16 distances to float, before
69
124
  // normalizers are deallocated
70
125
  virtual void end() {
71
126
  normalizers = nullptr;
127
+ scan_cnt = 0;
128
+ }
129
+
130
+ // Get the number of scanned vectors
131
+ size_t count_scanned_rows() {
132
+ return scan_cnt;
72
133
  }
73
134
 
74
135
  // Number of updates made to the underlying data structure.
@@ -88,19 +149,38 @@ struct SIMDResultHandlerToFloat : SIMDResultHandler {
88
149
  */
89
150
  virtual void set_list_context(
90
151
  size_t /* list_no */,
91
- const std::vector<int>& /* probe_map */) {
92
- // Default implementation does nothing
93
- // Derived handlers can override if they need this context
94
- }
152
+ const std::vector<int>& /* probe_map */) {}
153
+
154
+ virtual void set_block_origin(size_t i0, size_t j0) = 0;
155
+
156
+ virtual ~SIMDResultHandlerToFloat() = default;
95
157
  };
96
158
 
97
- FAISS_API extern bool simd_result_handlers_accept_virtual;
159
+ /** SL-specific base: adds virtual handle() whose simd16uint16 type
160
+ * matches the SL template parameter. Concrete handlers override
161
+ * with `final` so the compiler can devirtualize and inline. */
162
+ template <SIMDLevel SL = SINGLE_SIMD_LEVEL_256>
163
+ struct SIMDResultHandler : SIMDResultHandlerToFloat {
164
+ using SIMDResultHandlerToFloat::SIMDResultHandlerToFloat;
165
+
166
+ static constexpr SIMDLevel SL256 = simd256_level_selector<SL>::value;
167
+ using simd16uint16 = simd16uint16_tpl<SL256>;
168
+
169
+ virtual void handle(
170
+ size_t /* q */,
171
+ size_t /* b */,
172
+ simd16uint16 /* d0 */,
173
+ simd16uint16 /* d1 */) {}
174
+ };
98
175
 
99
176
  namespace simd_result_handlers {
100
177
 
101
178
  /** Dummy structure that just computes a checksum on results
102
179
  * (to avoid the computation to be optimized away) */
103
- struct DummyResultHandler : SIMDResultHandler {
180
+ template <SIMDLevel SL = SINGLE_SIMD_LEVEL_256>
181
+ struct DummyResultHandler : SIMDResultHandler<SL> {
182
+ using typename SIMDResultHandler<SL>::simd16uint16;
183
+
104
184
  size_t cs = 0;
105
185
 
106
186
  void handle(size_t q, size_t b, simd16uint16 d0, simd16uint16 d1) final {
@@ -108,15 +188,16 @@ struct DummyResultHandler : SIMDResultHandler {
108
188
  }
109
189
 
110
190
  void set_block_origin(size_t, size_t) final {}
111
-
112
- ~DummyResultHandler() {}
113
191
  };
114
192
 
115
193
  /** memorize results in a nq-by-nb matrix.
116
194
  *
117
195
  * j0 is the current upper-left block of the matrix
118
196
  */
119
- struct StoreResultHandler : SIMDResultHandler {
197
+ template <SIMDLevel SL = SINGLE_SIMD_LEVEL_256>
198
+ struct StoreResultHandler : SIMDResultHandler<SL> {
199
+ using typename SIMDResultHandler<SL>::simd16uint16;
200
+
120
201
  uint16_t* data;
121
202
  size_t ld; // total number of columns
122
203
  size_t i0 = 0;
@@ -137,8 +218,10 @@ struct StoreResultHandler : SIMDResultHandler {
137
218
  };
138
219
 
139
220
  /** stores results in fixed-size matrix. */
140
- template <int NQ, int BB>
141
- struct FixedStorageHandler : SIMDResultHandler {
221
+ template <int NQ, int BB, SIMDLevel SL = SINGLE_SIMD_LEVEL_256>
222
+ struct FixedStorageHandler : SIMDResultHandler<SL> {
223
+ using typename SIMDResultHandler<SL>::simd16uint16;
224
+
142
225
  simd16uint16 dis[NQ][BB];
143
226
  int i0 = 0;
144
227
 
@@ -160,14 +243,13 @@ struct FixedStorageHandler : SIMDResultHandler {
160
243
  }
161
244
  }
162
245
  }
163
-
164
- virtual ~FixedStorageHandler() {}
165
246
  };
166
247
 
167
248
  /** Result handler that compares distances to check if they need to be kept */
168
- template <class C, bool with_id_map>
169
- struct ResultHandlerCompare : SIMDResultHandlerToFloat {
249
+ template <class C, bool with_id_map, SIMDLevel SL = SINGLE_SIMD_LEVEL_256>
250
+ struct ResultHandlerCompare : SIMDResultHandler<SL> {
170
251
  using TI = typename C::TI;
252
+ using typename SIMDResultHandler<SL>::simd16uint16;
171
253
 
172
254
  bool disable = false;
173
255
 
@@ -177,7 +259,7 @@ struct ResultHandlerCompare : SIMDResultHandlerToFloat {
177
259
  const IDSelector* sel;
178
260
 
179
261
  ResultHandlerCompare(size_t nq, size_t ntotal, const IDSelector* sel_in)
180
- : SIMDResultHandlerToFloat(nq, ntotal), sel{sel_in} {
262
+ : SIMDResultHandler<SL>(nq, ntotal), sel{sel_in} {
181
263
  this->is_CMax = C::is_max;
182
264
  this->sizeof_ids = sizeof(typename C::TI);
183
265
  this->with_fields = with_id_map;
@@ -192,22 +274,22 @@ struct ResultHandlerCompare : SIMDResultHandlerToFloat {
192
274
  void adjust_with_origin(size_t& q, simd16uint16& d0, simd16uint16& d1) {
193
275
  q += i0;
194
276
 
195
- if (dbias) {
196
- simd16uint16 dbias16(dbias[q]);
277
+ if (this->dbias) {
278
+ simd16uint16 dbias16(this->dbias[q]);
197
279
  d0 += dbias16;
198
280
  d1 += dbias16;
199
281
  }
200
282
 
201
283
  if (with_id_map) { // FIXME test on q_map instead
202
- q = q_map[q];
284
+ q = this->q_map[q];
203
285
  }
204
286
  }
205
287
 
206
288
  // compute and adjust idx
207
289
  int64_t adjust_id(size_t b, size_t j) {
208
290
  int64_t idx = j0 + 32 * b + j;
209
- if (with_id_map) {
210
- idx = id_map[idx];
291
+ if (this->id_map) {
292
+ idx = this->id_map[idx];
211
293
  }
212
294
  return idx;
213
295
  }
@@ -233,26 +315,28 @@ struct ResultHandlerCompare : SIMDResultHandlerToFloat {
233
315
  return 0;
234
316
  }
235
317
  uint64_t idx = j0 + b * 32;
236
- if (idx + 32 > ntotal) {
237
- if (idx >= ntotal) {
318
+ if (idx + 32 > this->ntotal) {
319
+ if (idx >= this->ntotal) {
238
320
  return 0;
239
321
  }
240
- int nbit = (ntotal - idx);
322
+ int nbit = static_cast<int>(this->ntotal - idx);
241
323
  lt_mask &= (uint32_t(1) << nbit) - 1;
242
324
  }
243
325
  return lt_mask;
244
326
  }
245
-
246
- virtual ~ResultHandlerCompare() {}
247
327
  };
248
328
 
249
329
  /** Special version for k=1 */
250
- template <class C, bool with_id_map = false>
251
- struct SingleResultHandler : ResultHandlerCompare<C, with_id_map> {
330
+ template <
331
+ class C,
332
+ bool with_id_map = false,
333
+ SIMDLevel SL = SINGLE_SIMD_LEVEL_256>
334
+ struct SingleResultHandler : ResultHandlerCompare<C, with_id_map, SL> {
252
335
  using T = typename C::T;
253
336
  using TI = typename C::TI;
254
- using RHC = ResultHandlerCompare<C, with_id_map>;
337
+ using RHC = ResultHandlerCompare<C, with_id_map, SL>;
255
338
  using RHC::normalizers;
339
+ using typename RHC::simd16uint16;
256
340
 
257
341
  std::vector<int16_t> idis;
258
342
  float* dis;
@@ -294,10 +378,13 @@ struct SingleResultHandler : ResultHandlerCompare<C, with_id_map> {
294
378
  auto real_idx = this->adjust_id(b, j);
295
379
  lt_mask -= 1 << j;
296
380
  if (this->sel->is_member(real_idx)) {
381
+ this->scan_cnt++;
297
382
  T d = d32tab[j];
298
383
  if (C::cmp(idis[q], d)) {
299
384
  idis[q] = d;
300
385
  ids[q] = real_idx;
386
+
387
+ this->in_range_num++;
301
388
  }
302
389
  }
303
390
  }
@@ -306,16 +393,19 @@ struct SingleResultHandler : ResultHandlerCompare<C, with_id_map> {
306
393
  // find first non-zero
307
394
  int j = __builtin_ctz(lt_mask);
308
395
  lt_mask -= 1 << j;
396
+ this->scan_cnt++;
309
397
  T d = d32tab[j];
310
398
  if (C::cmp(idis[q], d)) {
311
399
  idis[q] = d;
312
400
  ids[q] = this->adjust_id(b, j);
401
+
402
+ this->in_range_num++;
313
403
  }
314
404
  }
315
405
  }
316
406
  }
317
407
 
318
- void end() {
408
+ void end() override {
319
409
  for (size_t q = 0; q < this->nq; q++) {
320
410
  if (!normalizers) {
321
411
  dis[q] = idis[q];
@@ -325,16 +415,21 @@ struct SingleResultHandler : ResultHandlerCompare<C, with_id_map> {
325
415
  dis[q] = b + idis[q] * one_a;
326
416
  }
327
417
  }
418
+ this->scan_cnt = 0;
328
419
  }
329
420
  };
330
421
 
331
422
  /** Structure that collects results in a min- or max-heap */
332
- template <class C, bool with_id_map = false>
333
- struct HeapHandler : ResultHandlerCompare<C, with_id_map> {
423
+ template <
424
+ class C,
425
+ bool with_id_map = false,
426
+ SIMDLevel SL = SINGLE_SIMD_LEVEL_256>
427
+ struct HeapHandler : ResultHandlerCompare<C, with_id_map, SL> {
334
428
  using T = typename C::T;
335
429
  using TI = typename C::TI;
336
- using RHC = ResultHandlerCompare<C, with_id_map>;
430
+ using RHC = ResultHandlerCompare<C, with_id_map, SL>;
337
431
  using RHC::normalizers;
432
+ using typename RHC::simd16uint16;
338
433
 
339
434
  std::vector<uint16_t> idis;
340
435
  std::vector<TI> iids;
@@ -406,11 +501,18 @@ struct HeapHandler : ResultHandlerCompare<C, with_id_map> {
406
501
  auto real_idx = this->adjust_id(b, j);
407
502
  lt_mask -= 1 << j;
408
503
  if (this->sel->is_member(real_idx)) {
504
+ this->scan_cnt++;
409
505
  T dis_for_j = d32tab[j];
410
506
  if (C::cmp(heap_dis[0], dis_for_j)) {
411
507
  heap_replace_top<C>(
412
- k, heap_dis, heap_ids, dis_for_j, real_idx);
508
+ k,
509
+ heap_dis,
510
+ heap_ids,
511
+ dis_for_j,
512
+ static_cast<TI>(real_idx));
413
513
  nup++;
514
+
515
+ this->in_range_num++;
414
516
  }
415
517
  }
416
518
  }
@@ -419,11 +521,19 @@ struct HeapHandler : ResultHandlerCompare<C, with_id_map> {
419
521
  // find first non-zero
420
522
  int j = __builtin_ctz(lt_mask);
421
523
  lt_mask -= 1 << j;
524
+ this->scan_cnt++;
422
525
  T dis_for_j = d32tab[j];
423
526
  if (C::cmp(heap_dis[0], dis_for_j)) {
424
527
  int64_t idx = this->adjust_id(b, j);
425
- heap_replace_top<C>(k, heap_dis, heap_ids, dis_for_j, idx);
528
+ heap_replace_top<C>(
529
+ k,
530
+ heap_dis,
531
+ heap_ids,
532
+ dis_for_j,
533
+ static_cast<TI>(idx));
426
534
  nup++;
535
+
536
+ this->in_range_num++;
427
537
  }
428
538
  }
429
539
  }
@@ -447,6 +557,7 @@ struct HeapHandler : ResultHandlerCompare<C, with_id_map> {
447
557
  heap_ids[j] = heap_ids_in[j];
448
558
  }
449
559
  }
560
+ this->scan_cnt = 0;
450
561
  }
451
562
 
452
563
  size_t num_updates() override {
@@ -460,12 +571,16 @@ struct HeapHandler : ResultHandlerCompare<C, with_id_map> {
460
571
  * reached. Then a partition sort is used to update the threshold. */
461
572
 
462
573
  /** Handler built from several ReservoirTopN (one per query) */
463
- template <class C, bool with_id_map = false>
464
- struct ReservoirHandler : ResultHandlerCompare<C, with_id_map> {
574
+ template <
575
+ class C,
576
+ bool with_id_map = false,
577
+ SIMDLevel SL = SINGLE_SIMD_LEVEL_256>
578
+ struct ReservoirHandler : ResultHandlerCompare<C, with_id_map, SL> {
465
579
  using T = typename C::T;
466
580
  using TI = typename C::TI;
467
- using RHC = ResultHandlerCompare<C, with_id_map>;
581
+ using RHC = ResultHandlerCompare<C, with_id_map, SL>;
468
582
  using RHC::normalizers;
583
+ using typename RHC::simd16uint16;
469
584
 
470
585
  size_t capacity; // rounded up to multiple of 16
471
586
 
@@ -524,8 +639,11 @@ struct ReservoirHandler : ResultHandlerCompare<C, with_id_map> {
524
639
  auto real_idx = this->adjust_id(b, j);
525
640
  lt_mask -= 1 << j;
526
641
  if (this->sel->is_member(real_idx)) {
642
+ this->scan_cnt++;
527
643
  T dis_for_j = d32tab[j];
528
- res.add(dis_for_j, real_idx);
644
+ res.add(dis_for_j, static_cast<TI>(real_idx));
645
+
646
+ this->in_range_num++;
529
647
  }
530
648
  }
531
649
  } else {
@@ -534,7 +652,10 @@ struct ReservoirHandler : ResultHandlerCompare<C, with_id_map> {
534
652
  int j = __builtin_ctz(lt_mask);
535
653
  lt_mask -= 1 << j;
536
654
  T dis_for_j = d32tab[j];
537
- res.add(dis_for_j, this->adjust_id(b, j));
655
+ this->scan_cnt++;
656
+ res.add(dis_for_j, static_cast<TI>(this->adjust_id(b, j)));
657
+
658
+ this->in_range_num++;
538
659
  }
539
660
  }
540
661
  }
@@ -576,6 +697,7 @@ struct ReservoirHandler : ResultHandlerCompare<C, with_id_map> {
576
697
  // possibly add empty results
577
698
  heap_heapify<Cf>(n - res.i, heap_dis + res.i, heap_ids + res.i);
578
699
  }
700
+ this->scan_cnt = 0;
579
701
  }
580
702
  };
581
703
 
@@ -583,13 +705,17 @@ struct ReservoirHandler : ResultHandlerCompare<C, with_id_map> {
583
705
  * have to be scaled using the scaler.
584
706
  */
585
707
 
586
- template <class C, bool with_id_map = false>
587
- struct RangeHandler : ResultHandlerCompare<C, with_id_map> {
708
+ template <
709
+ class C,
710
+ bool with_id_map = false,
711
+ SIMDLevel SL = SINGLE_SIMD_LEVEL_256>
712
+ struct RangeHandler : ResultHandlerCompare<C, with_id_map, SL> {
588
713
  using T = typename C::T;
589
714
  using TI = typename C::TI;
590
- using RHC = ResultHandlerCompare<C, with_id_map>;
715
+ using RHC = ResultHandlerCompare<C, with_id_map, SL>;
591
716
  using RHC::normalizers;
592
717
  using RHC::nq;
718
+ using typename RHC::simd16uint16;
593
719
 
594
720
  RangeSearchResult& rres;
595
721
  float radius;
@@ -616,7 +742,7 @@ struct RangeHandler : ResultHandlerCompare<C, with_id_map> {
616
742
  n_per_query.resize(nq + 1);
617
743
  }
618
744
 
619
- virtual void begin(const float* norms) override {
745
+ void begin(const float* norms) override {
620
746
  normalizers = norms;
621
747
  for (int q = 0; q < nq; ++q) {
622
748
  thresholds[q] =
@@ -650,6 +776,8 @@ struct RangeHandler : ResultHandlerCompare<C, with_id_map> {
650
776
  T dis = d32tab[j];
651
777
  n_per_query[q]++;
652
778
  triplets.push_back({idx_t(q + q0), real_idx, dis});
779
+
780
+ this->in_range_num++;
653
781
  }
654
782
  }
655
783
  } else {
@@ -660,6 +788,8 @@ struct RangeHandler : ResultHandlerCompare<C, with_id_map> {
660
788
  T dis = d32tab[j];
661
789
  n_per_query[q]++;
662
790
  triplets.push_back({idx_t(q + q0), this->adjust_id(b, j), dis});
791
+
792
+ this->in_range_num++;
663
793
  }
664
794
  }
665
795
  }
@@ -689,11 +819,14 @@ struct RangeHandler : ResultHandlerCompare<C, with_id_map> {
689
819
  #ifndef SWIG
690
820
 
691
821
  // handler for a subset of queries
692
- template <class C, bool with_id_map = false>
693
- struct PartialRangeHandler : RangeHandler<C, with_id_map> {
822
+ template <
823
+ class C,
824
+ bool with_id_map = false,
825
+ SIMDLevel SL = SINGLE_SIMD_LEVEL_256>
826
+ struct PartialRangeHandler : RangeHandler<C, with_id_map, SL> {
694
827
  using T = typename C::T;
695
828
  using TI = typename C::TI;
696
- using RHC = RangeHandler<C, with_id_map>;
829
+ using RHC = RangeHandler<C, with_id_map, SL>;
697
830
  using RHC::normalizers;
698
831
  using RHC::nq, RHC::q0, RHC::triplets, RHC::n_per_query;
699
832
 
@@ -706,7 +839,11 @@ struct PartialRangeHandler : RangeHandler<C, with_id_map> {
706
839
  size_t q0,
707
840
  size_t q1,
708
841
  const IDSelector* sel_in)
709
- : RangeHandler<C, with_id_map>(*pres.res, radius, ntotal, sel_in),
842
+ : RangeHandler<C, with_id_map, SL>(
843
+ *pres.res,
844
+ radius,
845
+ ntotal,
846
+ sel_in),
710
847
  pres(pres) {
711
848
  nq = q1 - q0;
712
849
  this->q0 = q0;
@@ -748,83 +885,94 @@ struct PartialRangeHandler : RangeHandler<C, with_id_map> {
748
885
  }
749
886
  };
750
887
 
751
- #endif
752
-
753
- /********************************************************************************
754
- * Dynamic dispatching function. The consumer should have a templatized method f
755
- * that will be replaced with the actual SIMDResultHandler that is determined
756
- * dynamically.
888
+ /** Handler that collects all matching results for a single query.
889
+ *
890
+ * Unlike HeapHandler/ReservoirHandler which maintain a top-k structure,
891
+ * this handler appends every result that passes the threshold to a
892
+ * vector of (distance, id) pairs. Useful for exhaustive collection
893
+ * (e.g., "return all" searches) in the fast_scan SIMD path.
757
894
  */
895
+ template <
896
+ class C,
897
+ bool with_id_map = false,
898
+ SIMDLevel SL = SINGLE_SIMD_LEVEL_256>
899
+ struct SingleQueryResultCollectHandler
900
+ : ResultHandlerCompare<C, with_id_map, SL> {
901
+ using T = typename C::T;
902
+ using TI = typename C::TI;
903
+ using RHC = ResultHandlerCompare<C, with_id_map, SL>;
904
+ using RHC::normalizers;
905
+ using typename RHC::simd16uint16;
906
+
907
+ // Store as float (not T=uint16_t) since end() applies normalizer scaling
908
+ std::vector<std::pair<TI, float>>& collect;
909
+ const int q_id = 0;
758
910
 
759
- template <class C, bool W, class Consumer, class... Types>
760
- void dispatch_SIMDResultHandler_fixedCW(
761
- SIMDResultHandler& res,
762
- Consumer& consumer,
763
- Types... args) {
764
- if (auto resh_sh = dynamic_cast<SingleResultHandler<C, W>*>(&res)) {
765
- consumer.template f<SingleResultHandler<C, W>>(*resh_sh, args...);
766
- } else if (auto resh_hh = dynamic_cast<HeapHandler<C, W>*>(&res)) {
767
- consumer.template f<HeapHandler<C, W>>(*resh_hh, args...);
768
- } else if (auto resh_rh = dynamic_cast<ReservoirHandler<C, W>*>(&res)) {
769
- consumer.template f<ReservoirHandler<C, W>>(*resh_rh, args...);
770
- } else { // generic handler -- will not be inlined
771
- FAISS_THROW_IF_NOT_FMT(
772
- simd_result_handlers_accept_virtual,
773
- "Running virtual handler for %s",
774
- typeid(res).name());
775
- consumer.template f<SIMDResultHandler>(res, args...);
911
+ SingleQueryResultCollectHandler(
912
+ std::vector<std::pair<TI, float>>& res,
913
+ size_t ntotal,
914
+ const IDSelector* sel_in)
915
+ : RHC(1, ntotal, sel_in), collect(res) {
916
+ this->q_map = &q_id;
776
917
  }
777
- }
778
918
 
779
- template <class C, class Consumer, class... Types>
780
- void dispatch_SIMDResultHandler_fixedC(
781
- SIMDResultHandler& res,
782
- Consumer& consumer,
783
- Types... args) {
784
- if (res.with_fields) {
785
- dispatch_SIMDResultHandler_fixedCW<C, true>(res, consumer, args...);
786
- } else {
787
- dispatch_SIMDResultHandler_fixedCW<C, false>(res, consumer, args...);
919
+ void begin(const float* norms) override {
920
+ normalizers = norms;
788
921
  }
789
- }
790
922
 
791
- template <class Consumer, class... Types>
792
- void dispatch_SIMDResultHandler(
793
- SIMDResultHandler& res,
794
- Consumer& consumer,
795
- Types... args) {
796
- if (res.sizeof_ids == 0) {
797
- if (auto resh = dynamic_cast<StoreResultHandler*>(&res)) {
798
- consumer.template f<StoreResultHandler>(*resh, args...);
799
- } else if (auto resh_2 = dynamic_cast<DummyResultHandler*>(&res)) {
800
- consumer.template f<DummyResultHandler>(*resh_2, args...);
801
- } else { // generic path
802
- FAISS_THROW_IF_NOT_FMT(
803
- simd_result_handlers_accept_virtual,
804
- "Running virtual handler for %s",
805
- typeid(res).name());
806
- consumer.template f<SIMDResultHandler>(res, args...);
807
- }
808
- } else if (res.sizeof_ids == sizeof(int)) {
809
- if (res.is_CMax) {
810
- dispatch_SIMDResultHandler_fixedC<CMax<uint16_t, int>>(
811
- res, consumer, args...);
812
- } else {
813
- dispatch_SIMDResultHandler_fixedC<CMin<uint16_t, int>>(
814
- res, consumer, args...);
923
+ void handle(size_t q, size_t b, simd16uint16 d0, simd16uint16 d1) final {
924
+ if (this->disable) {
925
+ return;
815
926
  }
816
- } else if (res.sizeof_ids == sizeof(int64_t)) {
817
- if (res.is_CMax) {
818
- dispatch_SIMDResultHandler_fixedC<CMax<uint16_t, int64_t>>(
819
- res, consumer, args...);
927
+
928
+ this->adjust_with_origin(q, d0, d1);
929
+
930
+ uint32_t lt_mask = this->get_lt_mask(C::neutral(), b, d0, d1);
931
+
932
+ if (!lt_mask) {
933
+ return;
934
+ }
935
+
936
+ ALIGNED(32) uint16_t d32tab[32];
937
+ d0.store(d32tab);
938
+ d1.store(d32tab + 16);
939
+
940
+ if (this->sel != nullptr) {
941
+ while (lt_mask) {
942
+ int j = __builtin_ctz(lt_mask);
943
+ auto real_idx = this->adjust_id(b, j);
944
+ lt_mask -= 1 << j;
945
+ if (this->sel->is_member(real_idx)) {
946
+ T dis = d32tab[j];
947
+ collect.emplace_back(real_idx, dis);
948
+ this->in_range_num++;
949
+ }
950
+ }
820
951
  } else {
821
- dispatch_SIMDResultHandler_fixedC<CMin<uint16_t, int64_t>>(
822
- res, consumer, args...);
952
+ while (lt_mask) {
953
+ int j = __builtin_ctz(lt_mask);
954
+ lt_mask -= 1 << j;
955
+ T dis = d32tab[j];
956
+ int64_t idx = this->adjust_id(b, j);
957
+ collect.emplace_back(idx, dis);
958
+ this->in_range_num++;
959
+ }
823
960
  }
824
- } else {
825
- FAISS_THROW_FMT("Unknown id size %d", res.sizeof_ids);
826
961
  }
827
- }
962
+
963
+ void end() override {
964
+ if (normalizers) {
965
+ float one_a = 1 / normalizers[0];
966
+ float b = normalizers[1];
967
+ for (size_t i = 0; i < collect.size(); i++) {
968
+ collect[i].second = collect[i].second * one_a + b;
969
+ }
970
+ }
971
+ this->scan_cnt = 0;
972
+ }
973
+ };
974
+
975
+ #endif
828
976
 
829
977
  } // namespace simd_result_handlers
830
978