faiss 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  84. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  85. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  86. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  87. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  88. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  89. data/vendor/faiss/faiss/MetricType.h +14 -7
  90. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  91. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  92. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  93. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  94. data/vendor/faiss/faiss/build.cpp +23 -0
  95. data/vendor/faiss/faiss/build.h +15 -0
  96. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  101. data/vendor/faiss/faiss/factory_tools.cpp +5 -0
  102. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  109. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  110. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  111. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  112. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  113. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  114. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  115. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  116. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  117. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  120. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  121. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  122. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  123. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  124. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  125. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  126. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  127. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  128. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  129. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  130. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  131. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  132. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  133. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  134. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  135. data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
  136. data/vendor/faiss/faiss/impl/HNSW.h +13 -34
  137. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  138. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  139. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  141. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  142. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  143. data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
  144. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  145. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  146. data/vendor/faiss/faiss/impl/Panorama.h +258 -87
  147. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  148. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  149. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  150. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  151. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  152. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  153. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
  154. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  155. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  156. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  157. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
  158. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  159. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  160. data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
  161. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
  162. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
  163. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  164. data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
  165. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  166. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  167. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  168. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  169. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  170. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  171. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  172. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  173. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  174. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  175. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  176. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  177. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  178. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  179. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  180. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  182. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  183. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  184. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  185. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  186. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  187. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  188. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  189. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  190. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  191. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  192. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  193. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  194. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  196. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  197. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  198. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  199. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  200. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  201. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  202. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  203. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  204. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  205. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  206. data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
  207. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  208. data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
  209. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  210. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  211. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  212. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  213. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  214. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  215. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  216. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  217. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  218. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  219. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  220. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  221. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  222. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  223. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  224. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  225. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
  226. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
  228. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
  229. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  230. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  231. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  232. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  233. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
  234. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
  235. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  236. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  237. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
  238. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
  239. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
  240. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
  241. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  244. data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
  245. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  246. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  247. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  248. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  249. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  250. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  251. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  252. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  253. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  254. data/vendor/faiss/faiss/index_factory.cpp +86 -18
  255. data/vendor/faiss/faiss/index_io.h +24 -0
  256. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  257. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  258. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  259. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  260. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  261. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  262. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  263. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  264. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  265. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  266. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  267. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  268. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  269. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  270. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  271. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  272. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  273. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
  274. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  275. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
  276. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
  277. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  278. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  279. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  280. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  281. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  282. data/vendor/faiss/faiss/utils/distances.h +20 -1
  283. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  284. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  285. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  286. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  287. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  288. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  289. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  290. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
  291. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  292. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  293. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  294. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  295. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  296. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  297. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  298. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  299. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  300. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  301. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  302. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  303. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  304. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  305. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  306. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  307. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  308. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  309. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  310. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  311. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  312. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  313. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  314. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  315. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  316. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  317. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  318. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  319. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  320. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  321. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  322. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  323. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  324. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  325. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  326. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  327. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  328. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  329. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  330. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  331. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  332. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  333. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  339. data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
  340. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  341. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  342. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  343. data/vendor/faiss/faiss/utils/utils.h +3 -3
  344. metadata +119 -34
  345. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  346. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  347. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  348. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  349. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  350. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  351. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  352. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  353. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  354. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  355. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  356. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  357. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  358. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  359. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  360. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  361. data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -17,11 +17,10 @@
17
17
  #include <faiss/IndexIVFPQ.h>
18
18
  #include <faiss/impl/AuxIndexStructures.h>
19
19
  #include <faiss/impl/FaissAssert.h>
20
- #include <faiss/impl/FastScanDistancePostProcessing.h>
21
- #include <faiss/impl/LookupTableScaler.h>
22
20
  #include <faiss/impl/RaBitQUtils.h>
23
- #include <faiss/impl/pq4_fast_scan.h>
24
- #include <faiss/impl/simd_result_handlers.h>
21
+ #include <faiss/impl/fast_scan/FastScanDistancePostProcessing.h>
22
+ #include <faiss/impl/fast_scan/fast_scan.h>
23
+ #include <faiss/impl/fast_scan/simd_result_handlers.h>
25
24
  #include <faiss/invlists/BlockInvertedLists.h>
26
25
  #include <faiss/utils/hamming.h>
27
26
  #include <faiss/utils/quantize_lut.h>
@@ -29,20 +28,24 @@
29
28
 
30
29
  namespace faiss {
31
30
 
32
- using namespace simd_result_handlers;
33
-
34
31
  inline size_t roundup(size_t a, size_t b) {
35
32
  return (a + b - 1) / b * b;
36
33
  }
37
34
 
38
35
  IndexIVFFastScan::IndexIVFFastScan(
39
- Index* quantizer,
40
- size_t d,
41
- size_t nlist,
42
- size_t code_size,
36
+ Index* quantizer_in,
37
+ size_t d_in,
38
+ size_t nlist_in,
39
+ size_t code_size_in,
43
40
  MetricType metric,
44
- bool own_invlists)
45
- : IndexIVF(quantizer, d, nlist, code_size, metric, own_invlists) {
41
+ bool own_invlists_in)
42
+ : IndexIVF(
43
+ quantizer_in,
44
+ d_in,
45
+ nlist_in,
46
+ code_size_in,
47
+ metric,
48
+ own_invlists_in) {
46
49
  // unlike other indexes, we prefer no residuals for performance reasons.
47
50
  by_residual = false;
48
51
  FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT);
@@ -56,35 +59,40 @@ IndexIVFFastScan::IndexIVFFastScan() {
56
59
  }
57
60
 
58
61
  void IndexIVFFastScan::init_fastscan(
59
- Quantizer* fine_quantizer,
60
- size_t M,
62
+ Quantizer* fine_quantizer_in,
63
+ size_t M_in,
61
64
  size_t nbits_init,
62
- size_t nlist,
65
+ size_t nlist_in,
63
66
  MetricType /* metric */,
64
67
  int bbs_2,
65
- bool own_invlists) {
68
+ bool own_invlists_in) {
66
69
  FAISS_THROW_IF_NOT(bbs_2 % 32 == 0);
67
70
  FAISS_THROW_IF_NOT(nbits_init == 4);
68
- FAISS_THROW_IF_NOT(fine_quantizer->d == d);
71
+ FAISS_THROW_IF_NOT(fine_quantizer_in->d == static_cast<size_t>(d));
69
72
 
70
- this->fine_quantizer = fine_quantizer;
71
- this->M = M;
73
+ this->fine_quantizer = fine_quantizer_in;
74
+ this->M = M_in;
72
75
  this->nbits = nbits_init;
73
76
  this->bbs = bbs_2;
74
77
  ksub = (1 << nbits_init);
75
- M2 = roundup(M, 2);
78
+ M2 = roundup(M_in, 2);
76
79
  code_size = M2 / 2;
77
- FAISS_THROW_IF_NOT(code_size == fine_quantizer->code_size);
80
+ FAISS_THROW_IF_NOT(code_size == fine_quantizer_in->code_size);
78
81
 
79
82
  is_trained = false;
80
- if (own_invlists) {
81
- replace_invlists(new BlockInvertedLists(nlist, get_CodePacker()), true);
83
+ if (own_invlists_in) {
84
+ replace_invlists(
85
+ new BlockInvertedLists(nlist_in, get_CodePacker()), true);
82
86
  }
83
87
  }
84
88
 
85
89
  void IndexIVFFastScan::init_code_packer() {
86
90
  auto bil = dynamic_cast<BlockInvertedLists*>(invlists);
87
- FAISS_THROW_IF_NOT(bil);
91
+ if (!bil) {
92
+ // invlists is not block-packed (e.g., when own_invlists=false).
93
+ // Nothing to do — the caller manages inverted lists externally.
94
+ return;
95
+ }
88
96
  delete bil->packer; // in case there was one before
89
97
  bil->packer = get_CodePacker();
90
98
  }
@@ -239,22 +247,22 @@ void estimators_from_tables_generic(
239
247
  int64_t* heap_ids,
240
248
  const FastScanDistancePostProcessing& context) {
241
249
  using accu_t = typename C::T;
242
- size_t nscale = context.norm_scaler ? context.norm_scaler->nscale : 0;
250
+ size_t nscale = context.pq2x4_scale ? 2 : 0;
243
251
  for (size_t j = 0; j < ncodes; ++j) {
244
252
  BitstringReader bsr(codes + j * index.code_size, index.code_size);
245
253
  accu_t dis = bias;
246
254
  const dis_t* __restrict dt = dis_table;
247
255
 
248
256
  for (size_t m = 0; m < index.M - nscale; m++) {
249
- uint64_t c = bsr.read(index.nbits);
257
+ uint64_t c = bsr.read(static_cast<int>(index.nbits));
250
258
  dis += dt[c];
251
259
  dt += index.ksub;
252
260
  }
253
261
 
254
- if (context.norm_scaler) {
262
+ if (nscale) {
255
263
  for (size_t m = 0; m < nscale; m++) {
256
- uint64_t c = bsr.read(index.nbits);
257
- dis += context.norm_scaler->scale_one(dt[c]);
264
+ uint64_t c = bsr.read(static_cast<int>(index.nbits));
265
+ dis += dt[c] * context.pq2x4_scale;
258
266
  dt += index.ksub;
259
267
  }
260
268
  }
@@ -285,33 +293,33 @@ void IndexIVFFastScan::compute_LUT_uint8(
285
293
  AlignedTable<float> biases_float;
286
294
 
287
295
  compute_LUT(n, x, cq, dis_tables_float, biases_float, context);
288
- size_t nprobe = cq.nprobe;
296
+ size_t cur_nprobe = cq.nprobe;
289
297
  bool lut_is_3d = lookup_table_is_3d();
290
298
  size_t dim123 = ksub * M;
291
299
  size_t dim123_2 = ksub * M2;
292
300
  if (lut_is_3d) {
293
- dim123 *= nprobe;
294
- dim123_2 *= nprobe;
301
+ dim123 *= cur_nprobe;
302
+ dim123_2 *= cur_nprobe;
295
303
  }
296
304
  dis_tables.resize(n * dim123_2);
297
305
  if (biases_float.get()) {
298
- biases.resize(n * nprobe);
306
+ biases.resize(n * cur_nprobe);
299
307
  }
300
308
 
301
309
  // OMP for MSVC requires i to have signed integral type
302
310
  #pragma omp parallel for if (n > 100)
303
- for (int64_t i = 0; i < n; i++) {
311
+ for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
304
312
  const float* t_in = dis_tables_float.get() + i * dim123;
305
313
  const float* b_in = nullptr;
306
314
  uint8_t* t_out = dis_tables.get() + i * dim123_2;
307
315
  uint16_t* b_out = nullptr;
308
316
  if (biases_float.get()) {
309
- b_in = biases_float.get() + i * nprobe;
310
- b_out = biases.get() + i * nprobe;
317
+ b_in = biases_float.get() + i * cur_nprobe;
318
+ b_out = biases.get() + i * cur_nprobe;
311
319
  }
312
320
 
313
321
  quantize_LUT_and_bias(
314
- nprobe,
322
+ cur_nprobe,
315
323
  M,
316
324
  ksub,
317
325
  lut_is_3d,
@@ -358,10 +366,14 @@ void IndexIVFFastScan::search_preassigned(
358
366
  bool store_pairs,
359
367
  const IVFSearchParameters* params,
360
368
  IndexIVFStats* stats) const {
361
- size_t nprobe = this->nprobe;
369
+ size_t cur_nprobe = this->nprobe;
362
370
  if (params) {
363
- FAISS_THROW_IF_NOT(params->max_codes == 0);
364
- nprobe = params->nprobe;
371
+ // Range-search-only option.
372
+ FAISS_THROW_IF_NOT_MSG(
373
+ params->max_empty_result_buckets == 0,
374
+ "max_empty_result_buckets is a range-search knob and is "
375
+ "not honored by fastscan knn search");
376
+ cur_nprobe = params->nprobe;
365
377
  }
366
378
 
367
379
  FAISS_THROW_IF_NOT_MSG(
@@ -370,7 +382,7 @@ void IndexIVFFastScan::search_preassigned(
370
382
  FAISS_THROW_IF_NOT(k > 0);
371
383
  FastScanDistancePostProcessing empty_context{};
372
384
 
373
- const CoarseQuantized cq = {nprobe, centroid_dis, assign};
385
+ const CoarseQuantized cq = {cur_nprobe, centroid_dis, assign};
374
386
  search_dispatch_implem(
375
387
  n, x, k, distances, labels, cq, empty_context, params);
376
388
  }
@@ -381,45 +393,35 @@ void IndexIVFFastScan::range_search(
381
393
  float radius,
382
394
  RangeSearchResult* result,
383
395
  const SearchParameters* params_in) const {
384
- size_t nprobe = this->nprobe;
396
+ size_t cur_nprobe = this->nprobe;
385
397
  const IVFSearchParameters* params = nullptr;
386
398
  if (params_in) {
387
399
  params = dynamic_cast<const IVFSearchParameters*>(params_in);
388
400
  FAISS_THROW_IF_NOT_MSG(
389
401
  params, "IndexIVFFastScan params have incorrect type");
390
- nprobe = params->nprobe;
402
+ // k-NN-only options.
403
+ FAISS_THROW_IF_NOT_MSG(
404
+ params->max_lists_num == 0,
405
+ "max_lists_num is a knn knob and is not honored by "
406
+ "fastscan range search");
407
+ FAISS_THROW_IF_NOT_MSG(
408
+ !params->ensure_topk_full,
409
+ "ensure_topk_full is a knn knob and is not honored by "
410
+ "fastscan range search");
411
+ FAISS_THROW_IF_NOT_MSG(
412
+ params->max_codes == 0,
413
+ "max_codes is not honored by fastscan range search");
414
+ cur_nprobe = params->nprobe;
391
415
  }
392
416
  FastScanDistancePostProcessing empty_context{};
393
417
 
394
- const CoarseQuantized cq = {nprobe, nullptr, nullptr};
418
+ const CoarseQuantized cq = {cur_nprobe, nullptr, nullptr};
395
419
  range_search_dispatch_implem(
396
420
  n, x, radius, *result, cq, empty_context, params);
397
421
  }
398
422
 
399
423
  namespace {
400
424
 
401
- template <class C>
402
- ResultHandlerCompare<C, true>* make_knn_handler_fixC(
403
- int impl,
404
- idx_t n,
405
- idx_t k,
406
- float* distances,
407
- idx_t* labels,
408
- const IDSelector* sel,
409
- const float* normalizers) {
410
- using HeapHC = HeapHandler<C, true>;
411
- using ReservoirHC = ReservoirHandler<C, true>;
412
- using SingleResultHC = SingleResultHandler<C, true>;
413
-
414
- if (k == 1) {
415
- return new SingleResultHC(n, 0, distances, labels, sel);
416
- } else if (impl % 2 == 0) {
417
- return new HeapHC(n, 0, k, distances, labels, sel, normalizers);
418
- } else /* if (impl % 2 == 1) */ {
419
- return new ReservoirHC(n, 0, k, 2 * k, distances, labels, sel);
420
- }
421
- }
422
-
423
425
  using CoarseQuantized = IndexIVFFastScan::CoarseQuantized;
424
426
 
425
427
  struct CoarseQuantizedWithBuffer : CoarseQuantized {
@@ -454,8 +456,8 @@ struct CoarseQuantizedWithBuffer : CoarseQuantized {
454
456
 
455
457
  struct CoarseQuantizedSlice : CoarseQuantizedWithBuffer {
456
458
  const size_t i0, i1;
457
- CoarseQuantizedSlice(const CoarseQuantized& cq, size_t i0, size_t i1)
458
- : CoarseQuantizedWithBuffer(cq), i0(i0), i1(i1) {
459
+ CoarseQuantizedSlice(const CoarseQuantized& cq, size_t i0_in, size_t i1_in)
460
+ : CoarseQuantizedWithBuffer(cq), i0(i0_in), i1(i1_in) {
459
461
  if (done()) {
460
462
  dis += nprobe * i0;
461
463
  ids += nprobe * i0;
@@ -473,20 +475,20 @@ struct CoarseQuantizedSlice : CoarseQuantizedWithBuffer {
473
475
  int compute_search_nslice(
474
476
  const IndexIVFFastScan* index,
475
477
  size_t n,
476
- size_t nprobe) {
478
+ size_t cur_nprobe) {
477
479
  int nslice;
478
- if (n <= omp_get_max_threads()) {
479
- nslice = n;
480
+ if (n <= static_cast<size_t>(omp_get_max_threads())) {
481
+ nslice = static_cast<int>(n);
480
482
  } else if (index->lookup_table_is_3d()) {
481
483
  // make sure we don't make too big LUT tables
482
- size_t lut_size_per_query = index->M * index->ksub * nprobe *
484
+ size_t lut_size_per_query = index->M * index->ksub * cur_nprobe *
483
485
  (sizeof(float) + sizeof(uint8_t));
484
486
 
485
487
  size_t max_lut_size = precomputed_table_max_bytes;
486
488
  // how many queries we can handle within mem budget
487
489
  size_t nq_ok = std::max(max_lut_size / lut_size_per_query, size_t(1));
488
- nslice = roundup(
489
- std::max(size_t(n / nq_ok), size_t(1)), omp_get_max_threads());
490
+ nslice = static_cast<int>(roundup(
491
+ std::max(size_t(n / nq_ok), size_t(1)), omp_get_max_threads()));
490
492
  } else {
491
493
  // LUTs unlikely to be a limiting factor
492
494
  nslice = omp_get_max_threads();
@@ -496,23 +498,25 @@ int compute_search_nslice(
496
498
 
497
499
  } // namespace
498
500
 
499
- SIMDResultHandlerToFloat* IndexIVFFastScan::make_knn_handler(
501
+ std::unique_ptr<FastScanCodeScanner> IndexIVFFastScan::make_knn_scanner(
500
502
  bool is_max,
501
- int impl,
502
503
  idx_t n,
503
504
  idx_t k,
504
505
  float* distances,
505
506
  idx_t* labels,
506
507
  const IDSelector* sel,
507
- const FastScanDistancePostProcessing&,
508
- const float* normalizers) const {
509
- if (is_max) {
510
- return make_knn_handler_fixC<CMax<uint16_t, int64_t>>(
511
- impl, n, k, distances, labels, sel, normalizers);
512
- } else {
513
- return make_knn_handler_fixC<CMin<uint16_t, int64_t>>(
514
- impl, n, k, distances, labels, sel, normalizers);
515
- }
508
+ int impl,
509
+ const FastScanDistancePostProcessing&) const {
510
+ return make_fast_scan_knn_scanner(
511
+ is_max,
512
+ impl,
513
+ n,
514
+ 0,
515
+ k,
516
+ distances,
517
+ labels,
518
+ sel,
519
+ /*with_id_map=*/true);
516
520
  }
517
521
 
518
522
  void IndexIVFFastScan::search_dispatch_implem(
@@ -524,13 +528,12 @@ void IndexIVFFastScan::search_dispatch_implem(
524
528
  const CoarseQuantized& cq_in,
525
529
  const FastScanDistancePostProcessing& context,
526
530
  const IVFSearchParameters* params) const {
527
- const idx_t nprobe = params ? params->nprobe : this->nprobe;
531
+ const idx_t cur_nprobe = params ? params->nprobe : this->nprobe;
528
532
  const IDSelector* sel = (params) ? params->sel : nullptr;
529
533
  const SearchParameters* quantizer_params =
530
534
  params ? params->quantizer_params : nullptr;
531
535
 
532
536
  bool is_max = !is_similarity_metric(metric_type);
533
- using RH = SIMDResultHandlerToFloat;
534
537
 
535
538
  if (n == 0) {
536
539
  return;
@@ -539,8 +542,14 @@ void IndexIVFFastScan::search_dispatch_implem(
539
542
  // actual implementation used
540
543
  int impl = implem;
541
544
 
545
+ // Early-stop k-NN options require the per-query implementations.
546
+ const bool any_early_term_knob = params &&
547
+ (params->max_codes != 0 || params->max_lists_num != 0 ||
548
+ params->ensure_topk_full);
549
+
542
550
  if (impl == 0) {
543
- if (bbs == 32) {
551
+ // Auto-select the per-query path when early-stop budgets are used.
552
+ if (bbs == 32 && !any_early_term_knob) {
544
553
  impl = 12;
545
554
  } else {
546
555
  impl = 10;
@@ -557,15 +566,24 @@ void IndexIVFFastScan::search_dispatch_implem(
557
566
  impl -= 100;
558
567
  }
559
568
 
569
+ if (any_early_term_knob) {
570
+ FAISS_THROW_IF_NOT_MSG(
571
+ impl == 10 || impl == 11,
572
+ "max_codes / max_lists_num / ensure_topk_full are only "
573
+ "supported by IndexIVFFastScan implem 10/11; set "
574
+ "index.implem = 10 (or 11 for k>20) explicitly, or leave it "
575
+ "at the default 0");
576
+ }
577
+
560
578
  CoarseQuantizedWithBuffer cq(cq_in);
561
- cq.nprobe = nprobe;
579
+ cq.nprobe = cur_nprobe;
562
580
 
563
581
  if (!cq.done() && !multiple_threads) {
564
582
  // we do the coarse quantization here execpt when search is
565
583
  // sliced over threads (then it is more efficient to have each thread do
566
584
  // its own coarse quantization)
567
585
  cq.quantize(quantizer, n, x, quantizer_params);
568
- invlists->prefetch_lists(cq.ids, n * cq.nprobe);
586
+ invlists->prefetch_lists(cq.ids, static_cast<int>(n * cq.nprobe));
569
587
  }
570
588
 
571
589
  if (impl == 1) {
@@ -588,43 +606,38 @@ void IndexIVFFastScan::search_dispatch_implem(
588
606
  size_t ndis = 0, nlist_visited = 0;
589
607
 
590
608
  if (!multiple_threads) {
591
- // clang-format off
592
- if (impl == 12 || impl == 13) {
593
- std::unique_ptr<RH> handler(
594
- static_cast<RH*>(this->make_knn_handler(
595
- is_max,
596
- impl,
597
- n,
598
- k,
599
- distances,
600
- labels,
601
- sel,
602
- context))
603
- );
604
- search_implem_12(
605
- n, x, *handler.get(),
606
- cq, &ndis, &nlist_visited, context, params);
607
- } else if (impl == 14 || impl == 15) {
609
+ if (impl == 14 || impl == 15) {
608
610
  search_implem_14(
609
- n, x, k, distances, labels,
610
- cq, impl, context, params);
611
+ n, x, k, distances, labels, cq, impl, context, params);
611
612
  } else {
612
- std::unique_ptr<RH> handler(
613
- static_cast<RH*>(this->make_knn_handler(
614
- is_max,
615
- impl,
616
- n,
617
- k,
618
- distances,
619
- labels,
620
- sel,
621
- context))
622
- );
623
- search_implem_10(
624
- n, x, *handler.get(), cq,
625
- &ndis, &nlist_visited, context, params);
613
+ auto scanner = make_knn_scanner(
614
+ is_max, n, k, distances, labels, sel, impl, context);
615
+ auto* handler = scanner->handler();
616
+ if (impl == 12 || impl == 13) {
617
+ search_implem_12(
618
+ n,
619
+ x,
620
+ *handler,
621
+ cq,
622
+ &ndis,
623
+ &nlist_visited,
624
+ context,
625
+ params,
626
+ *scanner);
627
+ } else {
628
+ search_implem_10(
629
+ n,
630
+ x,
631
+ k,
632
+ *handler,
633
+ cq,
634
+ &ndis,
635
+ &nlist_visited,
636
+ context,
637
+ params,
638
+ *scanner);
639
+ }
626
640
  }
627
- // clang-format on
628
641
  } else {
629
642
  // explicitly slice over threads
630
643
  int nslice = compute_search_nslice(this, n, cq.nprobe);
@@ -649,30 +662,43 @@ void IndexIVFFastScan::search_dispatch_implem(
649
662
  // pointer
650
663
  FastScanDistancePostProcessing thread_context = context;
651
664
  if (thread_context.query_factors != nullptr) {
652
- thread_context.query_factors += i0 * nprobe;
665
+ thread_context.query_factors += i0 * cur_nprobe;
653
666
  }
654
667
 
655
- std::unique_ptr<RH> handler(
656
- static_cast<RH*>(this->make_knn_handler(
657
- is_max,
658
- impl,
659
- i1 - i0,
660
- k,
661
- dis_i,
662
- lab_i,
663
- sel,
664
- thread_context)));
665
- // clang-format off
668
+ auto scanner = make_knn_scanner(
669
+ is_max,
670
+ i1 - i0,
671
+ k,
672
+ dis_i,
673
+ lab_i,
674
+ sel,
675
+ impl,
676
+ thread_context);
677
+ auto* handler = scanner->handler();
666
678
  if (impl == 12 || impl == 13) {
667
679
  search_implem_12(
668
- i1 - i0, x + i0 * d, *handler.get(),
669
- cq_i, &ndis, &nlist_visited, thread_context, params);
680
+ i1 - i0,
681
+ x + i0 * d,
682
+ *handler,
683
+ cq_i,
684
+ &ndis,
685
+ &nlist_visited,
686
+ thread_context,
687
+ params,
688
+ *scanner);
670
689
  } else {
671
690
  search_implem_10(
672
- i1 - i0, x + i0 * d, *handler.get(),
673
- cq_i, &ndis, &nlist_visited, thread_context, params);
691
+ i1 - i0,
692
+ x + i0 * d,
693
+ k,
694
+ *handler,
695
+ cq_i,
696
+ &ndis,
697
+ &nlist_visited,
698
+ thread_context,
699
+ params,
700
+ *scanner);
674
701
  }
675
- // clang-format on
676
702
  }
677
703
  }
678
704
  }
@@ -702,11 +728,23 @@ void IndexIVFFastScan::range_search_dispatch_implem(
702
728
  if (n == 0) {
703
729
  return;
704
730
  }
731
+ // FastScan range early-stop budget: enabled only for ordered per-query
732
+ // scanning below.
733
+ const bool use_empty_result_early_exit =
734
+ params && params->max_empty_result_buckets != 0;
735
+ const int pmode = this->parallel_mode & ~PARALLEL_MODE_NO_HEAP_INIT;
736
+ FAISS_THROW_IF_NOT_MSG(
737
+ !use_empty_result_early_exit || pmode == 0,
738
+ "max_empty_result_buckets supported only for parallel_mode = 0");
739
+
705
740
  // actual implementation used
706
741
  int impl = implem;
707
742
 
708
743
  if (impl == 0) {
709
- if (bbs == 32) {
744
+ if (use_empty_result_early_exit) {
745
+ // Empty-bucket early stop needs per-query probe order.
746
+ impl = 10;
747
+ } else if (bbs == 32) {
710
748
  impl = 12;
711
749
  } else {
712
750
  impl = 10;
@@ -722,28 +760,44 @@ void IndexIVFFastScan::range_search_dispatch_implem(
722
760
  impl -= 100;
723
761
  }
724
762
 
763
+ FAISS_THROW_IF_NOT_MSG(
764
+ !use_empty_result_early_exit || impl == 10,
765
+ "max_empty_result_buckets is only supported by "
766
+ "IndexIVFFastScan range-search implem 10");
767
+
725
768
  if (!multiple_threads && !cq.done()) {
726
769
  cq.quantize(quantizer, n, x, quantizer_params);
727
- invlists->prefetch_lists(cq.ids, n * cq.nprobe);
770
+ invlists->prefetch_lists(cq.ids, static_cast<int>(n * cq.nprobe));
728
771
  }
729
772
 
730
773
  size_t ndis = 0, nlist_visited = 0;
731
774
 
732
775
  if (!multiple_threads) { // single thread
733
- std::unique_ptr<SIMDResultHandlerToFloat> handler;
734
- if (is_max) {
735
- handler.reset(new RangeHandler<CMax<uint16_t, int64_t>, true>(
736
- rres, radius, 0, sel));
737
- } else {
738
- handler.reset(new RangeHandler<CMin<uint16_t, int64_t>, true>(
739
- rres, radius, 0, sel));
740
- }
776
+ auto scanner = make_range_scanner(is_max, rres, radius, 0, sel);
777
+ auto* handler = scanner->handler();
741
778
  if (impl == 12) {
742
779
  search_implem_12(
743
- n, x, *handler.get(), cq, &ndis, &nlist_visited, context);
780
+ n,
781
+ x,
782
+ *handler,
783
+ cq,
784
+ &ndis,
785
+ &nlist_visited,
786
+ context,
787
+ nullptr,
788
+ *scanner);
744
789
  } else if (impl == 10) {
745
790
  search_implem_10(
746
- n, x, *handler.get(), cq, &ndis, &nlist_visited, context);
791
+ n,
792
+ x,
793
+ /*k=*/0, // range search has no k
794
+ *handler,
795
+ cq,
796
+ &ndis,
797
+ &nlist_visited,
798
+ context,
799
+ params,
800
+ *scanner);
747
801
  } else {
748
802
  FAISS_THROW_FMT("Range search implem %d not implemented", impl);
749
803
  }
@@ -762,35 +816,33 @@ void IndexIVFFastScan::range_search_dispatch_implem(
762
816
  if (!cq_i.done()) {
763
817
  cq_i.quantize_slice(quantizer, x, quantizer_params);
764
818
  }
765
- std::unique_ptr<SIMDResultHandlerToFloat> handler;
766
- if (is_max) {
767
- handler.reset(new PartialRangeHandler<
768
- CMax<uint16_t, int64_t>,
769
- true>(pres, radius, 0, i0, i1, sel));
770
- } else {
771
- handler.reset(new PartialRangeHandler<
772
- CMin<uint16_t, int64_t>,
773
- true>(pres, radius, 0, i0, i1, sel));
774
- }
819
+ auto scanner = make_partial_range_scanner(
820
+ is_max, pres, radius, 0, i0, i1, sel);
821
+ auto* handler = scanner->handler();
775
822
 
776
823
  if (impl == 12 || impl == 13) {
777
824
  search_implem_12(
778
825
  i1 - i0,
779
826
  x + i0 * d,
780
- *handler.get(),
827
+ *handler,
781
828
  cq_i,
782
829
  &ndis,
783
830
  &nlist_visited,
784
- context);
831
+ context,
832
+ nullptr,
833
+ *scanner);
785
834
  } else {
786
835
  search_implem_10(
787
836
  i1 - i0,
788
837
  x + i0 * d,
789
- *handler.get(),
838
+ /*k=*/0,
839
+ *handler,
790
840
  cq_i,
791
841
  &ndis,
792
842
  &nlist_visited,
793
- context);
843
+ context,
844
+ params,
845
+ *scanner);
794
846
  }
795
847
  }
796
848
  pres.finalize();
@@ -811,7 +863,7 @@ void IndexIVFFastScan::search_implem_1(
811
863
  idx_t* labels,
812
864
  const CoarseQuantized& cq,
813
865
  const FastScanDistancePostProcessing& context,
814
- const IVFSearchParameters* params) const {
866
+ const IVFSearchParameters* /* params */) const {
815
867
  FAISS_THROW_IF_NOT(orig_invlists);
816
868
 
817
869
  size_t dim12 = ksub * M;
@@ -824,7 +876,7 @@ void IndexIVFFastScan::search_implem_1(
824
876
  bool single_LUT = !lookup_table_is_3d();
825
877
 
826
878
  size_t ndis = 0, nlist_visited = 0;
827
- size_t nprobe = cq.nprobe;
879
+ size_t cur_nprobe = cq.nprobe;
828
880
  #pragma omp parallel for reduction(+ : ndis, nlist_visited)
829
881
  for (idx_t i = 0; i < n; i++) {
830
882
  int64_t* heap_ids = labels + i * k;
@@ -835,11 +887,11 @@ void IndexIVFFastScan::search_implem_1(
835
887
  if (single_LUT) {
836
888
  LUT = dis_tables.get() + i * dim12;
837
889
  }
838
- for (idx_t j = 0; j < nprobe; j++) {
890
+ for (size_t j = 0; j < cur_nprobe; j++) {
839
891
  if (!single_LUT) {
840
- LUT = dis_tables.get() + (i * nprobe + j) * dim12;
892
+ LUT = dis_tables.get() + (i * cur_nprobe + j) * dim12;
841
893
  }
842
- idx_t list_no = cq.ids[i * nprobe + j];
894
+ idx_t list_no = cq.ids[i * cur_nprobe + j];
843
895
  if (list_no < 0) {
844
896
  continue;
845
897
  }
@@ -850,7 +902,7 @@ void IndexIVFFastScan::search_implem_1(
850
902
  InvertedLists::ScopedCodes codes(orig_invlists, list_no);
851
903
  InvertedLists::ScopedIds ids(orig_invlists, list_no);
852
904
 
853
- float bias = biases.get() ? biases[i * nprobe + j] : 0;
905
+ float bias = biases.get() ? biases[i * cur_nprobe + j] : 0;
854
906
 
855
907
  estimators_from_tables_generic<C>(
856
908
  *this,
@@ -882,7 +934,7 @@ void IndexIVFFastScan::search_implem_2(
882
934
  idx_t* labels,
883
935
  const CoarseQuantized& cq,
884
936
  const FastScanDistancePostProcessing& context,
885
- const IVFSearchParameters* params) const {
937
+ const IVFSearchParameters* /* params */) const {
886
938
  FAISS_THROW_IF_NOT(orig_invlists);
887
939
 
888
940
  size_t dim12 = ksub * M2;
@@ -895,7 +947,7 @@ void IndexIVFFastScan::search_implem_2(
895
947
  bool single_LUT = !lookup_table_is_3d();
896
948
 
897
949
  size_t ndis = 0, nlist_visited = 0;
898
- size_t nprobe = cq.nprobe;
950
+ size_t cur_nprobe = cq.nprobe;
899
951
 
900
952
  #pragma omp parallel for reduction(+ : ndis, nlist_visited)
901
953
  for (idx_t i = 0; i < n; i++) {
@@ -908,11 +960,11 @@ void IndexIVFFastScan::search_implem_2(
908
960
  if (single_LUT) {
909
961
  LUT = dis_tables.get() + i * dim12;
910
962
  }
911
- for (idx_t j = 0; j < nprobe; j++) {
963
+ for (size_t j = 0; j < cur_nprobe; j++) {
912
964
  if (!single_LUT) {
913
- LUT = dis_tables.get() + (i * nprobe + j) * dim12;
965
+ LUT = dis_tables.get() + (i * cur_nprobe + j) * dim12;
914
966
  }
915
- idx_t list_no = cq.ids[i * nprobe + j];
967
+ idx_t list_no = cq.ids[i * cur_nprobe + j];
916
968
  if (list_no < 0) {
917
969
  continue;
918
970
  }
@@ -923,7 +975,7 @@ void IndexIVFFastScan::search_implem_2(
923
975
  InvertedLists::ScopedCodes codes(orig_invlists, list_no);
924
976
  InvertedLists::ScopedIds ids(orig_invlists, list_no);
925
977
 
926
- uint16_t bias = biases.get() ? biases[i * nprobe + j] : 0;
978
+ uint16_t bias = biases.get() ? biases[i * cur_nprobe + j] : 0;
927
979
 
928
980
  estimators_from_tables_generic<C>(
929
981
  *this,
@@ -962,12 +1014,14 @@ void IndexIVFFastScan::search_implem_2(
962
1014
  void IndexIVFFastScan::search_implem_10(
963
1015
  idx_t n,
964
1016
  const float* x,
1017
+ idx_t k,
965
1018
  SIMDResultHandlerToFloat& handler,
966
1019
  const CoarseQuantized& cq,
967
1020
  size_t* ndis_out,
968
1021
  size_t* nlist_out,
969
1022
  const FastScanDistancePostProcessing& context,
970
- const IVFSearchParameters* /* params */) const {
1023
+ const IVFSearchParameters* params,
1024
+ FastScanCodeScanner& scanner) const {
971
1025
  size_t dim12 = ksub * M2;
972
1026
  AlignedTable<uint8_t> dis_tables;
973
1027
  AlignedTable<uint16_t> biases;
@@ -981,7 +1035,28 @@ void IndexIVFFastScan::search_implem_10(
981
1035
  int qmap1[1];
982
1036
  handler.q_map = qmap1;
983
1037
  handler.begin(skip & 16 ? nullptr : normalizers.get());
984
- size_t nprobe = cq.nprobe;
1038
+ size_t cur_nprobe = cq.nprobe;
1039
+
1040
+ // Per-query early-stop options from SearchParametersIVF.
1041
+ const size_t param_max_codes = params ? params->max_codes : 0;
1042
+ const size_t param_max_lists_num = params ? params->max_lists_num : 0;
1043
+ const bool ensure_topk_full = params ? params->ensure_topk_full : false;
1044
+ const size_t cur_max_codes = (param_max_codes == 0)
1045
+ ? std::numeric_limits<size_t>::max()
1046
+ : param_max_codes;
1047
+ const size_t cur_max_lists_num =
1048
+ (param_max_lists_num == 0) ? cur_nprobe : param_max_lists_num;
1049
+ // Effective budgets are the values tested in the probe loop below.
1050
+ // ensure_topk_full raises small budgets to reduce empty result slots.
1051
+ const size_t effective_max_codes = ensure_topk_full
1052
+ ? std::max(cur_max_codes, (size_t)k)
1053
+ : cur_max_codes;
1054
+ const size_t effective_max_lists_num = ensure_topk_full
1055
+ ? std::max(cur_max_lists_num, (size_t)k)
1056
+ : cur_max_lists_num;
1057
+ const bool is_range_search = k == 0;
1058
+ const size_t max_empty_result_buckets =
1059
+ (is_range_search && params) ? params->max_empty_result_buckets : 0;
985
1060
 
986
1061
  // Allocate probe_map once and reuse it
987
1062
  std::vector<int> probe_map;
@@ -989,13 +1064,30 @@ void IndexIVFFastScan::search_implem_10(
989
1064
 
990
1065
  for (idx_t i = 0; i < n; i++) {
991
1066
  const uint8_t* LUT = nullptr;
992
- qmap1[0] = i;
1067
+ qmap1[0] = static_cast<int>(i);
993
1068
 
994
1069
  if (single_LUT) {
995
1070
  LUT = dis_tables.get() + i * dim12;
996
1071
  }
997
- for (idx_t j = 0; j < nprobe; j++) {
998
- size_t ij = i * nprobe + j;
1072
+ // Per-query counters. For k-NN, the handler count excludes rows
1073
+ // filtered by IDSelector.
1074
+ const size_t scan0 = handler.count_scanned_rows();
1075
+ size_t nscan_q = 0;
1076
+ size_t nlists_visited_q = 0;
1077
+ size_t nempty_result_buckets = 0;
1078
+ for (size_t j = 0; j < cur_nprobe; j++) {
1079
+ if (!is_range_search) {
1080
+ nscan_q = handler.count_scanned_rows() - scan0;
1081
+ }
1082
+ // Early-stop check: apply k-NN max_codes/max_lists_num before
1083
+ // starting the next list. nscan_q excludes IDSelector-filtered
1084
+ // rows.
1085
+ if (nscan_q >= effective_max_codes ||
1086
+ nlists_visited_q >= effective_max_lists_num) {
1087
+ break;
1088
+ }
1089
+ const size_t prev_in_range_num = handler.in_range_num;
1090
+ size_t ij = i * cur_nprobe + j;
999
1091
  if (!single_LUT) {
1000
1092
  LUT = dis_tables.get() + ij * dim12;
1001
1093
  }
@@ -1005,10 +1097,22 @@ void IndexIVFFastScan::search_implem_10(
1005
1097
 
1006
1098
  idx_t list_no = cq.ids[ij];
1007
1099
  if (list_no < 0) {
1100
+ // Early-stop check: invalid probes count as empty range
1101
+ // buckets.
1102
+ if (max_empty_result_buckets > 0 &&
1103
+ ++nempty_result_buckets >= max_empty_result_buckets) {
1104
+ break;
1105
+ }
1008
1106
  continue;
1009
1107
  }
1010
1108
  size_t ls = invlists->list_size(list_no);
1011
1109
  if (ls == 0) {
1110
+ // Early-stop check: empty inverted lists count as empty range
1111
+ // buckets.
1112
+ if (max_empty_result_buckets > 0 &&
1113
+ ++nempty_result_buckets >= max_empty_result_buckets) {
1114
+ break;
1115
+ }
1012
1116
  continue;
1013
1117
  }
1014
1118
 
@@ -1023,19 +1127,35 @@ void IndexIVFFastScan::search_implem_10(
1023
1127
  probe_map[0] = static_cast<int>(j);
1024
1128
  handler.set_list_context(list_no, probe_map);
1025
1129
 
1026
- pq4_accumulate_loop(
1130
+ scanner.accumulate_loop(
1027
1131
  1,
1028
1132
  roundup(ls, bbs),
1029
1133
  bbs,
1030
1134
  M2,
1031
1135
  codes.get(),
1032
1136
  LUT,
1033
- handler,
1034
- context.norm_scaler,
1137
+ context.pq2x4_scale,
1035
1138
  get_block_stride());
1036
1139
 
1037
1140
  ndis += ls;
1038
1141
  nlist_visited++;
1142
+ if (is_range_search) {
1143
+ nscan_q += ls;
1144
+ }
1145
+ nlists_visited_q++;
1146
+
1147
+ if (max_empty_result_buckets > 0) {
1148
+ // Early-stop check: apply the range-search empty-bucket
1149
+ // budget after each visited list; any hit resets the counter.
1150
+ if (handler.in_range_num == prev_in_range_num) {
1151
+ nempty_result_buckets++;
1152
+ if (nempty_result_buckets >= max_empty_result_buckets) {
1153
+ break;
1154
+ }
1155
+ } else {
1156
+ nempty_result_buckets = 0;
1157
+ }
1158
+ }
1039
1159
  }
1040
1160
  }
1041
1161
 
@@ -1052,7 +1172,8 @@ void IndexIVFFastScan::search_implem_12(
1052
1172
  size_t* ndis_out,
1053
1173
  size_t* nlist_out,
1054
1174
  const FastScanDistancePostProcessing& context,
1055
- const IVFSearchParameters* /* params */) const {
1175
+ const IVFSearchParameters* /* params */,
1176
+ FastScanCodeScanner& scanner) const {
1056
1177
  if (n == 0) { // does not work well with reservoir
1057
1178
  return;
1058
1179
  }
@@ -1073,15 +1194,15 @@ void IndexIVFFastScan::search_implem_12(
1073
1194
  int rank; // this is the rank'th result of the coarse quantizer
1074
1195
  };
1075
1196
  bool single_LUT = !lookup_table_is_3d();
1076
- size_t nprobe = cq.nprobe;
1197
+ size_t cur_nprobe = cq.nprobe;
1077
1198
 
1078
1199
  std::vector<QC> qcs;
1079
1200
  {
1080
- int ij = 0;
1081
- for (int i = 0; i < n; i++) {
1082
- for (int j = 0; j < nprobe; j++) {
1201
+ size_t ij = 0;
1202
+ for (idx_t i = 0; i < n; i++) {
1203
+ for (size_t j = 0; j < cur_nprobe; j++) {
1083
1204
  if (cq.ids[ij] >= 0) {
1084
- qcs.push_back(QC{i, int(cq.ids[ij]), int(j)});
1205
+ qcs.push_back(QC{int(i), int(cq.ids[ij]), int(j)});
1085
1206
  }
1086
1207
  ij++;
1087
1208
  }
@@ -1093,7 +1214,7 @@ void IndexIVFFastScan::search_implem_12(
1093
1214
 
1094
1215
  // prepare the result handlers
1095
1216
 
1096
- int actual_qbs2 = this->qbs2 ? this->qbs2 : 11;
1217
+ int actual_qbs2 = static_cast<int>(this->qbs2 ? this->qbs2 : 11);
1097
1218
 
1098
1219
  std::vector<uint16_t> tmp_bias;
1099
1220
  if (biases.get()) {
@@ -1130,7 +1251,7 @@ void IndexIVFFastScan::search_implem_12(
1130
1251
  nlist_visited++;
1131
1252
 
1132
1253
  // re-organize LUTs and biases into the right order
1133
- int nc = i1 - i0;
1254
+ int nc = static_cast<int>(i1 - i0);
1134
1255
 
1135
1256
  std::vector<int> q_map(nc), lut_entries(nc);
1136
1257
  AlignedTable<uint8_t> LUT(nc * dim12);
@@ -1140,7 +1261,7 @@ void IndexIVFFastScan::search_implem_12(
1140
1261
  for (size_t i = i0; i < i1; i++) {
1141
1262
  const QC& qc = qcs[i];
1142
1263
  q_map[i - i0] = qc.qno;
1143
- int ij = qc.qno * nprobe + qc.rank;
1264
+ int ij = static_cast<int>(qc.qno * cur_nprobe + qc.rank);
1144
1265
  lut_entries[i - i0] = single_LUT ? qc.qno : ij;
1145
1266
  if (biases.get()) {
1146
1267
  tmp_bias[i - i0] = biases[ij];
@@ -1148,7 +1269,7 @@ void IndexIVFFastScan::search_implem_12(
1148
1269
  }
1149
1270
  pq4_pack_LUT_qbs_q_map(
1150
1271
  qbs_for_list,
1151
- M2,
1272
+ static_cast<int>(M2),
1152
1273
  dis_tables.get(),
1153
1274
  lut_entries.data(),
1154
1275
  LUT.get());
@@ -1176,14 +1297,13 @@ void IndexIVFFastScan::search_implem_12(
1176
1297
  }
1177
1298
  handler.set_list_context(list_no, probe_map);
1178
1299
 
1179
- pq4_accumulate_loop_qbs(
1300
+ scanner.accumulate_loop_qbs(
1180
1301
  qbs_for_list,
1181
1302
  list_size,
1182
- M2,
1303
+ static_cast<int>(M2),
1183
1304
  codes.get(),
1184
1305
  LUT.get(),
1185
- handler,
1186
- context.norm_scaler,
1306
+ context.pq2x4_scale,
1187
1307
  get_block_stride());
1188
1308
  // prepare for next loop
1189
1309
  i0 = i1;
@@ -1229,15 +1349,15 @@ void IndexIVFFastScan::search_implem_14(
1229
1349
  int rank; // this is the rank'th result of the coarse quantizer
1230
1350
  };
1231
1351
  bool single_LUT = !lookup_table_is_3d();
1232
- size_t nprobe = cq.nprobe;
1352
+ size_t cur_nprobe = cq.nprobe;
1233
1353
 
1234
1354
  std::vector<QC> qcs;
1235
1355
  {
1236
- int ij = 0;
1237
- for (int i = 0; i < n; i++) {
1238
- for (int j = 0; j < nprobe; j++) {
1356
+ size_t ij = 0;
1357
+ for (idx_t i = 0; i < n; i++) {
1358
+ for (size_t j = 0; j < cur_nprobe; j++) {
1239
1359
  if (cq.ids[ij] >= 0) {
1240
- qcs.push_back(QC{i, int(cq.ids[ij]), int(j)});
1360
+ qcs.push_back(QC{int(i), int(cq.ids[ij]), int(j)});
1241
1361
  }
1242
1362
  ij++;
1243
1363
  }
@@ -1316,25 +1436,24 @@ void IndexIVFFastScan::search_implem_14(
1316
1436
  std::vector<idx_t> local_idx(k * n);
1317
1437
  std::vector<float> local_dis(k * n);
1318
1438
 
1319
- // prepare the result handlers
1320
- std::unique_ptr<SIMDResultHandlerToFloat> handler(
1321
- this->make_knn_handler(
1322
- is_max,
1323
- impl,
1324
- n,
1325
- k,
1326
- local_dis.data(),
1327
- local_idx.data(),
1328
- sel,
1329
- context));
1330
- handler->begin(normalizers.get());
1331
-
1332
- int actual_qbs2 = this->qbs2 ? this->qbs2 : 11;
1439
+ auto scanner = make_knn_scanner(
1440
+ is_max,
1441
+ n,
1442
+ k,
1443
+ local_dis.data(),
1444
+ local_idx.data(),
1445
+ sel,
1446
+ impl,
1447
+ context);
1448
+ SIMDResultHandlerToFloat* handler_ptr = scanner->handler();
1449
+ handler_ptr->begin(normalizers.get());
1450
+
1451
+ int actual_qbs2 = static_cast<int>(this->qbs2 ? this->qbs2 : 11);
1333
1452
 
1334
1453
  std::vector<uint16_t> tmp_bias;
1335
1454
  if (biases.get()) {
1336
1455
  tmp_bias.resize(actual_qbs2);
1337
- handler->dbias = tmp_bias.data();
1456
+ handler_ptr->dbias = tmp_bias.data();
1338
1457
  }
1339
1458
 
1340
1459
  std::set<int> q_set;
@@ -1345,7 +1464,8 @@ void IndexIVFFastScan::search_implem_14(
1345
1464
  probe_map.reserve(actual_qbs2);
1346
1465
 
1347
1466
  #pragma omp for schedule(dynamic)
1348
- for (idx_t cluster = 0; cluster < ses.size(); cluster++) {
1467
+ for (idx_t cluster = 0; cluster < static_cast<idx_t>(ses.size());
1468
+ cluster++) {
1349
1469
  size_t i0 = ses[cluster].start;
1350
1470
  size_t i1 = ses[cluster].end;
1351
1471
  size_t list_size = ses[cluster].list_size;
@@ -1353,7 +1473,7 @@ void IndexIVFFastScan::search_implem_14(
1353
1473
  int list_no = qcs[i0].list_no;
1354
1474
 
1355
1475
  // re-organize LUTs and biases into the right order
1356
- int nc = i1 - i0;
1476
+ int nc = static_cast<int>(i1 - i0);
1357
1477
 
1358
1478
  std::vector<int> q_map(nc), lut_entries(nc);
1359
1479
  AlignedTable<uint8_t> LUT(nc * dim12);
@@ -1364,7 +1484,7 @@ void IndexIVFFastScan::search_implem_14(
1364
1484
  const QC& qc = qcs[i];
1365
1485
  q_map[i - i0] = qc.qno;
1366
1486
  q_set.insert(qc.qno);
1367
- int ij = qc.qno * nprobe + qc.rank;
1487
+ int ij = static_cast<int>(qc.qno * cur_nprobe + qc.rank);
1368
1488
  lut_entries[i - i0] = single_LUT ? qc.qno : ij;
1369
1489
  if (biases.get()) {
1370
1490
  tmp_bias[i - i0] = biases[ij];
@@ -1372,7 +1492,7 @@ void IndexIVFFastScan::search_implem_14(
1372
1492
  }
1373
1493
  pq4_pack_LUT_qbs_q_map(
1374
1494
  qbs_for_list,
1375
- M2,
1495
+ static_cast<int>(M2),
1376
1496
  dis_tables.get(),
1377
1497
  lut_entries.data(),
1378
1498
  LUT.get());
@@ -1386,9 +1506,9 @@ void IndexIVFFastScan::search_implem_14(
1386
1506
 
1387
1507
  // prepare the handler
1388
1508
 
1389
- handler->ntotal = list_size;
1390
- handler->q_map = q_map.data();
1391
- handler->id_map = ids.get();
1509
+ handler_ptr->ntotal = list_size;
1510
+ handler_ptr->q_map = q_map.data();
1511
+ handler_ptr->id_map = ids.get();
1392
1512
 
1393
1513
  // Set context information for handlers that need additional data
1394
1514
  // All queries in this batch access the same list_no, but each
@@ -1398,21 +1518,20 @@ void IndexIVFFastScan::search_implem_14(
1398
1518
  const QC& qc = qcs[i];
1399
1519
  probe_map[i - i0] = qc.rank;
1400
1520
  }
1401
- handler->set_list_context(list_no, probe_map);
1521
+ handler_ptr->set_list_context(list_no, probe_map);
1402
1522
 
1403
- pq4_accumulate_loop_qbs(
1523
+ scanner->accumulate_loop_qbs(
1404
1524
  qbs_for_list,
1405
1525
  list_size,
1406
- M2,
1526
+ static_cast<int>(M2),
1407
1527
  codes.get(),
1408
1528
  LUT.get(),
1409
- *handler.get(),
1410
- context.norm_scaler,
1529
+ context.pq2x4_scale,
1411
1530
  get_block_stride());
1412
1531
  }
1413
1532
 
1414
1533
  // labels is in-place for HeapHC
1415
- handler->end();
1534
+ handler_ptr->end();
1416
1535
 
1417
1536
  // merge per-thread results
1418
1537
  #pragma omp single
@@ -1466,7 +1585,7 @@ void IndexIVFFastScan::reconstruct_from_offset(
1466
1585
  for (size_t m = 0; m < M; m++) {
1467
1586
  uint8_t c =
1468
1587
  pq4_get_packed_element(list_codes.get(), bbs, M2, offset, m);
1469
- bsw.write(c, nbits);
1588
+ bsw.write(c, static_cast<int>(nbits));
1470
1589
  }
1471
1590
 
1472
1591
  sa_decode(1, code.data(), recons);
@@ -1477,7 +1596,7 @@ void IndexIVFFastScan::reconstruct_orig_invlists() {
1477
1596
  FAISS_THROW_IF_NOT(orig_invlists->list_size(0) == 0);
1478
1597
 
1479
1598
  #pragma omp parallel for if (nlist > 100)
1480
- for (idx_t list_no = 0; list_no < nlist; list_no++) {
1599
+ for (idx_t list_no = 0; list_no < static_cast<idx_t>(nlist); list_no++) {
1481
1600
  InvertedLists::ScopedCodes codes(invlists, list_no);
1482
1601
  InvertedLists::ScopedIds ids(invlists, list_no);
1483
1602
  size_t list_size = invlists->list_size(list_no);
@@ -1489,7 +1608,7 @@ void IndexIVFFastScan::reconstruct_orig_invlists() {
1489
1608
  for (size_t m = 0; m < M; m++) {
1490
1609
  uint8_t c =
1491
1610
  pq4_get_packed_element(codes.get(), bbs, M2, offset, m);
1492
- bsw.write(c, nbits);
1611
+ bsw.write(c, static_cast<int>(nbits));
1493
1612
  }
1494
1613
 
1495
1614
  // get id
@@ -1516,7 +1635,7 @@ void IndexIVFFastScan::sa_decode(idx_t n, const uint8_t* codes, float* x)
1516
1635
  fine_quantizer->decode(code + coarse_size, xi, 1);
1517
1636
  if (by_residual) {
1518
1637
  quantizer->reconstruct(list_no, residual.data());
1519
- for (size_t j = 0; j < d; j++) {
1638
+ for (int j = 0; j < d; j++) {
1520
1639
  xi[j] += residual[j];
1521
1640
  }
1522
1641
  }