faiss 0.5.3 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (379) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/ext/faiss/ext.cpp +1 -1
  4. data/ext/faiss/extconf.rb +4 -4
  5. data/ext/faiss/index.cpp +63 -45
  6. data/ext/faiss/index_binary.cpp +37 -27
  7. data/ext/faiss/kmeans.cpp +9 -8
  8. data/ext/faiss/pca_matrix.cpp +9 -7
  9. data/ext/faiss/product_quantizer.cpp +13 -11
  10. data/ext/faiss/utils.cpp +4 -2
  11. data/ext/faiss/utils.h +4 -0
  12. data/lib/faiss/version.rb +1 -1
  13. data/lib/faiss.rb +1 -1
  14. data/vendor/faiss/faiss/AutoTune.cpp +214 -82
  15. data/vendor/faiss/faiss/AutoTune.h +14 -1
  16. data/vendor/faiss/faiss/Clustering.cpp +97 -249
  17. data/vendor/faiss/faiss/Clustering.h +18 -0
  18. data/vendor/faiss/faiss/IVFlib.cpp +67 -44
  19. data/vendor/faiss/faiss/Index.cpp +25 -12
  20. data/vendor/faiss/faiss/Index.h +26 -4
  21. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  22. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +68 -61
  23. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  24. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  25. data/vendor/faiss/faiss/IndexBinary.cpp +6 -3
  26. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  27. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  28. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  29. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  30. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +92 -95
  31. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  32. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  33. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  34. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +120 -414
  35. data/vendor/faiss/faiss/IndexFastScan.cpp +105 -129
  36. data/vendor/faiss/faiss/IndexFastScan.h +35 -24
  37. data/vendor/faiss/faiss/IndexFlat.cpp +216 -152
  38. data/vendor/faiss/faiss/IndexFlat.h +32 -14
  39. data/vendor/faiss/faiss/IndexFlatCodes.cpp +88 -41
  40. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
  41. data/vendor/faiss/faiss/IndexHNSW.cpp +299 -187
  42. data/vendor/faiss/faiss/IndexHNSW.h +30 -14
  43. data/vendor/faiss/faiss/IndexIDMap.cpp +26 -22
  44. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  45. data/vendor/faiss/faiss/IndexIVF.cpp +535 -405
  46. data/vendor/faiss/faiss/IndexIVF.h +47 -16
  47. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  48. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +105 -99
  49. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +6 -3
  50. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +379 -249
  51. data/vendor/faiss/faiss/IndexIVFFastScan.h +65 -60
  52. data/vendor/faiss/faiss/IndexIVFFlat.cpp +41 -124
  53. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  54. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +89 -138
  55. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  56. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  57. data/vendor/faiss/faiss/IndexIVFPQ.cpp +77 -907
  58. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +184 -122
  59. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  60. data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -18
  61. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +59 -60
  62. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -3
  63. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +564 -416
  64. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +269 -111
  65. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  66. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  67. data/vendor/faiss/faiss/IndexLSH.cpp +44 -25
  68. data/vendor/faiss/faiss/IndexLattice.cpp +41 -36
  69. data/vendor/faiss/faiss/IndexNNDescent.cpp +37 -21
  70. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  71. data/vendor/faiss/faiss/IndexNSG.cpp +40 -23
  72. data/vendor/faiss/faiss/IndexNSG.h +0 -2
  73. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +32 -12
  74. data/vendor/faiss/faiss/IndexPQ.cpp +129 -213
  75. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  76. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  77. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  78. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  79. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  80. data/vendor/faiss/faiss/IndexRaBitQ.cpp +31 -43
  81. data/vendor/faiss/faiss/IndexRaBitQ.h +4 -3
  82. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +135 -317
  83. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +192 -34
  84. data/vendor/faiss/faiss/IndexRefine.cpp +30 -55
  85. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  86. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  87. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  88. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  89. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  90. data/vendor/faiss/faiss/IndexShards.cpp +13 -13
  91. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  92. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  93. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  94. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  95. data/vendor/faiss/faiss/MetricType.h +29 -6
  96. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  97. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  98. data/vendor/faiss/faiss/VectorTransform.cpp +349 -141
  99. data/vendor/faiss/faiss/VectorTransform.h +39 -16
  100. data/vendor/faiss/faiss/build.cpp +23 -0
  101. data/vendor/faiss/faiss/build.h +15 -0
  102. data/vendor/faiss/faiss/clone_index.cpp +55 -51
  103. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  104. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  105. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  106. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  107. data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +6 -1
  108. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  109. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  110. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  111. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  112. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  113. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  114. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  115. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  116. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  117. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  118. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  119. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  120. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  121. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  122. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  123. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  124. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  125. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  126. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  127. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  128. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  129. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  130. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +64 -34
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  132. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -28
  134. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  135. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  136. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
  137. data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
  138. data/vendor/faiss/faiss/impl/CodePacker.cpp +7 -3
  139. data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
  140. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
  141. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
  142. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  143. data/vendor/faiss/faiss/impl/FaissAssert.h +64 -3
  144. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  145. data/vendor/faiss/faiss/impl/HNSW.cpp +117 -351
  146. data/vendor/faiss/faiss/impl/HNSW.h +21 -40
  147. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  148. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  149. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  150. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +114 -102
  151. data/vendor/faiss/faiss/impl/NNDescent.cpp +63 -26
  152. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  153. data/vendor/faiss/faiss/impl/NSG.cpp +44 -26
  154. data/vendor/faiss/faiss/impl/NSG.h +20 -10
  155. data/vendor/faiss/faiss/impl/Panorama.cpp +76 -52
  156. data/vendor/faiss/faiss/impl/Panorama.h +265 -78
  157. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  158. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  159. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +62 -37
  160. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  161. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  162. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  163. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +99 -80
  164. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  165. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +135 -37
  166. data/vendor/faiss/faiss/impl/RaBitQUtils.h +148 -21
  167. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +298 -301
  168. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
  169. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
  170. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
  171. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +40 -32
  172. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  173. data/vendor/faiss/faiss/impl/ResultHandler.h +218 -113
  174. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +119 -2362
  175. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -3
  176. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  177. data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
  178. data/vendor/faiss/faiss/impl/VisitedTable.h +76 -0
  179. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  180. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  181. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  182. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  183. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  184. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  185. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  186. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  187. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  188. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  189. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  190. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  191. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  192. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  193. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  194. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  195. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  196. data/vendor/faiss/faiss/impl/expanded_scanners.h +163 -0
  197. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  198. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  199. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  200. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  201. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  202. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  203. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +176 -4
  204. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  205. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  206. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  207. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  208. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  209. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  210. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -348
  211. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  212. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  213. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +290 -142
  214. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  215. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  216. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  217. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  218. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  219. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  220. data/vendor/faiss/faiss/impl/index_read.cpp +1950 -505
  221. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -2
  222. data/vendor/faiss/faiss/impl/index_write.cpp +112 -21
  223. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  224. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  225. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  226. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +81 -40
  227. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  228. data/vendor/faiss/faiss/impl/mapped_io.cpp +15 -8
  229. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  230. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  231. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  232. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  233. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  234. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  235. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  236. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  237. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.h} +43 -220
  238. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.h} +25 -112
  239. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +59 -0
  240. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  241. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +256 -0
  242. data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -146
  243. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  244. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +320 -483
  245. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  246. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
  247. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +137 -0
  248. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +371 -0
  249. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +190 -0
  250. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
  251. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +603 -0
  252. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +597 -0
  253. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +388 -0
  254. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +630 -0
  255. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  256. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +387 -0
  257. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +54 -0
  258. data/vendor/faiss/faiss/impl/simd_dispatch.h +173 -0
  259. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  260. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +274 -171
  261. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  262. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  263. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  264. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +275 -217
  265. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  266. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  267. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  268. data/vendor/faiss/faiss/index_factory.cpp +115 -28
  269. data/vendor/faiss/faiss/index_io.h +53 -3
  270. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +73 -20
  271. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  272. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  273. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  274. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  275. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  276. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +14 -14
  277. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  278. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
  279. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  280. data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
  281. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  282. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  283. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  284. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  285. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  286. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  287. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  288. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +19 -2
  289. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  290. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +19 -2
  291. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +14 -0
  292. data/vendor/faiss/faiss/utils/Heap.cpp +56 -10
  293. data/vendor/faiss/faiss/utils/Heap.h +21 -0
  294. data/vendor/faiss/faiss/utils/NeuralNet.cpp +54 -40
  295. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  296. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  297. data/vendor/faiss/faiss/utils/distances.cpp +507 -559
  298. data/vendor/faiss/faiss/utils/distances.h +118 -1
  299. data/vendor/faiss/faiss/utils/distances_dispatch.h +250 -0
  300. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  301. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  302. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  303. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  304. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  305. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  306. data/vendor/faiss/faiss/utils/distances_simd.cpp +72 -3681
  307. data/vendor/faiss/faiss/utils/extra_distances.cpp +60 -102
  308. data/vendor/faiss/faiss/utils/extra_distances.h +79 -7
  309. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  310. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  311. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  312. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  313. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  314. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  315. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  316. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  317. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  318. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  319. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  320. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  321. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  322. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  323. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  324. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  325. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  326. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  327. data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
  328. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  329. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  330. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  331. data/vendor/faiss/faiss/utils/rabitq_simd.h +124 -343
  332. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  333. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +154 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +777 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +306 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1431 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1095 -0
  339. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  340. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  341. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +392 -0
  342. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  343. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  344. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  345. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  346. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  347. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  348. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  349. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  350. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  351. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  352. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  353. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  354. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  355. data/vendor/faiss/faiss/utils/simd_levels.cpp +334 -0
  356. data/vendor/faiss/faiss/utils/simd_levels.h +183 -0
  357. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  358. data/vendor/faiss/faiss/utils/utils.cpp +21 -14
  359. data/vendor/faiss/faiss/utils/utils.h +3 -3
  360. metadata +156 -42
  361. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  362. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  363. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
  364. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
  365. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -216
  366. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -224
  367. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  368. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  369. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  370. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  371. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -228
  372. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  373. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  374. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -450
  375. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  376. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  377. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  378. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -296
  379. /data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
@@ -17,11 +17,10 @@
17
17
  #include <faiss/IndexIVFPQ.h>
18
18
  #include <faiss/impl/AuxIndexStructures.h>
19
19
  #include <faiss/impl/FaissAssert.h>
20
- #include <faiss/impl/FastScanDistancePostProcessing.h>
21
- #include <faiss/impl/LookupTableScaler.h>
22
20
  #include <faiss/impl/RaBitQUtils.h>
23
- #include <faiss/impl/pq4_fast_scan.h>
24
- #include <faiss/impl/simd_result_handlers.h>
21
+ #include <faiss/impl/fast_scan/FastScanDistancePostProcessing.h>
22
+ #include <faiss/impl/fast_scan/fast_scan.h>
23
+ #include <faiss/impl/fast_scan/simd_result_handlers.h>
25
24
  #include <faiss/invlists/BlockInvertedLists.h>
26
25
  #include <faiss/utils/hamming.h>
27
26
  #include <faiss/utils/quantize_lut.h>
@@ -29,20 +28,24 @@
29
28
 
30
29
  namespace faiss {
31
30
 
32
- using namespace simd_result_handlers;
33
-
34
31
  inline size_t roundup(size_t a, size_t b) {
35
32
  return (a + b - 1) / b * b;
36
33
  }
37
34
 
38
35
  IndexIVFFastScan::IndexIVFFastScan(
39
- Index* quantizer,
40
- size_t d,
41
- size_t nlist,
42
- size_t code_size,
36
+ Index* quantizer_in,
37
+ size_t d_in,
38
+ size_t nlist_in,
39
+ size_t code_size_in,
43
40
  MetricType metric,
44
- bool own_invlists)
45
- : IndexIVF(quantizer, d, nlist, code_size, metric, own_invlists) {
41
+ bool own_invlists_in)
42
+ : IndexIVF(
43
+ quantizer_in,
44
+ d_in,
45
+ nlist_in,
46
+ code_size_in,
47
+ metric,
48
+ own_invlists_in) {
46
49
  // unlike other indexes, we prefer no residuals for performance reasons.
47
50
  by_residual = false;
48
51
  FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT);
@@ -56,35 +59,40 @@ IndexIVFFastScan::IndexIVFFastScan() {
56
59
  }
57
60
 
58
61
  void IndexIVFFastScan::init_fastscan(
59
- Quantizer* fine_quantizer,
60
- size_t M,
62
+ Quantizer* fine_quantizer_in,
63
+ size_t M_in,
61
64
  size_t nbits_init,
62
- size_t nlist,
65
+ size_t nlist_in,
63
66
  MetricType /* metric */,
64
67
  int bbs_2,
65
- bool own_invlists) {
68
+ bool own_invlists_in) {
66
69
  FAISS_THROW_IF_NOT(bbs_2 % 32 == 0);
67
70
  FAISS_THROW_IF_NOT(nbits_init == 4);
68
- FAISS_THROW_IF_NOT(fine_quantizer->d == d);
71
+ FAISS_THROW_IF_NOT(fine_quantizer_in->d == static_cast<size_t>(d));
69
72
 
70
- this->fine_quantizer = fine_quantizer;
71
- this->M = M;
73
+ this->fine_quantizer = fine_quantizer_in;
74
+ this->M = M_in;
72
75
  this->nbits = nbits_init;
73
76
  this->bbs = bbs_2;
74
77
  ksub = (1 << nbits_init);
75
- M2 = roundup(M, 2);
78
+ M2 = roundup(M_in, 2);
76
79
  code_size = M2 / 2;
77
- FAISS_THROW_IF_NOT(code_size == fine_quantizer->code_size);
80
+ FAISS_THROW_IF_NOT(code_size == fine_quantizer_in->code_size);
78
81
 
79
82
  is_trained = false;
80
- if (own_invlists) {
81
- replace_invlists(new BlockInvertedLists(nlist, get_CodePacker()), true);
83
+ if (own_invlists_in) {
84
+ replace_invlists(
85
+ new BlockInvertedLists(nlist_in, get_CodePacker()), true);
82
86
  }
83
87
  }
84
88
 
85
89
  void IndexIVFFastScan::init_code_packer() {
86
90
  auto bil = dynamic_cast<BlockInvertedLists*>(invlists);
87
- FAISS_THROW_IF_NOT(bil);
91
+ if (!bil) {
92
+ // invlists is not block-packed (e.g., when own_invlists=false).
93
+ // Nothing to do — the caller manages inverted lists externally.
94
+ return;
95
+ }
88
96
  delete bil->packer; // in case there was one before
89
97
  bil->packer = get_CodePacker();
90
98
  }
@@ -95,18 +103,19 @@ IndexIVFFastScan::~IndexIVFFastScan() = default;
95
103
  * Code management functions
96
104
  *********************************************************/
97
105
 
98
- void IndexIVFFastScan::preprocess_code_metadata(
99
- idx_t /* n */,
100
- const uint8_t* /* flat_codes */,
101
- idx_t /* start_global_idx */) {
102
- // Default: no-op
103
- }
104
-
105
106
  size_t IndexIVFFastScan::code_packing_stride() const {
106
107
  // Default: use standard M-byte stride
107
108
  return 0;
108
109
  }
109
110
 
111
+ size_t IndexIVFFastScan::get_block_stride() const {
112
+ std::unique_ptr<CodePacker> packer(get_CodePacker());
113
+ FAISS_THROW_IF_NOT_MSG(
114
+ packer->nvec == static_cast<size_t>(bbs),
115
+ "CodePacker must pack bbs vectors per block for fast-scan");
116
+ return packer->block_size;
117
+ }
118
+
110
119
  void IndexIVFFastScan::add_with_ids(
111
120
  idx_t n,
112
121
  const float* x,
@@ -148,9 +157,6 @@ void IndexIVFFastScan::add_with_ids(
148
157
  AlignedTable<uint8_t> flat_codes(n * code_size);
149
158
  encode_vectors(n, x, idx.get(), flat_codes.get());
150
159
 
151
- // Allow subclasses to preprocess metadata before packing
152
- preprocess_code_metadata(n, flat_codes.get(), ntotal);
153
-
154
160
  DirectMapAdd dm_adder(direct_map, n, xids);
155
161
  BlockInvertedLists* bil = dynamic_cast<BlockInvertedLists*>(invlists);
156
162
  FAISS_THROW_IF_NOT_MSG(bil, "only block inverted lists supported");
@@ -206,7 +212,11 @@ void IndexIVFFastScan::add_with_ids(
206
212
  bbs,
207
213
  M2,
208
214
  bil->codes[list_no].data(),
209
- pack_stride);
215
+ pack_stride,
216
+ get_block_stride());
217
+
218
+ postprocess_packed_codes(
219
+ list_no, list_size, i1 - i0, list_codes.data());
210
220
 
211
221
  i0 = i1;
212
222
  }
@@ -237,22 +247,22 @@ void estimators_from_tables_generic(
237
247
  int64_t* heap_ids,
238
248
  const FastScanDistancePostProcessing& context) {
239
249
  using accu_t = typename C::T;
240
- size_t nscale = context.norm_scaler ? context.norm_scaler->nscale : 0;
250
+ size_t nscale = context.pq2x4_scale ? 2 : 0;
241
251
  for (size_t j = 0; j < ncodes; ++j) {
242
252
  BitstringReader bsr(codes + j * index.code_size, index.code_size);
243
253
  accu_t dis = bias;
244
254
  const dis_t* __restrict dt = dis_table;
245
255
 
246
256
  for (size_t m = 0; m < index.M - nscale; m++) {
247
- uint64_t c = bsr.read(index.nbits);
257
+ uint64_t c = bsr.read(static_cast<int>(index.nbits));
248
258
  dis += dt[c];
249
259
  dt += index.ksub;
250
260
  }
251
261
 
252
- if (context.norm_scaler) {
262
+ if (nscale) {
253
263
  for (size_t m = 0; m < nscale; m++) {
254
- uint64_t c = bsr.read(index.nbits);
255
- dis += context.norm_scaler->scale_one(dt[c]);
264
+ uint64_t c = bsr.read(static_cast<int>(index.nbits));
265
+ dis += dt[c] * context.pq2x4_scale;
256
266
  dt += index.ksub;
257
267
  }
258
268
  }
@@ -283,33 +293,33 @@ void IndexIVFFastScan::compute_LUT_uint8(
283
293
  AlignedTable<float> biases_float;
284
294
 
285
295
  compute_LUT(n, x, cq, dis_tables_float, biases_float, context);
286
- size_t nprobe = cq.nprobe;
296
+ size_t cur_nprobe = cq.nprobe;
287
297
  bool lut_is_3d = lookup_table_is_3d();
288
298
  size_t dim123 = ksub * M;
289
299
  size_t dim123_2 = ksub * M2;
290
300
  if (lut_is_3d) {
291
- dim123 *= nprobe;
292
- dim123_2 *= nprobe;
301
+ dim123 *= cur_nprobe;
302
+ dim123_2 *= cur_nprobe;
293
303
  }
294
304
  dis_tables.resize(n * dim123_2);
295
305
  if (biases_float.get()) {
296
- biases.resize(n * nprobe);
306
+ biases.resize(n * cur_nprobe);
297
307
  }
298
308
 
299
309
  // OMP for MSVC requires i to have signed integral type
300
310
  #pragma omp parallel for if (n > 100)
301
- for (int64_t i = 0; i < n; i++) {
311
+ for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
302
312
  const float* t_in = dis_tables_float.get() + i * dim123;
303
313
  const float* b_in = nullptr;
304
314
  uint8_t* t_out = dis_tables.get() + i * dim123_2;
305
315
  uint16_t* b_out = nullptr;
306
316
  if (biases_float.get()) {
307
- b_in = biases_float.get() + i * nprobe;
308
- b_out = biases.get() + i * nprobe;
317
+ b_in = biases_float.get() + i * cur_nprobe;
318
+ b_out = biases.get() + i * cur_nprobe;
309
319
  }
310
320
 
311
321
  quantize_LUT_and_bias(
312
- nprobe,
322
+ cur_nprobe,
313
323
  M,
314
324
  ksub,
315
325
  lut_is_3d,
@@ -356,10 +366,14 @@ void IndexIVFFastScan::search_preassigned(
356
366
  bool store_pairs,
357
367
  const IVFSearchParameters* params,
358
368
  IndexIVFStats* stats) const {
359
- size_t nprobe = this->nprobe;
369
+ size_t cur_nprobe = this->nprobe;
360
370
  if (params) {
361
- FAISS_THROW_IF_NOT(params->max_codes == 0);
362
- nprobe = params->nprobe;
371
+ // Range-search-only option.
372
+ FAISS_THROW_IF_NOT_MSG(
373
+ params->max_empty_result_buckets == 0,
374
+ "max_empty_result_buckets is a range-search knob and is "
375
+ "not honored by fastscan knn search");
376
+ cur_nprobe = params->nprobe;
363
377
  }
364
378
 
365
379
  FAISS_THROW_IF_NOT_MSG(
@@ -368,7 +382,7 @@ void IndexIVFFastScan::search_preassigned(
368
382
  FAISS_THROW_IF_NOT(k > 0);
369
383
  FastScanDistancePostProcessing empty_context{};
370
384
 
371
- const CoarseQuantized cq = {nprobe, centroid_dis, assign};
385
+ const CoarseQuantized cq = {cur_nprobe, centroid_dis, assign};
372
386
  search_dispatch_implem(
373
387
  n, x, k, distances, labels, cq, empty_context, params);
374
388
  }
@@ -379,45 +393,35 @@ void IndexIVFFastScan::range_search(
379
393
  float radius,
380
394
  RangeSearchResult* result,
381
395
  const SearchParameters* params_in) const {
382
- size_t nprobe = this->nprobe;
396
+ size_t cur_nprobe = this->nprobe;
383
397
  const IVFSearchParameters* params = nullptr;
384
398
  if (params_in) {
385
399
  params = dynamic_cast<const IVFSearchParameters*>(params_in);
386
400
  FAISS_THROW_IF_NOT_MSG(
387
401
  params, "IndexIVFFastScan params have incorrect type");
388
- nprobe = params->nprobe;
402
+ // k-NN-only options.
403
+ FAISS_THROW_IF_NOT_MSG(
404
+ params->max_lists_num == 0,
405
+ "max_lists_num is a knn knob and is not honored by "
406
+ "fastscan range search");
407
+ FAISS_THROW_IF_NOT_MSG(
408
+ !params->ensure_topk_full,
409
+ "ensure_topk_full is a knn knob and is not honored by "
410
+ "fastscan range search");
411
+ FAISS_THROW_IF_NOT_MSG(
412
+ params->max_codes == 0,
413
+ "max_codes is not honored by fastscan range search");
414
+ cur_nprobe = params->nprobe;
389
415
  }
390
416
  FastScanDistancePostProcessing empty_context{};
391
417
 
392
- const CoarseQuantized cq = {nprobe, nullptr, nullptr};
418
+ const CoarseQuantized cq = {cur_nprobe, nullptr, nullptr};
393
419
  range_search_dispatch_implem(
394
420
  n, x, radius, *result, cq, empty_context, params);
395
421
  }
396
422
 
397
423
  namespace {
398
424
 
399
- template <class C>
400
- ResultHandlerCompare<C, true>* make_knn_handler_fixC(
401
- int impl,
402
- idx_t n,
403
- idx_t k,
404
- float* distances,
405
- idx_t* labels,
406
- const IDSelector* sel,
407
- const float* normalizers) {
408
- using HeapHC = HeapHandler<C, true>;
409
- using ReservoirHC = ReservoirHandler<C, true>;
410
- using SingleResultHC = SingleResultHandler<C, true>;
411
-
412
- if (k == 1) {
413
- return new SingleResultHC(n, 0, distances, labels, sel);
414
- } else if (impl % 2 == 0) {
415
- return new HeapHC(n, 0, k, distances, labels, sel, normalizers);
416
- } else /* if (impl % 2 == 1) */ {
417
- return new ReservoirHC(n, 0, k, 2 * k, distances, labels, sel);
418
- }
419
- }
420
-
421
425
  using CoarseQuantized = IndexIVFFastScan::CoarseQuantized;
422
426
 
423
427
  struct CoarseQuantizedWithBuffer : CoarseQuantized {
@@ -452,8 +456,8 @@ struct CoarseQuantizedWithBuffer : CoarseQuantized {
452
456
 
453
457
  struct CoarseQuantizedSlice : CoarseQuantizedWithBuffer {
454
458
  const size_t i0, i1;
455
- CoarseQuantizedSlice(const CoarseQuantized& cq, size_t i0, size_t i1)
456
- : CoarseQuantizedWithBuffer(cq), i0(i0), i1(i1) {
459
+ CoarseQuantizedSlice(const CoarseQuantized& cq, size_t i0_in, size_t i1_in)
460
+ : CoarseQuantizedWithBuffer(cq), i0(i0_in), i1(i1_in) {
457
461
  if (done()) {
458
462
  dis += nprobe * i0;
459
463
  ids += nprobe * i0;
@@ -471,20 +475,20 @@ struct CoarseQuantizedSlice : CoarseQuantizedWithBuffer {
471
475
  int compute_search_nslice(
472
476
  const IndexIVFFastScan* index,
473
477
  size_t n,
474
- size_t nprobe) {
478
+ size_t cur_nprobe) {
475
479
  int nslice;
476
- if (n <= omp_get_max_threads()) {
477
- nslice = n;
480
+ if (n <= static_cast<size_t>(omp_get_max_threads())) {
481
+ nslice = static_cast<int>(n);
478
482
  } else if (index->lookup_table_is_3d()) {
479
483
  // make sure we don't make too big LUT tables
480
- size_t lut_size_per_query = index->M * index->ksub * nprobe *
484
+ size_t lut_size_per_query = index->M * index->ksub * cur_nprobe *
481
485
  (sizeof(float) + sizeof(uint8_t));
482
486
 
483
487
  size_t max_lut_size = precomputed_table_max_bytes;
484
488
  // how many queries we can handle within mem budget
485
489
  size_t nq_ok = std::max(max_lut_size / lut_size_per_query, size_t(1));
486
- nslice = roundup(
487
- std::max(size_t(n / nq_ok), size_t(1)), omp_get_max_threads());
490
+ nslice = static_cast<int>(roundup(
491
+ std::max(size_t(n / nq_ok), size_t(1)), omp_get_max_threads()));
488
492
  } else {
489
493
  // LUTs unlikely to be a limiting factor
490
494
  nslice = omp_get_max_threads();
@@ -494,23 +498,25 @@ int compute_search_nslice(
494
498
 
495
499
  } // namespace
496
500
 
497
- SIMDResultHandlerToFloat* IndexIVFFastScan::make_knn_handler(
501
+ std::unique_ptr<FastScanCodeScanner> IndexIVFFastScan::make_knn_scanner(
498
502
  bool is_max,
499
- int impl,
500
503
  idx_t n,
501
504
  idx_t k,
502
505
  float* distances,
503
506
  idx_t* labels,
504
507
  const IDSelector* sel,
505
- const FastScanDistancePostProcessing&,
506
- const float* normalizers) const {
507
- if (is_max) {
508
- return make_knn_handler_fixC<CMax<uint16_t, int64_t>>(
509
- impl, n, k, distances, labels, sel, normalizers);
510
- } else {
511
- return make_knn_handler_fixC<CMin<uint16_t, int64_t>>(
512
- impl, n, k, distances, labels, sel, normalizers);
513
- }
508
+ int impl,
509
+ const FastScanDistancePostProcessing&) const {
510
+ return make_fast_scan_knn_scanner(
511
+ is_max,
512
+ impl,
513
+ n,
514
+ 0,
515
+ k,
516
+ distances,
517
+ labels,
518
+ sel,
519
+ /*with_id_map=*/true);
514
520
  }
515
521
 
516
522
  void IndexIVFFastScan::search_dispatch_implem(
@@ -522,13 +528,12 @@ void IndexIVFFastScan::search_dispatch_implem(
522
528
  const CoarseQuantized& cq_in,
523
529
  const FastScanDistancePostProcessing& context,
524
530
  const IVFSearchParameters* params) const {
525
- const idx_t nprobe = params ? params->nprobe : this->nprobe;
531
+ const idx_t cur_nprobe = params ? params->nprobe : this->nprobe;
526
532
  const IDSelector* sel = (params) ? params->sel : nullptr;
527
533
  const SearchParameters* quantizer_params =
528
534
  params ? params->quantizer_params : nullptr;
529
535
 
530
536
  bool is_max = !is_similarity_metric(metric_type);
531
- using RH = SIMDResultHandlerToFloat;
532
537
 
533
538
  if (n == 0) {
534
539
  return;
@@ -537,8 +542,14 @@ void IndexIVFFastScan::search_dispatch_implem(
537
542
  // actual implementation used
538
543
  int impl = implem;
539
544
 
545
+ // Early-stop k-NN options require the per-query implementations.
546
+ const bool any_early_term_knob = params &&
547
+ (params->max_codes != 0 || params->max_lists_num != 0 ||
548
+ params->ensure_topk_full);
549
+
540
550
  if (impl == 0) {
541
- if (bbs == 32) {
551
+ // Auto-select the per-query path when early-stop budgets are used.
552
+ if (bbs == 32 && !any_early_term_knob) {
542
553
  impl = 12;
543
554
  } else {
544
555
  impl = 10;
@@ -555,15 +566,24 @@ void IndexIVFFastScan::search_dispatch_implem(
555
566
  impl -= 100;
556
567
  }
557
568
 
569
+ if (any_early_term_knob) {
570
+ FAISS_THROW_IF_NOT_MSG(
571
+ impl == 10 || impl == 11,
572
+ "max_codes / max_lists_num / ensure_topk_full are only "
573
+ "supported by IndexIVFFastScan implem 10/11; set "
574
+ "index.implem = 10 (or 11 for k>20) explicitly, or leave it "
575
+ "at the default 0");
576
+ }
577
+
558
578
  CoarseQuantizedWithBuffer cq(cq_in);
559
- cq.nprobe = nprobe;
579
+ cq.nprobe = cur_nprobe;
560
580
 
561
581
  if (!cq.done() && !multiple_threads) {
562
582
  // we do the coarse quantization here execpt when search is
563
583
  // sliced over threads (then it is more efficient to have each thread do
564
584
  // its own coarse quantization)
565
585
  cq.quantize(quantizer, n, x, quantizer_params);
566
- invlists->prefetch_lists(cq.ids, n * cq.nprobe);
586
+ invlists->prefetch_lists(cq.ids, static_cast<int>(n * cq.nprobe));
567
587
  }
568
588
 
569
589
  if (impl == 1) {
@@ -586,43 +606,38 @@ void IndexIVFFastScan::search_dispatch_implem(
586
606
  size_t ndis = 0, nlist_visited = 0;
587
607
 
588
608
  if (!multiple_threads) {
589
- // clang-format off
590
- if (impl == 12 || impl == 13) {
591
- std::unique_ptr<RH> handler(
592
- static_cast<RH*>(this->make_knn_handler(
593
- is_max,
594
- impl,
595
- n,
596
- k,
597
- distances,
598
- labels,
599
- sel,
600
- context))
601
- );
602
- search_implem_12(
603
- n, x, *handler.get(),
604
- cq, &ndis, &nlist_visited, context, params);
605
- } else if (impl == 14 || impl == 15) {
609
+ if (impl == 14 || impl == 15) {
606
610
  search_implem_14(
607
- n, x, k, distances, labels,
608
- cq, impl, context, params);
611
+ n, x, k, distances, labels, cq, impl, context, params);
609
612
  } else {
610
- std::unique_ptr<RH> handler(
611
- static_cast<RH*>(this->make_knn_handler(
612
- is_max,
613
- impl,
614
- n,
615
- k,
616
- distances,
617
- labels,
618
- sel,
619
- context))
620
- );
621
- search_implem_10(
622
- n, x, *handler.get(), cq,
623
- &ndis, &nlist_visited, context, params);
613
+ auto scanner = make_knn_scanner(
614
+ is_max, n, k, distances, labels, sel, impl, context);
615
+ auto* handler = scanner->handler();
616
+ if (impl == 12 || impl == 13) {
617
+ search_implem_12(
618
+ n,
619
+ x,
620
+ *handler,
621
+ cq,
622
+ &ndis,
623
+ &nlist_visited,
624
+ context,
625
+ params,
626
+ *scanner);
627
+ } else {
628
+ search_implem_10(
629
+ n,
630
+ x,
631
+ k,
632
+ *handler,
633
+ cq,
634
+ &ndis,
635
+ &nlist_visited,
636
+ context,
637
+ params,
638
+ *scanner);
639
+ }
624
640
  }
625
- // clang-format on
626
641
  } else {
627
642
  // explicitly slice over threads
628
643
  int nslice = compute_search_nslice(this, n, cq.nprobe);
@@ -647,30 +662,43 @@ void IndexIVFFastScan::search_dispatch_implem(
647
662
  // pointer
648
663
  FastScanDistancePostProcessing thread_context = context;
649
664
  if (thread_context.query_factors != nullptr) {
650
- thread_context.query_factors += i0 * nprobe;
665
+ thread_context.query_factors += i0 * cur_nprobe;
651
666
  }
652
667
 
653
- std::unique_ptr<RH> handler(
654
- static_cast<RH*>(this->make_knn_handler(
655
- is_max,
656
- impl,
657
- i1 - i0,
658
- k,
659
- dis_i,
660
- lab_i,
661
- sel,
662
- thread_context)));
663
- // clang-format off
668
+ auto scanner = make_knn_scanner(
669
+ is_max,
670
+ i1 - i0,
671
+ k,
672
+ dis_i,
673
+ lab_i,
674
+ sel,
675
+ impl,
676
+ thread_context);
677
+ auto* handler = scanner->handler();
664
678
  if (impl == 12 || impl == 13) {
665
679
  search_implem_12(
666
- i1 - i0, x + i0 * d, *handler.get(),
667
- cq_i, &ndis, &nlist_visited, thread_context, params);
680
+ i1 - i0,
681
+ x + i0 * d,
682
+ *handler,
683
+ cq_i,
684
+ &ndis,
685
+ &nlist_visited,
686
+ thread_context,
687
+ params,
688
+ *scanner);
668
689
  } else {
669
690
  search_implem_10(
670
- i1 - i0, x + i0 * d, *handler.get(),
671
- cq_i, &ndis, &nlist_visited, thread_context, params);
691
+ i1 - i0,
692
+ x + i0 * d,
693
+ k,
694
+ *handler,
695
+ cq_i,
696
+ &ndis,
697
+ &nlist_visited,
698
+ thread_context,
699
+ params,
700
+ *scanner);
672
701
  }
673
- // clang-format on
674
702
  }
675
703
  }
676
704
  }
@@ -700,11 +728,23 @@ void IndexIVFFastScan::range_search_dispatch_implem(
700
728
  if (n == 0) {
701
729
  return;
702
730
  }
731
+ // FastScan range early-stop budget: enabled only for ordered per-query
732
+ // scanning below.
733
+ const bool use_empty_result_early_exit =
734
+ params && params->max_empty_result_buckets != 0;
735
+ const int pmode = this->parallel_mode & ~PARALLEL_MODE_NO_HEAP_INIT;
736
+ FAISS_THROW_IF_NOT_MSG(
737
+ !use_empty_result_early_exit || pmode == 0,
738
+ "max_empty_result_buckets supported only for parallel_mode = 0");
739
+
703
740
  // actual implementation used
704
741
  int impl = implem;
705
742
 
706
743
  if (impl == 0) {
707
- if (bbs == 32) {
744
+ if (use_empty_result_early_exit) {
745
+ // Empty-bucket early stop needs per-query probe order.
746
+ impl = 10;
747
+ } else if (bbs == 32) {
708
748
  impl = 12;
709
749
  } else {
710
750
  impl = 10;
@@ -720,28 +760,44 @@ void IndexIVFFastScan::range_search_dispatch_implem(
720
760
  impl -= 100;
721
761
  }
722
762
 
763
+ FAISS_THROW_IF_NOT_MSG(
764
+ !use_empty_result_early_exit || impl == 10,
765
+ "max_empty_result_buckets is only supported by "
766
+ "IndexIVFFastScan range-search implem 10");
767
+
723
768
  if (!multiple_threads && !cq.done()) {
724
769
  cq.quantize(quantizer, n, x, quantizer_params);
725
- invlists->prefetch_lists(cq.ids, n * cq.nprobe);
770
+ invlists->prefetch_lists(cq.ids, static_cast<int>(n * cq.nprobe));
726
771
  }
727
772
 
728
773
  size_t ndis = 0, nlist_visited = 0;
729
774
 
730
775
  if (!multiple_threads) { // single thread
731
- std::unique_ptr<SIMDResultHandlerToFloat> handler;
732
- if (is_max) {
733
- handler.reset(new RangeHandler<CMax<uint16_t, int64_t>, true>(
734
- rres, radius, 0, sel));
735
- } else {
736
- handler.reset(new RangeHandler<CMin<uint16_t, int64_t>, true>(
737
- rres, radius, 0, sel));
738
- }
776
+ auto scanner = make_range_scanner(is_max, rres, radius, 0, sel);
777
+ auto* handler = scanner->handler();
739
778
  if (impl == 12) {
740
779
  search_implem_12(
741
- n, x, *handler.get(), cq, &ndis, &nlist_visited, context);
780
+ n,
781
+ x,
782
+ *handler,
783
+ cq,
784
+ &ndis,
785
+ &nlist_visited,
786
+ context,
787
+ nullptr,
788
+ *scanner);
742
789
  } else if (impl == 10) {
743
790
  search_implem_10(
744
- n, x, *handler.get(), cq, &ndis, &nlist_visited, context);
791
+ n,
792
+ x,
793
+ /*k=*/0, // range search has no k
794
+ *handler,
795
+ cq,
796
+ &ndis,
797
+ &nlist_visited,
798
+ context,
799
+ params,
800
+ *scanner);
745
801
  } else {
746
802
  FAISS_THROW_FMT("Range search implem %d not implemented", impl);
747
803
  }
@@ -760,35 +816,33 @@ void IndexIVFFastScan::range_search_dispatch_implem(
760
816
  if (!cq_i.done()) {
761
817
  cq_i.quantize_slice(quantizer, x, quantizer_params);
762
818
  }
763
- std::unique_ptr<SIMDResultHandlerToFloat> handler;
764
- if (is_max) {
765
- handler.reset(new PartialRangeHandler<
766
- CMax<uint16_t, int64_t>,
767
- true>(pres, radius, 0, i0, i1, sel));
768
- } else {
769
- handler.reset(new PartialRangeHandler<
770
- CMin<uint16_t, int64_t>,
771
- true>(pres, radius, 0, i0, i1, sel));
772
- }
819
+ auto scanner = make_partial_range_scanner(
820
+ is_max, pres, radius, 0, i0, i1, sel);
821
+ auto* handler = scanner->handler();
773
822
 
774
823
  if (impl == 12 || impl == 13) {
775
824
  search_implem_12(
776
825
  i1 - i0,
777
826
  x + i0 * d,
778
- *handler.get(),
827
+ *handler,
779
828
  cq_i,
780
829
  &ndis,
781
830
  &nlist_visited,
782
- context);
831
+ context,
832
+ nullptr,
833
+ *scanner);
783
834
  } else {
784
835
  search_implem_10(
785
836
  i1 - i0,
786
837
  x + i0 * d,
787
- *handler.get(),
838
+ /*k=*/0,
839
+ *handler,
788
840
  cq_i,
789
841
  &ndis,
790
842
  &nlist_visited,
791
- context);
843
+ context,
844
+ params,
845
+ *scanner);
792
846
  }
793
847
  }
794
848
  pres.finalize();
@@ -809,7 +863,7 @@ void IndexIVFFastScan::search_implem_1(
809
863
  idx_t* labels,
810
864
  const CoarseQuantized& cq,
811
865
  const FastScanDistancePostProcessing& context,
812
- const IVFSearchParameters* params) const {
866
+ const IVFSearchParameters* /* params */) const {
813
867
  FAISS_THROW_IF_NOT(orig_invlists);
814
868
 
815
869
  size_t dim12 = ksub * M;
@@ -822,7 +876,7 @@ void IndexIVFFastScan::search_implem_1(
822
876
  bool single_LUT = !lookup_table_is_3d();
823
877
 
824
878
  size_t ndis = 0, nlist_visited = 0;
825
- size_t nprobe = cq.nprobe;
879
+ size_t cur_nprobe = cq.nprobe;
826
880
  #pragma omp parallel for reduction(+ : ndis, nlist_visited)
827
881
  for (idx_t i = 0; i < n; i++) {
828
882
  int64_t* heap_ids = labels + i * k;
@@ -833,11 +887,11 @@ void IndexIVFFastScan::search_implem_1(
833
887
  if (single_LUT) {
834
888
  LUT = dis_tables.get() + i * dim12;
835
889
  }
836
- for (idx_t j = 0; j < nprobe; j++) {
890
+ for (size_t j = 0; j < cur_nprobe; j++) {
837
891
  if (!single_LUT) {
838
- LUT = dis_tables.get() + (i * nprobe + j) * dim12;
892
+ LUT = dis_tables.get() + (i * cur_nprobe + j) * dim12;
839
893
  }
840
- idx_t list_no = cq.ids[i * nprobe + j];
894
+ idx_t list_no = cq.ids[i * cur_nprobe + j];
841
895
  if (list_no < 0) {
842
896
  continue;
843
897
  }
@@ -848,7 +902,7 @@ void IndexIVFFastScan::search_implem_1(
848
902
  InvertedLists::ScopedCodes codes(orig_invlists, list_no);
849
903
  InvertedLists::ScopedIds ids(orig_invlists, list_no);
850
904
 
851
- float bias = biases.get() ? biases[i * nprobe + j] : 0;
905
+ float bias = biases.get() ? biases[i * cur_nprobe + j] : 0;
852
906
 
853
907
  estimators_from_tables_generic<C>(
854
908
  *this,
@@ -880,7 +934,7 @@ void IndexIVFFastScan::search_implem_2(
880
934
  idx_t* labels,
881
935
  const CoarseQuantized& cq,
882
936
  const FastScanDistancePostProcessing& context,
883
- const IVFSearchParameters* params) const {
937
+ const IVFSearchParameters* /* params */) const {
884
938
  FAISS_THROW_IF_NOT(orig_invlists);
885
939
 
886
940
  size_t dim12 = ksub * M2;
@@ -893,7 +947,7 @@ void IndexIVFFastScan::search_implem_2(
893
947
  bool single_LUT = !lookup_table_is_3d();
894
948
 
895
949
  size_t ndis = 0, nlist_visited = 0;
896
- size_t nprobe = cq.nprobe;
950
+ size_t cur_nprobe = cq.nprobe;
897
951
 
898
952
  #pragma omp parallel for reduction(+ : ndis, nlist_visited)
899
953
  for (idx_t i = 0; i < n; i++) {
@@ -906,11 +960,11 @@ void IndexIVFFastScan::search_implem_2(
906
960
  if (single_LUT) {
907
961
  LUT = dis_tables.get() + i * dim12;
908
962
  }
909
- for (idx_t j = 0; j < nprobe; j++) {
963
+ for (size_t j = 0; j < cur_nprobe; j++) {
910
964
  if (!single_LUT) {
911
- LUT = dis_tables.get() + (i * nprobe + j) * dim12;
965
+ LUT = dis_tables.get() + (i * cur_nprobe + j) * dim12;
912
966
  }
913
- idx_t list_no = cq.ids[i * nprobe + j];
967
+ idx_t list_no = cq.ids[i * cur_nprobe + j];
914
968
  if (list_no < 0) {
915
969
  continue;
916
970
  }
@@ -921,7 +975,7 @@ void IndexIVFFastScan::search_implem_2(
921
975
  InvertedLists::ScopedCodes codes(orig_invlists, list_no);
922
976
  InvertedLists::ScopedIds ids(orig_invlists, list_no);
923
977
 
924
- uint16_t bias = biases.get() ? biases[i * nprobe + j] : 0;
978
+ uint16_t bias = biases.get() ? biases[i * cur_nprobe + j] : 0;
925
979
 
926
980
  estimators_from_tables_generic<C>(
927
981
  *this,
@@ -960,12 +1014,14 @@ void IndexIVFFastScan::search_implem_2(
960
1014
  void IndexIVFFastScan::search_implem_10(
961
1015
  idx_t n,
962
1016
  const float* x,
1017
+ idx_t k,
963
1018
  SIMDResultHandlerToFloat& handler,
964
1019
  const CoarseQuantized& cq,
965
1020
  size_t* ndis_out,
966
1021
  size_t* nlist_out,
967
1022
  const FastScanDistancePostProcessing& context,
968
- const IVFSearchParameters* /* params */) const {
1023
+ const IVFSearchParameters* params,
1024
+ FastScanCodeScanner& scanner) const {
969
1025
  size_t dim12 = ksub * M2;
970
1026
  AlignedTable<uint8_t> dis_tables;
971
1027
  AlignedTable<uint16_t> biases;
@@ -979,7 +1035,28 @@ void IndexIVFFastScan::search_implem_10(
979
1035
  int qmap1[1];
980
1036
  handler.q_map = qmap1;
981
1037
  handler.begin(skip & 16 ? nullptr : normalizers.get());
982
- size_t nprobe = cq.nprobe;
1038
+ size_t cur_nprobe = cq.nprobe;
1039
+
1040
+ // Per-query early-stop options from SearchParametersIVF.
1041
+ const size_t param_max_codes = params ? params->max_codes : 0;
1042
+ const size_t param_max_lists_num = params ? params->max_lists_num : 0;
1043
+ const bool ensure_topk_full = params ? params->ensure_topk_full : false;
1044
+ const size_t cur_max_codes = (param_max_codes == 0)
1045
+ ? std::numeric_limits<size_t>::max()
1046
+ : param_max_codes;
1047
+ const size_t cur_max_lists_num =
1048
+ (param_max_lists_num == 0) ? cur_nprobe : param_max_lists_num;
1049
+ // Effective budgets are the values tested in the probe loop below.
1050
+ // ensure_topk_full raises small budgets to reduce empty result slots.
1051
+ const size_t effective_max_codes = ensure_topk_full
1052
+ ? std::max(cur_max_codes, (size_t)k)
1053
+ : cur_max_codes;
1054
+ const size_t effective_max_lists_num = ensure_topk_full
1055
+ ? std::max(cur_max_lists_num, (size_t)k)
1056
+ : cur_max_lists_num;
1057
+ const bool is_range_search = k == 0;
1058
+ const size_t max_empty_result_buckets =
1059
+ (is_range_search && params) ? params->max_empty_result_buckets : 0;
983
1060
 
984
1061
  // Allocate probe_map once and reuse it
985
1062
  std::vector<int> probe_map;
@@ -987,13 +1064,30 @@ void IndexIVFFastScan::search_implem_10(
987
1064
 
988
1065
  for (idx_t i = 0; i < n; i++) {
989
1066
  const uint8_t* LUT = nullptr;
990
- qmap1[0] = i;
1067
+ qmap1[0] = static_cast<int>(i);
991
1068
 
992
1069
  if (single_LUT) {
993
1070
  LUT = dis_tables.get() + i * dim12;
994
1071
  }
995
- for (idx_t j = 0; j < nprobe; j++) {
996
- size_t ij = i * nprobe + j;
1072
+ // Per-query counters. For k-NN, the handler count excludes rows
1073
+ // filtered by IDSelector.
1074
+ const size_t scan0 = handler.count_scanned_rows();
1075
+ size_t nscan_q = 0;
1076
+ size_t nlists_visited_q = 0;
1077
+ size_t nempty_result_buckets = 0;
1078
+ for (size_t j = 0; j < cur_nprobe; j++) {
1079
+ if (!is_range_search) {
1080
+ nscan_q = handler.count_scanned_rows() - scan0;
1081
+ }
1082
+ // Early-stop check: apply k-NN max_codes/max_lists_num before
1083
+ // starting the next list. nscan_q excludes IDSelector-filtered
1084
+ // rows.
1085
+ if (nscan_q >= effective_max_codes ||
1086
+ nlists_visited_q >= effective_max_lists_num) {
1087
+ break;
1088
+ }
1089
+ const size_t prev_in_range_num = handler.in_range_num;
1090
+ size_t ij = i * cur_nprobe + j;
997
1091
  if (!single_LUT) {
998
1092
  LUT = dis_tables.get() + ij * dim12;
999
1093
  }
@@ -1003,10 +1097,22 @@ void IndexIVFFastScan::search_implem_10(
1003
1097
 
1004
1098
  idx_t list_no = cq.ids[ij];
1005
1099
  if (list_no < 0) {
1100
+ // Early-stop check: invalid probes count as empty range
1101
+ // buckets.
1102
+ if (max_empty_result_buckets > 0 &&
1103
+ ++nempty_result_buckets >= max_empty_result_buckets) {
1104
+ break;
1105
+ }
1006
1106
  continue;
1007
1107
  }
1008
1108
  size_t ls = invlists->list_size(list_no);
1009
1109
  if (ls == 0) {
1110
+ // Early-stop check: empty inverted lists count as empty range
1111
+ // buckets.
1112
+ if (max_empty_result_buckets > 0 &&
1113
+ ++nempty_result_buckets >= max_empty_result_buckets) {
1114
+ break;
1115
+ }
1010
1116
  continue;
1011
1117
  }
1012
1118
 
@@ -1021,18 +1127,35 @@ void IndexIVFFastScan::search_implem_10(
1021
1127
  probe_map[0] = static_cast<int>(j);
1022
1128
  handler.set_list_context(list_no, probe_map);
1023
1129
 
1024
- pq4_accumulate_loop(
1130
+ scanner.accumulate_loop(
1025
1131
  1,
1026
1132
  roundup(ls, bbs),
1027
1133
  bbs,
1028
1134
  M2,
1029
1135
  codes.get(),
1030
1136
  LUT,
1031
- handler,
1032
- context.norm_scaler);
1137
+ context.pq2x4_scale,
1138
+ get_block_stride());
1033
1139
 
1034
1140
  ndis += ls;
1035
1141
  nlist_visited++;
1142
+ if (is_range_search) {
1143
+ nscan_q += ls;
1144
+ }
1145
+ nlists_visited_q++;
1146
+
1147
+ if (max_empty_result_buckets > 0) {
1148
+ // Early-stop check: apply the range-search empty-bucket
1149
+ // budget after each visited list; any hit resets the counter.
1150
+ if (handler.in_range_num == prev_in_range_num) {
1151
+ nempty_result_buckets++;
1152
+ if (nempty_result_buckets >= max_empty_result_buckets) {
1153
+ break;
1154
+ }
1155
+ } else {
1156
+ nempty_result_buckets = 0;
1157
+ }
1158
+ }
1036
1159
  }
1037
1160
  }
1038
1161
 
@@ -1049,7 +1172,8 @@ void IndexIVFFastScan::search_implem_12(
1049
1172
  size_t* ndis_out,
1050
1173
  size_t* nlist_out,
1051
1174
  const FastScanDistancePostProcessing& context,
1052
- const IVFSearchParameters* /* params */) const {
1175
+ const IVFSearchParameters* /* params */,
1176
+ FastScanCodeScanner& scanner) const {
1053
1177
  if (n == 0) { // does not work well with reservoir
1054
1178
  return;
1055
1179
  }
@@ -1070,15 +1194,15 @@ void IndexIVFFastScan::search_implem_12(
1070
1194
  int rank; // this is the rank'th result of the coarse quantizer
1071
1195
  };
1072
1196
  bool single_LUT = !lookup_table_is_3d();
1073
- size_t nprobe = cq.nprobe;
1197
+ size_t cur_nprobe = cq.nprobe;
1074
1198
 
1075
1199
  std::vector<QC> qcs;
1076
1200
  {
1077
- int ij = 0;
1078
- for (int i = 0; i < n; i++) {
1079
- for (int j = 0; j < nprobe; j++) {
1201
+ size_t ij = 0;
1202
+ for (idx_t i = 0; i < n; i++) {
1203
+ for (size_t j = 0; j < cur_nprobe; j++) {
1080
1204
  if (cq.ids[ij] >= 0) {
1081
- qcs.push_back(QC{i, int(cq.ids[ij]), int(j)});
1205
+ qcs.push_back(QC{int(i), int(cq.ids[ij]), int(j)});
1082
1206
  }
1083
1207
  ij++;
1084
1208
  }
@@ -1090,7 +1214,7 @@ void IndexIVFFastScan::search_implem_12(
1090
1214
 
1091
1215
  // prepare the result handlers
1092
1216
 
1093
- int actual_qbs2 = this->qbs2 ? this->qbs2 : 11;
1217
+ int actual_qbs2 = static_cast<int>(this->qbs2 ? this->qbs2 : 11);
1094
1218
 
1095
1219
  std::vector<uint16_t> tmp_bias;
1096
1220
  if (biases.get()) {
@@ -1127,7 +1251,7 @@ void IndexIVFFastScan::search_implem_12(
1127
1251
  nlist_visited++;
1128
1252
 
1129
1253
  // re-organize LUTs and biases into the right order
1130
- int nc = i1 - i0;
1254
+ int nc = static_cast<int>(i1 - i0);
1131
1255
 
1132
1256
  std::vector<int> q_map(nc), lut_entries(nc);
1133
1257
  AlignedTable<uint8_t> LUT(nc * dim12);
@@ -1137,7 +1261,7 @@ void IndexIVFFastScan::search_implem_12(
1137
1261
  for (size_t i = i0; i < i1; i++) {
1138
1262
  const QC& qc = qcs[i];
1139
1263
  q_map[i - i0] = qc.qno;
1140
- int ij = qc.qno * nprobe + qc.rank;
1264
+ int ij = static_cast<int>(qc.qno * cur_nprobe + qc.rank);
1141
1265
  lut_entries[i - i0] = single_LUT ? qc.qno : ij;
1142
1266
  if (biases.get()) {
1143
1267
  tmp_bias[i - i0] = biases[ij];
@@ -1145,7 +1269,7 @@ void IndexIVFFastScan::search_implem_12(
1145
1269
  }
1146
1270
  pq4_pack_LUT_qbs_q_map(
1147
1271
  qbs_for_list,
1148
- M2,
1272
+ static_cast<int>(M2),
1149
1273
  dis_tables.get(),
1150
1274
  lut_entries.data(),
1151
1275
  LUT.get());
@@ -1173,14 +1297,14 @@ void IndexIVFFastScan::search_implem_12(
1173
1297
  }
1174
1298
  handler.set_list_context(list_no, probe_map);
1175
1299
 
1176
- pq4_accumulate_loop_qbs(
1300
+ scanner.accumulate_loop_qbs(
1177
1301
  qbs_for_list,
1178
1302
  list_size,
1179
- M2,
1303
+ static_cast<int>(M2),
1180
1304
  codes.get(),
1181
1305
  LUT.get(),
1182
- handler,
1183
- context.norm_scaler);
1306
+ context.pq2x4_scale,
1307
+ get_block_stride());
1184
1308
  // prepare for next loop
1185
1309
  i0 = i1;
1186
1310
  }
@@ -1225,15 +1349,15 @@ void IndexIVFFastScan::search_implem_14(
1225
1349
  int rank; // this is the rank'th result of the coarse quantizer
1226
1350
  };
1227
1351
  bool single_LUT = !lookup_table_is_3d();
1228
- size_t nprobe = cq.nprobe;
1352
+ size_t cur_nprobe = cq.nprobe;
1229
1353
 
1230
1354
  std::vector<QC> qcs;
1231
1355
  {
1232
- int ij = 0;
1233
- for (int i = 0; i < n; i++) {
1234
- for (int j = 0; j < nprobe; j++) {
1356
+ size_t ij = 0;
1357
+ for (idx_t i = 0; i < n; i++) {
1358
+ for (size_t j = 0; j < cur_nprobe; j++) {
1235
1359
  if (cq.ids[ij] >= 0) {
1236
- qcs.push_back(QC{i, int(cq.ids[ij]), int(j)});
1360
+ qcs.push_back(QC{int(i), int(cq.ids[ij]), int(j)});
1237
1361
  }
1238
1362
  ij++;
1239
1363
  }
@@ -1312,25 +1436,24 @@ void IndexIVFFastScan::search_implem_14(
1312
1436
  std::vector<idx_t> local_idx(k * n);
1313
1437
  std::vector<float> local_dis(k * n);
1314
1438
 
1315
- // prepare the result handlers
1316
- std::unique_ptr<SIMDResultHandlerToFloat> handler(
1317
- this->make_knn_handler(
1318
- is_max,
1319
- impl,
1320
- n,
1321
- k,
1322
- local_dis.data(),
1323
- local_idx.data(),
1324
- sel,
1325
- context));
1326
- handler->begin(normalizers.get());
1327
-
1328
- int actual_qbs2 = this->qbs2 ? this->qbs2 : 11;
1439
+ auto scanner = make_knn_scanner(
1440
+ is_max,
1441
+ n,
1442
+ k,
1443
+ local_dis.data(),
1444
+ local_idx.data(),
1445
+ sel,
1446
+ impl,
1447
+ context);
1448
+ SIMDResultHandlerToFloat* handler_ptr = scanner->handler();
1449
+ handler_ptr->begin(normalizers.get());
1450
+
1451
+ int actual_qbs2 = static_cast<int>(this->qbs2 ? this->qbs2 : 11);
1329
1452
 
1330
1453
  std::vector<uint16_t> tmp_bias;
1331
1454
  if (biases.get()) {
1332
1455
  tmp_bias.resize(actual_qbs2);
1333
- handler->dbias = tmp_bias.data();
1456
+ handler_ptr->dbias = tmp_bias.data();
1334
1457
  }
1335
1458
 
1336
1459
  std::set<int> q_set;
@@ -1341,7 +1464,8 @@ void IndexIVFFastScan::search_implem_14(
1341
1464
  probe_map.reserve(actual_qbs2);
1342
1465
 
1343
1466
  #pragma omp for schedule(dynamic)
1344
- for (idx_t cluster = 0; cluster < ses.size(); cluster++) {
1467
+ for (idx_t cluster = 0; cluster < static_cast<idx_t>(ses.size());
1468
+ cluster++) {
1345
1469
  size_t i0 = ses[cluster].start;
1346
1470
  size_t i1 = ses[cluster].end;
1347
1471
  size_t list_size = ses[cluster].list_size;
@@ -1349,7 +1473,7 @@ void IndexIVFFastScan::search_implem_14(
1349
1473
  int list_no = qcs[i0].list_no;
1350
1474
 
1351
1475
  // re-organize LUTs and biases into the right order
1352
- int nc = i1 - i0;
1476
+ int nc = static_cast<int>(i1 - i0);
1353
1477
 
1354
1478
  std::vector<int> q_map(nc), lut_entries(nc);
1355
1479
  AlignedTable<uint8_t> LUT(nc * dim12);
@@ -1360,7 +1484,7 @@ void IndexIVFFastScan::search_implem_14(
1360
1484
  const QC& qc = qcs[i];
1361
1485
  q_map[i - i0] = qc.qno;
1362
1486
  q_set.insert(qc.qno);
1363
- int ij = qc.qno * nprobe + qc.rank;
1487
+ int ij = static_cast<int>(qc.qno * cur_nprobe + qc.rank);
1364
1488
  lut_entries[i - i0] = single_LUT ? qc.qno : ij;
1365
1489
  if (biases.get()) {
1366
1490
  tmp_bias[i - i0] = biases[ij];
@@ -1368,7 +1492,7 @@ void IndexIVFFastScan::search_implem_14(
1368
1492
  }
1369
1493
  pq4_pack_LUT_qbs_q_map(
1370
1494
  qbs_for_list,
1371
- M2,
1495
+ static_cast<int>(M2),
1372
1496
  dis_tables.get(),
1373
1497
  lut_entries.data(),
1374
1498
  LUT.get());
@@ -1382,9 +1506,9 @@ void IndexIVFFastScan::search_implem_14(
1382
1506
 
1383
1507
  // prepare the handler
1384
1508
 
1385
- handler->ntotal = list_size;
1386
- handler->q_map = q_map.data();
1387
- handler->id_map = ids.get();
1509
+ handler_ptr->ntotal = list_size;
1510
+ handler_ptr->q_map = q_map.data();
1511
+ handler_ptr->id_map = ids.get();
1388
1512
 
1389
1513
  // Set context information for handlers that need additional data
1390
1514
  // All queries in this batch access the same list_no, but each
@@ -1394,20 +1518,20 @@ void IndexIVFFastScan::search_implem_14(
1394
1518
  const QC& qc = qcs[i];
1395
1519
  probe_map[i - i0] = qc.rank;
1396
1520
  }
1397
- handler->set_list_context(list_no, probe_map);
1521
+ handler_ptr->set_list_context(list_no, probe_map);
1398
1522
 
1399
- pq4_accumulate_loop_qbs(
1523
+ scanner->accumulate_loop_qbs(
1400
1524
  qbs_for_list,
1401
1525
  list_size,
1402
- M2,
1526
+ static_cast<int>(M2),
1403
1527
  codes.get(),
1404
1528
  LUT.get(),
1405
- *handler.get(),
1406
- context.norm_scaler);
1529
+ context.pq2x4_scale,
1530
+ get_block_stride());
1407
1531
  }
1408
1532
 
1409
1533
  // labels is in-place for HeapHC
1410
- handler->end();
1534
+ handler_ptr->end();
1411
1535
 
1412
1536
  // merge per-thread results
1413
1537
  #pragma omp single
@@ -1461,7 +1585,7 @@ void IndexIVFFastScan::reconstruct_from_offset(
1461
1585
  for (size_t m = 0; m < M; m++) {
1462
1586
  uint8_t c =
1463
1587
  pq4_get_packed_element(list_codes.get(), bbs, M2, offset, m);
1464
- bsw.write(c, nbits);
1588
+ bsw.write(c, static_cast<int>(nbits));
1465
1589
  }
1466
1590
 
1467
1591
  sa_decode(1, code.data(), recons);
@@ -1472,7 +1596,7 @@ void IndexIVFFastScan::reconstruct_orig_invlists() {
1472
1596
  FAISS_THROW_IF_NOT(orig_invlists->list_size(0) == 0);
1473
1597
 
1474
1598
  #pragma omp parallel for if (nlist > 100)
1475
- for (idx_t list_no = 0; list_no < nlist; list_no++) {
1599
+ for (idx_t list_no = 0; list_no < static_cast<idx_t>(nlist); list_no++) {
1476
1600
  InvertedLists::ScopedCodes codes(invlists, list_no);
1477
1601
  InvertedLists::ScopedIds ids(invlists, list_no);
1478
1602
  size_t list_size = invlists->list_size(list_no);
@@ -1484,7 +1608,7 @@ void IndexIVFFastScan::reconstruct_orig_invlists() {
1484
1608
  for (size_t m = 0; m < M; m++) {
1485
1609
  uint8_t c =
1486
1610
  pq4_get_packed_element(codes.get(), bbs, M2, offset, m);
1487
- bsw.write(c, nbits);
1611
+ bsw.write(c, static_cast<int>(nbits));
1488
1612
  }
1489
1613
 
1490
1614
  // get id
@@ -1511,7 +1635,7 @@ void IndexIVFFastScan::sa_decode(idx_t n, const uint8_t* codes, float* x)
1511
1635
  fine_quantizer->decode(code + coarse_size, xi, 1);
1512
1636
  if (by_residual) {
1513
1637
  quantizer->reconstruct(list_no, residual.data());
1514
- for (size_t j = 0; j < d; j++) {
1638
+ for (int j = 0; j < d; j++) {
1515
1639
  xi[j] += residual[j];
1516
1640
  }
1517
1641
  }
@@ -1519,6 +1643,12 @@ void IndexIVFFastScan::sa_decode(idx_t n, const uint8_t* codes, float* x)
1519
1643
  }
1520
1644
  }
1521
1645
 
1646
+ void IndexIVFFastScan::postprocess_packed_codes(
1647
+ idx_t /*list_no*/,
1648
+ size_t /*list_offset*/,
1649
+ size_t /*n_added*/,
1650
+ const uint8_t* /*flat_codes*/) {}
1651
+
1522
1652
  IVFFastScanStats IVFFastScan_stats;
1523
1653
 
1524
1654
  } // namespace faiss