faiss 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +2 -1
  4. data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
  5. data/ext/faiss/index_binary.cpp +1 -1
  6. data/ext/faiss/kmeans.cpp +1 -1
  7. data/ext/faiss/pca_matrix.cpp +1 -1
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
  10. data/lib/faiss/version.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +93 -80
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -240
  13. data/vendor/faiss/faiss/Clustering.h +6 -0
  14. data/vendor/faiss/faiss/IVFlib.cpp +41 -21
  15. data/vendor/faiss/faiss/Index.cpp +6 -5
  16. data/vendor/faiss/faiss/Index.h +5 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
  21. data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
  22. data/vendor/faiss/faiss/IndexBinary.h +4 -4
  23. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
  31. data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
  32. data/vendor/faiss/faiss/IndexFastScan.h +25 -23
  33. data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
  34. data/vendor/faiss/faiss/IndexFlat.h +21 -18
  35. data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
  36. data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
  37. data/vendor/faiss/faiss/IndexHNSW.h +16 -2
  38. data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
  39. data/vendor/faiss/faiss/IndexIDMap.h +9 -7
  40. data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
  41. data/vendor/faiss/faiss/IndexIVF.h +33 -12
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
  45. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
  46. data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
  47. data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
  48. data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
  49. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
  50. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
  51. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
  53. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
  55. data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
  56. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
  57. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
  58. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
  59. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
  60. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
  61. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  62. data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
  63. data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
  66. data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
  67. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
  68. data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
  69. data/vendor/faiss/faiss/IndexPQ.h +3 -2
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
  73. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  74. data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
  75. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
  76. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
  77. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
  78. data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
  79. data/vendor/faiss/faiss/IndexRefine.h +4 -4
  80. data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
  81. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
  82. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
  83. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
  84. data/vendor/faiss/faiss/IndexShards.cpp +10 -9
  85. data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
  86. data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
  87. data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
  88. data/vendor/faiss/faiss/MetaIndexes.h +1 -1
  89. data/vendor/faiss/faiss/MetricType.h +14 -7
  90. data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
  91. data/vendor/faiss/faiss/SuperKMeans.h +97 -0
  92. data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
  93. data/vendor/faiss/faiss/VectorTransform.h +16 -16
  94. data/vendor/faiss/faiss/build.cpp +23 -0
  95. data/vendor/faiss/faiss/build.h +15 -0
  96. data/vendor/faiss/faiss/clone_index.cpp +48 -47
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
  101. data/vendor/faiss/faiss/factory_tools.cpp +5 -0
  102. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
  109. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
  110. data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
  111. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
  112. data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
  113. data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
  114. data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
  115. data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
  116. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
  117. data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
  118. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
  119. data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
  120. data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
  121. data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
  122. data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
  123. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
  124. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
  125. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
  126. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
  127. data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
  128. data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
  129. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
  130. data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
  131. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
  132. data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
  133. data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
  134. data/vendor/faiss/faiss/impl/FaissException.h +50 -3
  135. data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
  136. data/vendor/faiss/faiss/impl/HNSW.h +13 -34
  137. data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
  138. data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
  139. data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
  141. data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
  142. data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
  143. data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
  144. data/vendor/faiss/faiss/impl/NSG.h +4 -4
  145. data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
  146. data/vendor/faiss/faiss/impl/Panorama.h +258 -87
  147. data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
  148. data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
  149. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
  150. data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
  151. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
  152. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
  153. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
  154. data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
  155. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
  156. data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
  157. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
  158. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
  159. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  160. data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
  161. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
  162. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
  163. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
  164. data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
  165. data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
  166. data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
  167. data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
  168. data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
  169. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
  170. data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
  171. data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
  172. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
  173. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
  174. data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
  175. data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
  176. data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
  177. data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
  178. data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
  179. data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
  180. data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
  181. data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
  182. data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
  183. data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
  184. data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
  185. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
  186. data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
  187. data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
  188. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
  189. data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
  190. data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
  191. data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
  192. data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
  193. data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
  194. data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
  195. data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
  196. data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
  197. data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
  198. data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
  199. data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
  200. data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
  201. data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
  202. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
  203. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
  204. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
  205. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
  206. data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
  207. data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
  208. data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
  209. data/vendor/faiss/faiss/impl/io.cpp +6 -6
  210. data/vendor/faiss/faiss/impl/io_macros.h +33 -16
  211. data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
  212. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
  213. data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
  214. data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
  215. data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
  216. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
  217. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
  218. data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
  219. data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
  220. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
  221. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
  222. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
  223. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
  224. data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
  225. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
  226. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
  227. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
  228. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
  229. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
  230. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
  231. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
  232. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
  233. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
  234. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
  235. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
  236. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
  237. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
  238. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
  239. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
  240. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
  241. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
  242. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
  243. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
  244. data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
  245. data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
  246. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
  247. data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
  248. data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
  249. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
  250. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
  251. data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
  252. data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
  253. data/vendor/faiss/faiss/impl/svs_io.h +8 -2
  254. data/vendor/faiss/faiss/index_factory.cpp +86 -18
  255. data/vendor/faiss/faiss/index_io.h +24 -0
  256. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
  257. data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
  258. data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
  259. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
  260. data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
  261. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
  262. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
  263. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  264. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
  265. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
  266. data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
  267. data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
  268. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
  269. data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
  270. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
  271. data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
  272. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
  273. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
  274. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
  275. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
  276. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
  277. data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
  278. data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
  279. data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
  280. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
  281. data/vendor/faiss/faiss/utils/distances.cpp +390 -560
  282. data/vendor/faiss/faiss/utils/distances.h +20 -1
  283. data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
  284. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
  285. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
  286. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
  287. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
  288. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
  289. data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
  290. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
  291. data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
  292. data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
  293. data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
  294. data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
  295. data/vendor/faiss/faiss/utils/hamming.h +92 -2
  296. data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
  297. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
  298. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
  299. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
  300. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
  301. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
  302. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
  303. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
  304. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
  305. data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
  306. data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
  307. data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
  308. data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
  309. data/vendor/faiss/faiss/utils/partitioning.h +31 -0
  310. data/vendor/faiss/faiss/utils/popcount.h +29 -0
  311. data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
  312. data/vendor/faiss/faiss/utils/prefetch.h +2 -2
  313. data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
  314. data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
  315. data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
  316. data/vendor/faiss/faiss/utils/random.cpp +6 -6
  317. data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
  318. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
  319. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
  320. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
  321. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
  322. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
  323. data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
  324. data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
  325. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
  326. data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
  327. data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
  328. data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
  329. data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
  330. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
  331. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
  332. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
  333. data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
  334. data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
  335. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
  336. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
  337. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
  338. data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
  339. data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
  340. data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
  341. data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
  342. data/vendor/faiss/faiss/utils/utils.cpp +5 -5
  343. data/vendor/faiss/faiss/utils/utils.h +3 -3
  344. metadata +119 -34
  345. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
  346. data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
  347. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
  348. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
  349. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
  350. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
  351. data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
  352. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
  353. data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
  354. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
  355. data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
  356. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
  357. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
  358. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
  359. data/vendor/faiss/faiss/utils/simdlib.h +0 -42
  360. data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
  361. data/ext/faiss/{utils_rb.h → utils.h} +0 -0
@@ -9,7 +9,6 @@
9
9
 
10
10
  #include <faiss/IndexIVFPQ.h>
11
11
 
12
- #include <cassert>
13
12
  #include <cinttypes>
14
13
  #include <cmath>
15
14
  #include <cstdint>
@@ -17,7 +16,6 @@
17
16
 
18
17
  #include <algorithm>
19
18
 
20
- #include <faiss/utils/Heap.h>
21
19
  #include <faiss/utils/distances_dispatch.h>
22
20
  #include <faiss/utils/utils.h>
23
21
 
@@ -30,9 +28,15 @@
30
28
  #include <faiss/impl/IDSelector.h>
31
29
  #include <faiss/impl/ProductQuantizer.h>
32
30
  #include <faiss/impl/ResultHandler.h>
33
- #include <faiss/impl/pq_code_distance/pq_code_distance-inl.h>
31
+ #include <faiss/impl/pq_code_distance/pq_code_distance-generic.h>
34
32
  #include <faiss/impl/simd_dispatch.h>
35
33
 
34
+ // Scalar (NONE) fallback for dynamic dispatch
35
+ #define THE_SIMD_LEVEL SIMDLevel::NONE
36
+ // NOLINTNEXTLINE(facebook-hte-InlineHeader)
37
+ #include <faiss/impl/pq_code_distance/IVFPQScanner_impl.h>
38
+ #undef THE_SIMD_LEVEL
39
+
36
40
  namespace faiss {
37
41
 
38
42
  /*****************************************
@@ -40,17 +44,17 @@ namespace faiss {
40
44
  ******************************************/
41
45
 
42
46
  IndexIVFPQ::IndexIVFPQ(
43
- Index* quantizer,
44
- size_t d,
45
- size_t nlist,
47
+ Index* quantizer_in,
48
+ size_t d_in,
49
+ size_t nlist_in,
46
50
  size_t M,
47
51
  size_t nbits_per_idx,
48
52
  MetricType metric,
49
- bool own_invlists)
50
- : IndexIVF(quantizer, d, nlist, 0, metric, own_invlists),
51
- pq(d, M, nbits_per_idx) {
53
+ bool own_invlists_in)
54
+ : IndexIVF(quantizer_in, d_in, nlist_in, 0, metric, own_invlists_in),
55
+ pq(d_in, M, nbits_per_idx) {
52
56
  code_size = pq.code_size;
53
- if (own_invlists) {
57
+ if (own_invlists_in) {
54
58
  invlists->code_size = code_size;
55
59
  }
56
60
  is_trained = false;
@@ -66,12 +70,16 @@ IndexIVFPQ::IndexIVFPQ(
66
70
  /****************************************************************
67
71
  * training */
68
72
 
69
- void IndexIVFPQ::train_encoder(idx_t n, const float* x, const idx_t* assign) {
73
+ void IndexIVFPQ::train_encoder(
74
+ idx_t n,
75
+ const float* x,
76
+ const idx_t* /*assign*/) {
70
77
  pq.train(n, x);
71
78
 
72
79
  if (do_polysemous_training) {
73
- if (verbose)
80
+ if (verbose) {
74
81
  printf("doing polysemous training for PQ\n");
82
+ }
75
83
  PolysemousTraining default_pt;
76
84
  PolysemousTraining* pt =
77
85
  polysemous_training ? polysemous_training : &default_pt;
@@ -96,8 +104,9 @@ void IndexIVFPQ::encode(idx_t key, const float* x, uint8_t* code) const {
96
104
  std::vector<float> residual_vec(d);
97
105
  quantizer->compute_residual(x, residual_vec.data(), key);
98
106
  pq.compute_code(residual_vec.data(), code);
99
- } else
107
+ } else {
100
108
  pq.compute_code(x, code);
109
+ }
101
110
  }
102
111
 
103
112
  void IndexIVFPQ::encode_multiple(
@@ -106,8 +115,9 @@ void IndexIVFPQ::encode_multiple(
106
115
  const float* x,
107
116
  uint8_t* xcodes,
108
117
  bool compute_keys) const {
109
- if (compute_keys)
118
+ if (compute_keys) {
110
119
  quantizer->assign(n, x, keys);
120
+ }
111
121
 
112
122
  encode_vectors(n, x, keys, xcodes);
113
123
  }
@@ -123,7 +133,7 @@ void IndexIVFPQ::decode_multiple(
123
133
  for (size_t i = 0; i < n; i++) {
124
134
  quantizer->reconstruct(keys[i], centroid.data());
125
135
  float* xi = x + i * d;
126
- for (size_t j = 0; j < d; j++) {
136
+ for (int j = 0; j < d; j++) {
127
137
  xi[j] += centroid[j];
128
138
  }
129
139
  }
@@ -149,13 +159,15 @@ static std::unique_ptr<float[]> compute_residuals(
149
159
  const idx_t* list_nos) {
150
160
  size_t d = quantizer->d;
151
161
  std::unique_ptr<float[]> residuals(new float[n * d]);
152
- // TODO: parallelize?
153
- for (size_t i = 0; i < n; i++) {
154
- if (list_nos[i] < 0)
162
+ // Parallelize with OpenMP (each iteration is independent)
163
+ #pragma omp parallel for if (n > 1000)
164
+ for (idx_t i = 0; i < n; i++) {
165
+ if (list_nos[i] < 0) {
155
166
  memset(residuals.get() + i * d, 0, sizeof(float) * d);
156
- else
167
+ } else {
157
168
  quantizer->compute_residual(
158
169
  x + i * d, residuals.get() + i * d, list_nos[i]);
170
+ }
159
171
  }
160
172
  return residuals;
161
173
  }
@@ -207,7 +219,7 @@ void IndexIVFPQ::sa_decode(idx_t n, const uint8_t* codes, float* x) const {
207
219
  pq.decode(code + coarse_size, xi);
208
220
  if (by_residual) {
209
221
  quantizer->reconstruct(list_no, residual.data());
210
- for (size_t j = 0; j < d; j++) {
222
+ for (int j = 0; j < d; j++) {
211
223
  xi[j] += residual[j];
212
224
  }
213
225
  }
@@ -282,14 +294,15 @@ void IndexIVFPQ::add_core_o(
282
294
  double t2 = getmillisecs();
283
295
  // TODO: parallelize?
284
296
  size_t n_ignore = 0;
285
- for (size_t i = 0; i < n; i++) {
297
+ for (idx_t i = 0; i < n; i++) {
286
298
  idx_t key = idx[i];
287
299
  idx_t id = xids ? xids[i] : ntotal + i;
288
300
  if (key < 0) {
289
301
  direct_map.add_single_id(id, -1, 0);
290
302
  n_ignore++;
291
- if (residuals_2)
303
+ if (residuals_2) {
292
304
  memset(residuals_2, 0, sizeof(*residuals_2) * d);
305
+ }
293
306
  continue;
294
307
  }
295
308
 
@@ -301,8 +314,9 @@ void IndexIVFPQ::add_core_o(
301
314
  float* res2 = residuals_2 + i * d;
302
315
  const float* xi = to_encode + i * d;
303
316
  pq.decode(code, res2);
304
- for (int j = 0; j < d; j++)
317
+ for (int j = 0; j < d; j++) {
305
318
  res2[j] = xi[j] - res2[j];
319
+ }
306
320
  }
307
321
 
308
322
  direct_map.add_single_id(id, key, offset);
@@ -311,8 +325,9 @@ void IndexIVFPQ::add_core_o(
311
325
  double t3 = getmillisecs();
312
326
  if (verbose) {
313
327
  char comment[100] = {0};
314
- if (n_ignore > 0)
328
+ if (n_ignore > 0) {
315
329
  snprintf(comment, 100, "(%zd vectors ignored)", n_ignore);
330
+ }
316
331
  printf(" add_core times: %.3f %.3f %.3f %s\n",
317
332
  t1 - t0,
318
333
  t2 - t1,
@@ -379,6 +394,7 @@ void initialize_IVFPQ_precomputed_table(
379
394
  AlignedTable<float>& precomputed_table,
380
395
  bool by_residual,
381
396
  bool verbose) {
397
+ FAISS_THROW_IF_NOT_MSG(quantizer, "IVF quantizer must not be null");
382
398
  size_t nlist = quantizer->ntotal;
383
399
  size_t d = quantizer->d;
384
400
  FAISS_THROW_IF_NOT(d == pq.d);
@@ -388,6 +404,9 @@ void initialize_IVFPQ_precomputed_table(
388
404
  return;
389
405
  }
390
406
 
407
+ const size_t m_ksub =
408
+ mul_no_overflow(pq.M, pq.ksub, "IVFPQ precomputed_table");
409
+
391
410
  if (use_precomputed_table == 0) { // then choose the type of table
392
411
  if (!(quantizer->metric_type == METRIC_L2 && by_residual)) {
393
412
  if (verbose) {
@@ -399,10 +418,13 @@ void initialize_IVFPQ_precomputed_table(
399
418
  }
400
419
  const MultiIndexQuantizer* miq =
401
420
  dynamic_cast<const MultiIndexQuantizer*>(quantizer);
402
- if (miq && pq.M % miq->pq.M == 0)
421
+ if (miq && pq.M % miq->pq.M == 0) {
403
422
  use_precomputed_table = 2;
404
- else {
405
- size_t table_size = pq.M * pq.ksub * nlist * sizeof(float);
423
+ } else {
424
+ size_t table_size = mul_no_overflow(
425
+ mul_no_overflow(m_ksub, nlist, "IVFPQ precomputed_table"),
426
+ sizeof(float),
427
+ "IVFPQ precomputed_table");
406
428
  if (table_size > precomputed_table_max_bytes) {
407
429
  if (verbose) {
408
430
  printf("IndexIVFPQ::precompute_table: not precomputing table, "
@@ -422,22 +444,25 @@ void initialize_IVFPQ_precomputed_table(
422
444
  }
423
445
 
424
446
  // squared norms of the PQ centroids
425
- std::vector<float> r_norms(pq.M * pq.ksub, NAN);
426
- for (int m = 0; m < pq.M; m++)
427
- for (int j = 0; j < pq.ksub; j++)
447
+ std::vector<float> r_norms(m_ksub, NAN);
448
+ for (size_t m = 0; m < pq.M; m++) {
449
+ for (size_t j = 0; j < pq.ksub; j++) {
428
450
  r_norms[m * pq.ksub + j] =
429
451
  fvec_norm_L2sqr_dispatch(pq.get_centroids(m, j), pq.dsub);
452
+ }
453
+ }
430
454
 
431
455
  if (use_precomputed_table == 1) {
432
- precomputed_table.resize(nlist * pq.M * pq.ksub);
456
+ precomputed_table.resize(
457
+ mul_no_overflow(nlist, m_ksub, "IVFPQ precomputed_table"));
433
458
  std::vector<float> centroid(d);
434
459
 
435
460
  for (size_t i = 0; i < nlist; i++) {
436
461
  quantizer->reconstruct(i, centroid.data());
437
462
 
438
- float* tab = &precomputed_table[i * pq.M * pq.ksub];
463
+ float* tab = &precomputed_table[i * m_ksub];
439
464
  pq.compute_inner_prod_table(centroid.data(), tab);
440
- fvec_madd_dispatch(pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
465
+ fvec_madd_dispatch(m_ksub, r_norms.data(), 2.0, tab, tab);
441
466
  }
442
467
  } else if (use_precomputed_table == 2) {
443
468
  const MultiIndexQuantizer* miq =
@@ -446,12 +471,13 @@ void initialize_IVFPQ_precomputed_table(
446
471
  const ProductQuantizer& cpq = miq->pq;
447
472
  FAISS_THROW_IF_NOT(pq.M % cpq.M == 0);
448
473
 
449
- precomputed_table.resize(cpq.ksub * pq.M * pq.ksub);
474
+ precomputed_table.resize(
475
+ mul_no_overflow(cpq.ksub, m_ksub, "IVFPQ precomputed_table"));
450
476
 
451
477
  // reorder PQ centroid table
452
478
  std::vector<float> centroids(d * cpq.ksub, NAN);
453
479
 
454
- for (int m = 0; m < cpq.M; m++) {
480
+ for (size_t m = 0; m < cpq.M; m++) {
455
481
  for (size_t i = 0; i < cpq.ksub; i++) {
456
482
  memcpy(centroids.data() + i * d + m * cpq.dsub,
457
483
  cpq.get_centroids(m, i),
@@ -463,8 +489,8 @@ void initialize_IVFPQ_precomputed_table(
463
489
  cpq.ksub, centroids.data(), precomputed_table.data());
464
490
 
465
491
  for (size_t i = 0; i < cpq.ksub; i++) {
466
- float* tab = &precomputed_table[i * pq.M * pq.ksub];
467
- fvec_madd_dispatch(pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
492
+ float* tab = &precomputed_table[i * m_ksub];
493
+ fvec_madd_dispatch(m_ksub, r_norms.data(), 2.0, tab, tab);
468
494
  }
469
495
  }
470
496
  }
@@ -479,812 +505,13 @@ void IndexIVFPQ::precompute_table() {
479
505
  verbose);
480
506
  }
481
507
 
482
- namespace {
483
-
484
- #define TIC t0 = get_cycles()
485
- #define TOC get_cycles() - t0
486
-
487
- /** QueryTables manages the various ways of searching an
488
- * IndexIVFPQ. The code contains a lot of branches, depending on:
489
- * - metric_type: are we computing L2 or Inner product similarity?
490
- * - by_residual: do we encode raw vectors or residuals?
491
- * - use_precomputed_table: are x_R|x_C tables precomputed?
492
- * - polysemous_ht: are we filtering with polysemous codes?
493
- */
494
- struct QueryTables {
495
- /*****************************************************
496
- * General data from the IVFPQ
497
- *****************************************************/
498
-
499
- const IndexIVFPQ& ivfpq;
500
- const IVFSearchParameters* params;
501
-
502
- // copied from IndexIVFPQ for easier access
503
- int d;
504
- const ProductQuantizer& pq;
505
- MetricType metric_type;
506
- bool by_residual;
507
- int use_precomputed_table;
508
- int polysemous_ht;
509
-
510
- // pre-allocated data buffers
511
- float *sim_table, *sim_table_2;
512
- float *residual_vec, *decoded_vec;
513
-
514
- // single data buffer
515
- std::vector<float> mem;
516
-
517
- // for table pointers
518
- std::vector<const float*> sim_table_ptrs;
519
-
520
- explicit QueryTables(
521
- const IndexIVFPQ& ivfpq,
522
- const IVFSearchParameters* params)
523
- : ivfpq(ivfpq),
524
- d(ivfpq.d),
525
- pq(ivfpq.pq),
526
- metric_type(ivfpq.metric_type),
527
- by_residual(ivfpq.by_residual),
528
- use_precomputed_table(ivfpq.use_precomputed_table) {
529
- mem.resize(pq.ksub * pq.M * 2 + d * 2);
530
- sim_table = mem.data();
531
- sim_table_2 = sim_table + pq.ksub * pq.M;
532
- residual_vec = sim_table_2 + pq.ksub * pq.M;
533
- decoded_vec = residual_vec + d;
534
-
535
- // for polysemous
536
- polysemous_ht = ivfpq.polysemous_ht;
537
- if (auto ivfpq_params =
538
- dynamic_cast<const IVFPQSearchParameters*>(params)) {
539
- polysemous_ht = ivfpq_params->polysemous_ht;
540
- }
541
- if (polysemous_ht != 0) {
542
- q_code.resize(pq.code_size);
543
- }
544
- init_list_cycles = 0;
545
- sim_table_ptrs.resize(pq.M);
546
- }
547
-
548
- /*****************************************************
549
- * What we do when query is known
550
- *****************************************************/
551
-
552
- // field specific to query
553
- const float* qi;
554
-
555
- // query-specific initialization
556
- void init_query(const float* qi) {
557
- this->qi = qi;
558
- if (metric_type == METRIC_INNER_PRODUCT)
559
- init_query_IP();
560
- else
561
- init_query_L2();
562
- if (!by_residual && polysemous_ht != 0)
563
- pq.compute_code(qi, q_code.data());
564
- }
565
-
566
- void init_query_IP() {
567
- // precompute some tables specific to the query qi
568
- pq.compute_inner_prod_table(qi, sim_table);
569
- }
570
-
571
- void init_query_L2() {
572
- if (!by_residual) {
573
- pq.compute_distance_table(qi, sim_table);
574
- } else if (use_precomputed_table) {
575
- pq.compute_inner_prod_table(qi, sim_table_2);
576
- }
577
- }
578
-
579
- /*****************************************************
580
- * When inverted list is known: prepare computations
581
- *****************************************************/
582
-
583
- // fields specific to list
584
- idx_t key;
585
- float coarse_dis;
586
- std::vector<uint8_t> q_code;
587
-
588
- uint64_t init_list_cycles;
589
-
590
- /// once we know the query and the centroid, we can prepare the
591
- /// sim_table that will be used for accumulation
592
- /// and dis0, the initial value
593
- float precompute_list_tables() {
594
- float dis0 = 0;
595
- uint64_t t0;
596
- TIC;
597
- if (by_residual) {
598
- if (metric_type == METRIC_INNER_PRODUCT)
599
- dis0 = precompute_list_tables_IP();
600
- else
601
- dis0 = precompute_list_tables_L2();
602
- }
603
- init_list_cycles += TOC;
604
- return dis0;
605
- }
606
-
607
- float precompute_list_table_pointers() {
608
- float dis0 = 0;
609
- uint64_t t0;
610
- TIC;
611
- if (by_residual) {
612
- if (metric_type == METRIC_INNER_PRODUCT)
613
- FAISS_THROW_MSG("not implemented");
614
- else
615
- dis0 = precompute_list_table_pointers_L2();
616
- }
617
- init_list_cycles += TOC;
618
- return dis0;
619
- }
620
-
621
- /*****************************************************
622
- * compute tables for inner prod
623
- *****************************************************/
624
-
625
- float precompute_list_tables_IP() {
626
- // prepare the sim_table that will be used for accumulation
627
- // and dis0, the initial value
628
- ivfpq.quantizer->reconstruct(key, decoded_vec);
629
- // decoded_vec = centroid
630
- float dis0 = fvec_inner_product_dispatch(qi, decoded_vec, d);
631
-
632
- if (polysemous_ht) {
633
- for (int i = 0; i < d; i++) {
634
- residual_vec[i] = qi[i] - decoded_vec[i];
635
- }
636
- pq.compute_code(residual_vec, q_code.data());
637
- }
638
- return dis0;
639
- }
640
-
641
- /*****************************************************
642
- * compute tables for L2 distance
643
- *****************************************************/
644
-
645
- float precompute_list_tables_L2() {
646
- float dis0 = 0;
647
-
648
- if (use_precomputed_table == 0 || use_precomputed_table == -1) {
649
- ivfpq.quantizer->compute_residual(qi, residual_vec, key);
650
- pq.compute_distance_table(residual_vec, sim_table);
651
-
652
- if (polysemous_ht != 0) {
653
- pq.compute_code(residual_vec, q_code.data());
654
- }
655
-
656
- } else if (use_precomputed_table == 1) {
657
- dis0 = coarse_dis;
658
-
659
- fvec_madd_dispatch(
660
- pq.M * pq.ksub,
661
- ivfpq.precomputed_table.data() + key * pq.ksub * pq.M,
662
- -2.0,
663
- sim_table_2,
664
- sim_table);
665
-
666
- if (polysemous_ht != 0) {
667
- ivfpq.quantizer->compute_residual(qi, residual_vec, key);
668
- pq.compute_code(residual_vec, q_code.data());
669
- }
670
-
671
- } else if (use_precomputed_table == 2) {
672
- dis0 = coarse_dis;
673
-
674
- const MultiIndexQuantizer* miq =
675
- dynamic_cast<const MultiIndexQuantizer*>(ivfpq.quantizer);
676
- FAISS_THROW_IF_NOT(miq);
677
- const ProductQuantizer& cpq = miq->pq;
678
- int Mf = pq.M / cpq.M;
679
-
680
- const float* qtab = sim_table_2; // query-specific table
681
- float* ltab = sim_table; // (output) list-specific table
682
-
683
- long k = key;
684
- for (int cm = 0; cm < cpq.M; cm++) {
685
- // compute PQ index
686
- int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
687
- k >>= cpq.nbits;
688
-
689
- // get corresponding table
690
- const float* pc = ivfpq.precomputed_table.data() +
691
- (ki * pq.M + cm * Mf) * pq.ksub;
692
-
693
- if (polysemous_ht == 0) {
694
- // sum up with query-specific table
695
- fvec_madd_dispatch(Mf * pq.ksub, pc, -2.0, qtab, ltab);
696
- ltab += Mf * pq.ksub;
697
- qtab += Mf * pq.ksub;
698
- } else {
699
- for (int m = cm * Mf; m < (cm + 1) * Mf; m++) {
700
- q_code[m] = fvec_madd_and_argmin_dispatch(
701
- pq.ksub, pc, -2, qtab, ltab);
702
- pc += pq.ksub;
703
- ltab += pq.ksub;
704
- qtab += pq.ksub;
705
- }
706
- }
707
- }
708
- }
709
-
710
- return dis0;
711
- }
712
-
713
- float precompute_list_table_pointers_L2() {
714
- float dis0 = 0;
715
-
716
- if (use_precomputed_table == 1) {
717
- dis0 = coarse_dis;
718
-
719
- const float* s =
720
- ivfpq.precomputed_table.data() + key * pq.ksub * pq.M;
721
- for (int m = 0; m < pq.M; m++) {
722
- sim_table_ptrs[m] = s;
723
- s += pq.ksub;
724
- }
725
- } else if (use_precomputed_table == 2) {
726
- dis0 = coarse_dis;
727
-
728
- const MultiIndexQuantizer* miq =
729
- dynamic_cast<const MultiIndexQuantizer*>(ivfpq.quantizer);
730
- FAISS_THROW_IF_NOT(miq);
731
- const ProductQuantizer& cpq = miq->pq;
732
- int Mf = pq.M / cpq.M;
733
-
734
- long k = key;
735
- int m0 = 0;
736
- for (int cm = 0; cm < cpq.M; cm++) {
737
- int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
738
- k >>= cpq.nbits;
739
-
740
- const float* pc = ivfpq.precomputed_table.data() +
741
- (ki * pq.M + cm * Mf) * pq.ksub;
742
-
743
- for (int m = m0; m < m0 + Mf; m++) {
744
- sim_table_ptrs[m] = pc;
745
- pc += pq.ksub;
746
- }
747
- m0 += Mf;
748
- }
749
- } else {
750
- FAISS_THROW_MSG("need precomputed tables");
751
- }
752
-
753
- if (polysemous_ht) {
754
- FAISS_THROW_MSG("not implemented");
755
- // Not clear that it makes sense to implemente this,
756
- // because it costs M * ksub, which is what we wanted to
757
- // avoid with the tables pointers.
758
- }
759
-
760
- return dis0;
761
- }
762
- };
763
-
764
- template <class C, bool use_sel>
765
- struct WrappedSearchResult {
766
- ResultHandler& res;
767
- size_t nup = 0;
768
- idx_t list_no;
769
-
770
- const idx_t* ids;
771
- const IDSelector* sel;
772
-
773
- WrappedSearchResult(
774
- idx_t list_no,
775
- const idx_t* ids,
776
- const IDSelector* sel,
777
- ResultHandler& res)
778
- : res(res), list_no(list_no), ids(ids), sel(sel) {}
779
-
780
- inline bool skip_entry(idx_t j) {
781
- return use_sel && !sel->is_member(ids[j]);
782
- }
783
-
784
- inline void add(idx_t j, float dis) {
785
- if (C::cmp(res.threshold, dis)) {
786
- idx_t id = ids ? ids[j] : lo_build(this->list_no, j);
787
- res.add_result(dis, id);
788
- nup++;
789
- }
790
- }
791
- };
792
-
793
- /*****************************************************
794
- * Scaning the codes.
795
- * The scanning functions call their favorite precompute_*
796
- * function to precompute the tables they need.
797
- *****************************************************/
798
- template <typename IDType, MetricType METRIC_TYPE, class PQCodeDist>
799
- struct IVFPQScannerT : QueryTables {
800
- using PQDecoder = typename PQCodeDist::PQDecoder;
801
- const uint8_t* list_codes;
802
- const IDType* list_ids;
803
- size_t list_size;
804
-
805
- IVFPQScannerT(const IndexIVFPQ& ivfpq, const IVFSearchParameters* params)
806
- : QueryTables(ivfpq, params) {
807
- assert(METRIC_TYPE == metric_type);
808
- }
809
-
810
- float dis0;
811
-
812
- void init_list(idx_t list_no, float coarse_dis, int mode) {
813
- this->key = list_no;
814
- this->coarse_dis = coarse_dis;
815
-
816
- if (mode == 2) {
817
- dis0 = precompute_list_tables();
818
- } else if (mode == 1) {
819
- dis0 = precompute_list_table_pointers();
820
- }
821
- }
822
-
823
- /*****************************************************
824
- * Scaning the codes: simple PQ scan.
825
- *****************************************************/
826
-
827
- // This is the baseline version of scan_list_with_tables().
828
- // It demonstrates what this function actually does.
829
- //
830
- // /// version of the scan where we use precomputed tables.
831
- // template <class SearchResultType>
832
- // void scan_list_with_table(
833
- // size_t ncode,
834
- // const uint8_t* codes,
835
- // SearchResultType& res) const {
836
- //
837
- // for (size_t j = 0; j < ncode; j++, codes += pq.code_size) {
838
- // if (res.skip_entry(j)) {
839
- // continue;
840
- // }
841
- // float dis = dis0 + PQCodeDist::distance_single_code(
842
- // pq, sim_table, codes);
843
- // res.add(j, dis);
844
- // }
845
- // }
846
-
847
- // This is the modified version of scan_list_with_tables().
848
- // It was observed that doing manual unrolling of the loop that
849
- // utilizes distance_single_code() speeds up the computations.
850
-
851
- /// version of the scan where we use precomputed tables.
852
- template <class SearchResultType>
853
- void scan_list_with_table(
854
- size_t ncode,
855
- const uint8_t* codes,
856
- SearchResultType& res) const {
857
- int counter = 0;
858
-
859
- size_t saved_j[4] = {0, 0, 0, 0};
860
- for (size_t j = 0; j < ncode; j++) {
861
- if (res.skip_entry(j)) {
862
- continue;
863
- }
864
-
865
- saved_j[0] = (counter == 0) ? j : saved_j[0];
866
- saved_j[1] = (counter == 1) ? j : saved_j[1];
867
- saved_j[2] = (counter == 2) ? j : saved_j[2];
868
- saved_j[3] = (counter == 3) ? j : saved_j[3];
869
-
870
- counter += 1;
871
- if (counter == 4) {
872
- float distance_0 = 0;
873
- float distance_1 = 0;
874
- float distance_2 = 0;
875
- float distance_3 = 0;
876
- PQCodeDist::distance_four_codes(
877
- pq.M,
878
- pq.nbits,
879
- sim_table,
880
- codes + saved_j[0] * pq.code_size,
881
- codes + saved_j[1] * pq.code_size,
882
- codes + saved_j[2] * pq.code_size,
883
- codes + saved_j[3] * pq.code_size,
884
- distance_0,
885
- distance_1,
886
- distance_2,
887
- distance_3);
888
-
889
- res.add(saved_j[0], dis0 + distance_0);
890
- res.add(saved_j[1], dis0 + distance_1);
891
- res.add(saved_j[2], dis0 + distance_2);
892
- res.add(saved_j[3], dis0 + distance_3);
893
- counter = 0;
894
- }
895
- }
896
-
897
- if (counter >= 1) {
898
- float dis = dis0 +
899
- PQCodeDist::distance_single_code(
900
- pq.M,
901
- pq.nbits,
902
- sim_table,
903
- codes + saved_j[0] * pq.code_size);
904
- res.add(saved_j[0], dis);
905
- }
906
- if (counter >= 2) {
907
- float dis = dis0 +
908
- PQCodeDist::distance_single_code(
909
- pq.M,
910
- pq.nbits,
911
- sim_table,
912
- codes + saved_j[1] * pq.code_size);
913
- res.add(saved_j[1], dis);
914
- }
915
- if (counter >= 3) {
916
- float dis = dis0 +
917
- PQCodeDist::distance_single_code(
918
- pq.M,
919
- pq.nbits,
920
- sim_table,
921
- codes + saved_j[2] * pq.code_size);
922
- res.add(saved_j[2], dis);
923
- }
924
- }
925
-
926
- /// tables are not precomputed, but pointers are provided to the
927
- /// relevant X_c|x_r tables
928
- template <class SearchResultType>
929
- void scan_list_with_pointer(
930
- size_t ncode,
931
- const uint8_t* codes,
932
- SearchResultType& res) const {
933
- for (size_t j = 0; j < ncode; j++, codes += pq.code_size) {
934
- if (res.skip_entry(j)) {
935
- continue;
936
- }
937
- PQDecoder decoder(codes, pq.nbits);
938
- float dis = dis0;
939
- const float* tab = sim_table_2;
940
-
941
- for (size_t m = 0; m < pq.M; m++) {
942
- int ci = decoder.decode();
943
- dis += sim_table_ptrs[m][ci] - 2 * tab[ci];
944
- tab += pq.ksub;
945
- }
946
- res.add(j, dis);
947
- }
948
- }
949
-
950
- /// nothing is precomputed: access residuals on-the-fly
951
- template <class SearchResultType>
952
- void scan_on_the_fly_dist(
953
- size_t ncode,
954
- const uint8_t* codes,
955
- SearchResultType& res) const {
956
- const float* dvec;
957
- float dis0 = 0;
958
- if (by_residual) {
959
- if (METRIC_TYPE == METRIC_INNER_PRODUCT) {
960
- ivfpq.quantizer->reconstruct(key, residual_vec);
961
- dis0 = fvec_inner_product_dispatch(residual_vec, qi, d);
962
- } else {
963
- ivfpq.quantizer->compute_residual(qi, residual_vec, key);
964
- }
965
- dvec = residual_vec;
966
- } else {
967
- dvec = qi;
968
- dis0 = 0;
969
- }
970
-
971
- for (size_t j = 0; j < ncode; j++, codes += pq.code_size) {
972
- if (res.skip_entry(j)) {
973
- continue;
974
- }
975
- pq.decode(codes, decoded_vec);
976
-
977
- float dis;
978
- if (METRIC_TYPE == METRIC_INNER_PRODUCT) {
979
- dis = dis0 + fvec_inner_product_dispatch(decoded_vec, qi, d);
980
- } else {
981
- dis = fvec_L2sqr_dispatch(decoded_vec, dvec, d);
982
- }
983
- res.add(j, dis);
984
- }
985
- }
986
-
987
- /*****************************************************
988
- * Scanning codes with polysemous filtering
989
- *****************************************************/
990
-
991
- // This is the baseline version of scan_list_polysemous_hc().
992
- // It demonstrates what this function actually does.
993
-
994
- // template <class HammingComputer, class SearchResultType>
995
- // void scan_list_polysemous_hc(
996
- // size_t ncode,
997
- // const uint8_t* codes,
998
- // SearchResultType& res) const {
999
- // int ht = ivfpq.polysemous_ht;
1000
- // size_t n_hamming_pass = 0, nup = 0;
1001
- //
1002
- // int code_size = pq.code_size;
1003
- //
1004
- // HammingComputer hc(q_code.data(), code_size);
1005
- //
1006
- // for (size_t j = 0; j < ncode; j++, codes += code_size) {
1007
- // if (res.skip_entry(j)) {
1008
- // continue;
1009
- // }
1010
- // const uint8_t* b_code = codes;
1011
- // int hd = hc.hamming(b_code);
1012
- // if (hd < ht) {
1013
- // n_hamming_pass++;
1014
- //
1015
- // float dis =
1016
- // dis0 +
1017
- // PQCodeDist::distance_single_code(
1018
- // pq, sim_table, codes);
1019
- //
1020
- // res.add(j, dis);
1021
- // }
1022
- // }
1023
- // #pragma omp critical
1024
- // { indexIVFPQ_stats.n_hamming_pass += n_hamming_pass; }
1025
- // }
1026
-
1027
- // This is the modified version of scan_list_with_tables().
1028
- // It was observed that doing manual unrolling of the loop that
1029
- // utilizes distance_single_code() speeds up the computations.
1030
-
1031
- template <class HammingComputer, class SearchResultType>
1032
- void scan_list_polysemous_hc(
1033
- size_t ncode,
1034
- const uint8_t* codes,
1035
- SearchResultType& res) const {
1036
- int ht = ivfpq.polysemous_ht;
1037
- size_t n_hamming_pass = 0;
1038
-
1039
- int code_size = pq.code_size;
1040
-
1041
- size_t saved_j[8];
1042
- int counter = 0;
1043
-
1044
- HammingComputer hc(q_code.data(), code_size);
1045
-
1046
- for (size_t j = 0; j < (ncode / 4) * 4; j += 4) {
1047
- const uint8_t* b_code = codes + j * code_size;
1048
-
1049
- // Unrolling is a key. Basically, doing multiple popcount
1050
- // operations one after another speeds things up.
1051
-
1052
- // 9999999 is just an arbitrary large number
1053
- int hd0 = (res.skip_entry(j + 0))
1054
- ? 99999999
1055
- : hc.hamming(b_code + 0 * code_size);
1056
- int hd1 = (res.skip_entry(j + 1))
1057
- ? 99999999
1058
- : hc.hamming(b_code + 1 * code_size);
1059
- int hd2 = (res.skip_entry(j + 2))
1060
- ? 99999999
1061
- : hc.hamming(b_code + 2 * code_size);
1062
- int hd3 = (res.skip_entry(j + 3))
1063
- ? 99999999
1064
- : hc.hamming(b_code + 3 * code_size);
1065
-
1066
- saved_j[counter] = j + 0;
1067
- counter = (hd0 < ht) ? (counter + 1) : counter;
1068
- saved_j[counter] = j + 1;
1069
- counter = (hd1 < ht) ? (counter + 1) : counter;
1070
- saved_j[counter] = j + 2;
1071
- counter = (hd2 < ht) ? (counter + 1) : counter;
1072
- saved_j[counter] = j + 3;
1073
- counter = (hd3 < ht) ? (counter + 1) : counter;
1074
-
1075
- if (counter >= 4) {
1076
- // process four codes at the same time
1077
- n_hamming_pass += 4;
1078
-
1079
- float distance_0 = dis0;
1080
- float distance_1 = dis0;
1081
- float distance_2 = dis0;
1082
- float distance_3 = dis0;
1083
- PQCodeDist::distance_four_codes(
1084
- pq.M,
1085
- pq.nbits,
1086
- sim_table,
1087
- codes + saved_j[0] * pq.code_size,
1088
- codes + saved_j[1] * pq.code_size,
1089
- codes + saved_j[2] * pq.code_size,
1090
- codes + saved_j[3] * pq.code_size,
1091
- distance_0,
1092
- distance_1,
1093
- distance_2,
1094
- distance_3);
1095
-
1096
- res.add(saved_j[0], dis0 + distance_0);
1097
- res.add(saved_j[1], dis0 + distance_1);
1098
- res.add(saved_j[2], dis0 + distance_2);
1099
- res.add(saved_j[3], dis0 + distance_3);
1100
-
1101
- //
1102
- counter -= 4;
1103
- saved_j[0] = saved_j[4];
1104
- saved_j[1] = saved_j[5];
1105
- saved_j[2] = saved_j[6];
1106
- saved_j[3] = saved_j[7];
1107
- }
1108
- }
1109
-
1110
- for (size_t kk = 0; kk < counter; kk++) {
1111
- n_hamming_pass++;
1112
-
1113
- float dis = dis0 +
1114
- PQCodeDist::distance_single_code(
1115
- pq.M,
1116
- pq.nbits,
1117
- sim_table,
1118
- codes + saved_j[kk] * pq.code_size);
1119
-
1120
- res.add(saved_j[kk], dis);
1121
- }
1122
-
1123
- // process leftovers
1124
- for (size_t j = (ncode / 4) * 4; j < ncode; j++) {
1125
- if (res.skip_entry(j)) {
1126
- continue;
1127
- }
1128
- const uint8_t* b_code = codes + j * code_size;
1129
- int hd = hc.hamming(b_code);
1130
- if (hd < ht) {
1131
- n_hamming_pass++;
1132
-
1133
- float dis = dis0 +
1134
- PQCodeDist::distance_single_code(
1135
- pq.M,
1136
- pq.nbits,
1137
- sim_table,
1138
- codes + j * code_size);
1139
-
1140
- res.add(j, dis);
1141
- }
1142
- }
1143
-
1144
- #pragma omp critical
1145
- {
1146
- indexIVFPQ_stats.n_hamming_pass += n_hamming_pass;
1147
- }
1148
- }
1149
-
1150
- template <class SearchResultType>
1151
- struct Run_scan_list_polysemous_hc {
1152
- using T = void;
1153
- template <class HammingComputer, class... Types>
1154
- void f(const IVFPQScannerT* scanner, Types... args) {
1155
- scanner->scan_list_polysemous_hc<HammingComputer, SearchResultType>(
1156
- args...);
1157
- }
1158
- };
1159
-
1160
- template <class SearchResultType>
1161
- void scan_list_polysemous(
1162
- size_t ncode,
1163
- const uint8_t* codes,
1164
- SearchResultType& res) const {
1165
- Run_scan_list_polysemous_hc<SearchResultType> r;
1166
- dispatch_HammingComputer(pq.code_size, r, this, ncode, codes, res);
1167
- }
1168
- };
1169
-
1170
- /* We put as many parameters as possible in template. Hopefully the
1171
- * gain in runtime is worth the code bloat.
1172
- *
1173
- * C is the comparator < or >, it is directly related to METRIC_TYPE.
1174
- *
1175
- * precompute_mode is how much we precompute (2 = precompute distance tables,
1176
- * 1 = precompute pointers to distances, 0 = compute distances one by one).
1177
- * Currently only 2 is supported
1178
- *
1179
- * use_sel: store or ignore the IDSelector
1180
- */
1181
- template <MetricType METRIC_TYPE, class C, class PQCodeDist, bool use_sel>
1182
- struct IVFPQScanner : IVFPQScannerT<idx_t, METRIC_TYPE, PQCodeDist>,
1183
- InvertedListScanner {
1184
- int precompute_mode;
1185
- const IDSelector* sel;
1186
-
1187
- IVFPQScanner(
1188
- const IndexIVFPQ& ivfpq,
1189
- bool store_pairs,
1190
- int precompute_mode,
1191
- const IDSelector* sel)
1192
- : IVFPQScannerT<idx_t, METRIC_TYPE, PQCodeDist>(ivfpq, nullptr),
1193
- precompute_mode(precompute_mode),
1194
- sel(sel) {
1195
- this->store_pairs = store_pairs;
1196
- this->keep_max = is_similarity_metric(METRIC_TYPE);
1197
- this->code_size = this->pq.code_size;
1198
- }
1199
-
1200
- void set_query(const float* query) override {
1201
- this->init_query(query);
1202
- }
1203
-
1204
- void set_list(idx_t list_no, float coarse_dis) override {
1205
- this->list_no = list_no;
1206
- this->init_list(list_no, coarse_dis, precompute_mode);
1207
- }
1208
-
1209
- float distance_to_code(const uint8_t* code) const override {
1210
- assert(precompute_mode == 2);
1211
- float dis = this->dis0 +
1212
- PQCodeDist::distance_single_code(
1213
- this->pq.M, this->pq.nbits, this->sim_table, code);
1214
- return dis;
1215
- }
1216
-
1217
- size_t scan_codes(
1218
- size_t ncode,
1219
- const uint8_t* codes,
1220
- const idx_t* ids,
1221
- ResultHandler& handler) const override {
1222
- WrappedSearchResult<C, use_sel> res(
1223
- this->key,
1224
- this->store_pairs ? nullptr : ids,
1225
- this->sel,
1226
- handler);
1227
-
1228
- if (this->polysemous_ht > 0) {
1229
- assert(precompute_mode == 2);
1230
- this->scan_list_polysemous(ncode, codes, res);
1231
- } else if (precompute_mode == 2) {
1232
- this->scan_list_with_table(ncode, codes, res);
1233
- } else if (precompute_mode == 1) {
1234
- this->scan_list_with_pointer(ncode, codes, res);
1235
- } else if (precompute_mode == 0) {
1236
- this->scan_on_the_fly_dist(ncode, codes, res);
1237
- } else {
1238
- FAISS_THROW_MSG("bad precomp mode");
1239
- }
1240
- return res.nup;
1241
- }
1242
- };
1243
-
1244
- } // anonymous namespace
1245
-
1246
508
  InvertedListScanner* IndexIVFPQ::get_InvertedListScanner(
1247
509
  bool store_pairs,
1248
510
  const IDSelector* sel,
1249
511
  const IVFSearchParameters*) const {
1250
512
  return with_simd_level([&]<SIMDLevel SL>() -> InvertedListScanner* {
1251
- auto make =
1252
- [&]<class PQCodeDist, bool use_sel>() -> InvertedListScanner* {
1253
- if (metric_type == METRIC_INNER_PRODUCT) {
1254
- return new IVFPQScanner<
1255
- METRIC_INNER_PRODUCT,
1256
- CMin<float, idx_t>,
1257
- PQCodeDist,
1258
- use_sel>(*this, store_pairs, 2, sel);
1259
- } else if (metric_type == METRIC_L2) {
1260
- return new IVFPQScanner<
1261
- METRIC_L2,
1262
- CMax<float, idx_t>,
1263
- PQCodeDist,
1264
- use_sel>(*this, store_pairs, 2, sel);
1265
- } else {
1266
- FAISS_THROW_MSG("unsupported metric type");
1267
- }
1268
- };
1269
-
1270
- auto with_decoder = [&]<bool use_sel>() -> InvertedListScanner* {
1271
- if (pq.nbits == 8) {
1272
- return make.template
1273
- operator()<PQCodeDistance<PQDecoder8, SL>, use_sel>();
1274
- } else if (pq.nbits == 16) {
1275
- return make.template
1276
- operator()<PQCodeDistance<PQDecoder16, SL>, use_sel>();
1277
- } else {
1278
- return make.template
1279
- operator()<PQCodeDistance<PQDecoderGeneric, SL>, use_sel>();
1280
- }
1281
- };
1282
-
1283
- if (sel) {
1284
- return with_decoder.template operator()<true>();
1285
- } else {
1286
- return with_decoder.template operator()<false>();
1287
- }
513
+ return pq_code_distance::make_IVFPQInvertedListScanner<SL>(
514
+ *this, store_pairs, sel);
1288
515
  });
1289
516
  }
1290
517
 
@@ -1320,25 +547,26 @@ size_t IndexIVFPQ::find_duplicates(idx_t* dup_ids, size_t* lims) const {
1320
547
  for (size_t list_no = 0; list_no < nlist; list_no++) {
1321
548
  size_t n = invlists->list_size(list_no);
1322
549
  std::vector<int> ord(n);
1323
- for (int i = 0; i < n; i++)
1324
- ord[i] = i;
550
+ for (size_t i = 0; i < n; i++) {
551
+ ord[i] = static_cast<int>(i);
552
+ }
1325
553
  InvertedLists::ScopedCodes codes(invlists, list_no);
1326
554
  CodeCmp cs = {codes.get(), code_size};
1327
555
  std::sort(ord.begin(), ord.end(), cs);
1328
556
 
1329
557
  InvertedLists::ScopedIds list_ids(invlists, list_no);
1330
558
  int prev = -1; // all elements from prev to i-1 are equal
1331
- for (int i = 0; i < n; i++) {
559
+ for (size_t i = 0; i < n; i++) {
1332
560
  if (prev >= 0 && cs.cmp(ord[prev], ord[i]) == 0) {
1333
561
  // same as previous => remember
1334
- if (prev + 1 == i) { // start new group
562
+ if (static_cast<size_t>(prev + 1) == i) { // start new group
1335
563
  ngroup++;
1336
564
  lims[ngroup] = lims[ngroup - 1];
1337
565
  dup_ids[lims[ngroup]++] = list_ids[ord[prev]];
1338
566
  }
1339
567
  dup_ids[lims[ngroup]++] = list_ids[ord[i]];
1340
568
  } else { // not same as previous.
1341
- prev = i;
569
+ prev = static_cast<int>(i);
1342
570
  }
1343
571
  }
1344
572
  }