faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -17,11 +17,10 @@
|
|
|
17
17
|
#include <faiss/IndexIVFPQ.h>
|
|
18
18
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
19
19
|
#include <faiss/impl/FaissAssert.h>
|
|
20
|
-
#include <faiss/impl/FastScanDistancePostProcessing.h>
|
|
21
|
-
#include <faiss/impl/LookupTableScaler.h>
|
|
22
20
|
#include <faiss/impl/RaBitQUtils.h>
|
|
23
|
-
#include <faiss/impl/
|
|
24
|
-
#include <faiss/impl/
|
|
21
|
+
#include <faiss/impl/fast_scan/FastScanDistancePostProcessing.h>
|
|
22
|
+
#include <faiss/impl/fast_scan/fast_scan.h>
|
|
23
|
+
#include <faiss/impl/fast_scan/simd_result_handlers.h>
|
|
25
24
|
#include <faiss/invlists/BlockInvertedLists.h>
|
|
26
25
|
#include <faiss/utils/hamming.h>
|
|
27
26
|
#include <faiss/utils/quantize_lut.h>
|
|
@@ -29,20 +28,24 @@
|
|
|
29
28
|
|
|
30
29
|
namespace faiss {
|
|
31
30
|
|
|
32
|
-
using namespace simd_result_handlers;
|
|
33
|
-
|
|
34
31
|
inline size_t roundup(size_t a, size_t b) {
|
|
35
32
|
return (a + b - 1) / b * b;
|
|
36
33
|
}
|
|
37
34
|
|
|
38
35
|
IndexIVFFastScan::IndexIVFFastScan(
|
|
39
|
-
Index*
|
|
40
|
-
size_t
|
|
41
|
-
size_t
|
|
42
|
-
size_t
|
|
36
|
+
Index* quantizer_in,
|
|
37
|
+
size_t d_in,
|
|
38
|
+
size_t nlist_in,
|
|
39
|
+
size_t code_size_in,
|
|
43
40
|
MetricType metric,
|
|
44
|
-
bool
|
|
45
|
-
: IndexIVF(
|
|
41
|
+
bool own_invlists_in)
|
|
42
|
+
: IndexIVF(
|
|
43
|
+
quantizer_in,
|
|
44
|
+
d_in,
|
|
45
|
+
nlist_in,
|
|
46
|
+
code_size_in,
|
|
47
|
+
metric,
|
|
48
|
+
own_invlists_in) {
|
|
46
49
|
// unlike other indexes, we prefer no residuals for performance reasons.
|
|
47
50
|
by_residual = false;
|
|
48
51
|
FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT);
|
|
@@ -56,35 +59,40 @@ IndexIVFFastScan::IndexIVFFastScan() {
|
|
|
56
59
|
}
|
|
57
60
|
|
|
58
61
|
void IndexIVFFastScan::init_fastscan(
|
|
59
|
-
Quantizer*
|
|
60
|
-
size_t
|
|
62
|
+
Quantizer* fine_quantizer_in,
|
|
63
|
+
size_t M_in,
|
|
61
64
|
size_t nbits_init,
|
|
62
|
-
size_t
|
|
65
|
+
size_t nlist_in,
|
|
63
66
|
MetricType /* metric */,
|
|
64
67
|
int bbs_2,
|
|
65
|
-
bool
|
|
68
|
+
bool own_invlists_in) {
|
|
66
69
|
FAISS_THROW_IF_NOT(bbs_2 % 32 == 0);
|
|
67
70
|
FAISS_THROW_IF_NOT(nbits_init == 4);
|
|
68
|
-
FAISS_THROW_IF_NOT(
|
|
71
|
+
FAISS_THROW_IF_NOT(fine_quantizer_in->d == static_cast<size_t>(d));
|
|
69
72
|
|
|
70
|
-
this->fine_quantizer =
|
|
71
|
-
this->M =
|
|
73
|
+
this->fine_quantizer = fine_quantizer_in;
|
|
74
|
+
this->M = M_in;
|
|
72
75
|
this->nbits = nbits_init;
|
|
73
76
|
this->bbs = bbs_2;
|
|
74
77
|
ksub = (1 << nbits_init);
|
|
75
|
-
M2 = roundup(
|
|
78
|
+
M2 = roundup(M_in, 2);
|
|
76
79
|
code_size = M2 / 2;
|
|
77
|
-
FAISS_THROW_IF_NOT(code_size ==
|
|
80
|
+
FAISS_THROW_IF_NOT(code_size == fine_quantizer_in->code_size);
|
|
78
81
|
|
|
79
82
|
is_trained = false;
|
|
80
|
-
if (
|
|
81
|
-
replace_invlists(
|
|
83
|
+
if (own_invlists_in) {
|
|
84
|
+
replace_invlists(
|
|
85
|
+
new BlockInvertedLists(nlist_in, get_CodePacker()), true);
|
|
82
86
|
}
|
|
83
87
|
}
|
|
84
88
|
|
|
85
89
|
void IndexIVFFastScan::init_code_packer() {
|
|
86
90
|
auto bil = dynamic_cast<BlockInvertedLists*>(invlists);
|
|
87
|
-
|
|
91
|
+
if (!bil) {
|
|
92
|
+
// invlists is not block-packed (e.g., when own_invlists=false).
|
|
93
|
+
// Nothing to do — the caller manages inverted lists externally.
|
|
94
|
+
return;
|
|
95
|
+
}
|
|
88
96
|
delete bil->packer; // in case there was one before
|
|
89
97
|
bil->packer = get_CodePacker();
|
|
90
98
|
}
|
|
@@ -239,22 +247,22 @@ void estimators_from_tables_generic(
|
|
|
239
247
|
int64_t* heap_ids,
|
|
240
248
|
const FastScanDistancePostProcessing& context) {
|
|
241
249
|
using accu_t = typename C::T;
|
|
242
|
-
size_t nscale = context.
|
|
250
|
+
size_t nscale = context.pq2x4_scale ? 2 : 0;
|
|
243
251
|
for (size_t j = 0; j < ncodes; ++j) {
|
|
244
252
|
BitstringReader bsr(codes + j * index.code_size, index.code_size);
|
|
245
253
|
accu_t dis = bias;
|
|
246
254
|
const dis_t* __restrict dt = dis_table;
|
|
247
255
|
|
|
248
256
|
for (size_t m = 0; m < index.M - nscale; m++) {
|
|
249
|
-
uint64_t c = bsr.read(index.nbits);
|
|
257
|
+
uint64_t c = bsr.read(static_cast<int>(index.nbits));
|
|
250
258
|
dis += dt[c];
|
|
251
259
|
dt += index.ksub;
|
|
252
260
|
}
|
|
253
261
|
|
|
254
|
-
if (
|
|
262
|
+
if (nscale) {
|
|
255
263
|
for (size_t m = 0; m < nscale; m++) {
|
|
256
|
-
uint64_t c = bsr.read(index.nbits);
|
|
257
|
-
dis +=
|
|
264
|
+
uint64_t c = bsr.read(static_cast<int>(index.nbits));
|
|
265
|
+
dis += dt[c] * context.pq2x4_scale;
|
|
258
266
|
dt += index.ksub;
|
|
259
267
|
}
|
|
260
268
|
}
|
|
@@ -285,33 +293,33 @@ void IndexIVFFastScan::compute_LUT_uint8(
|
|
|
285
293
|
AlignedTable<float> biases_float;
|
|
286
294
|
|
|
287
295
|
compute_LUT(n, x, cq, dis_tables_float, biases_float, context);
|
|
288
|
-
size_t
|
|
296
|
+
size_t cur_nprobe = cq.nprobe;
|
|
289
297
|
bool lut_is_3d = lookup_table_is_3d();
|
|
290
298
|
size_t dim123 = ksub * M;
|
|
291
299
|
size_t dim123_2 = ksub * M2;
|
|
292
300
|
if (lut_is_3d) {
|
|
293
|
-
dim123 *=
|
|
294
|
-
dim123_2 *=
|
|
301
|
+
dim123 *= cur_nprobe;
|
|
302
|
+
dim123_2 *= cur_nprobe;
|
|
295
303
|
}
|
|
296
304
|
dis_tables.resize(n * dim123_2);
|
|
297
305
|
if (biases_float.get()) {
|
|
298
|
-
biases.resize(n *
|
|
306
|
+
biases.resize(n * cur_nprobe);
|
|
299
307
|
}
|
|
300
308
|
|
|
301
309
|
// OMP for MSVC requires i to have signed integral type
|
|
302
310
|
#pragma omp parallel for if (n > 100)
|
|
303
|
-
for (int64_t i = 0; i < n; i++) {
|
|
311
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
304
312
|
const float* t_in = dis_tables_float.get() + i * dim123;
|
|
305
313
|
const float* b_in = nullptr;
|
|
306
314
|
uint8_t* t_out = dis_tables.get() + i * dim123_2;
|
|
307
315
|
uint16_t* b_out = nullptr;
|
|
308
316
|
if (biases_float.get()) {
|
|
309
|
-
b_in = biases_float.get() + i *
|
|
310
|
-
b_out = biases.get() + i *
|
|
317
|
+
b_in = biases_float.get() + i * cur_nprobe;
|
|
318
|
+
b_out = biases.get() + i * cur_nprobe;
|
|
311
319
|
}
|
|
312
320
|
|
|
313
321
|
quantize_LUT_and_bias(
|
|
314
|
-
|
|
322
|
+
cur_nprobe,
|
|
315
323
|
M,
|
|
316
324
|
ksub,
|
|
317
325
|
lut_is_3d,
|
|
@@ -358,10 +366,14 @@ void IndexIVFFastScan::search_preassigned(
|
|
|
358
366
|
bool store_pairs,
|
|
359
367
|
const IVFSearchParameters* params,
|
|
360
368
|
IndexIVFStats* stats) const {
|
|
361
|
-
size_t
|
|
369
|
+
size_t cur_nprobe = this->nprobe;
|
|
362
370
|
if (params) {
|
|
363
|
-
|
|
364
|
-
|
|
371
|
+
// Range-search-only option.
|
|
372
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
373
|
+
params->max_empty_result_buckets == 0,
|
|
374
|
+
"max_empty_result_buckets is a range-search knob and is "
|
|
375
|
+
"not honored by fastscan knn search");
|
|
376
|
+
cur_nprobe = params->nprobe;
|
|
365
377
|
}
|
|
366
378
|
|
|
367
379
|
FAISS_THROW_IF_NOT_MSG(
|
|
@@ -370,7 +382,7 @@ void IndexIVFFastScan::search_preassigned(
|
|
|
370
382
|
FAISS_THROW_IF_NOT(k > 0);
|
|
371
383
|
FastScanDistancePostProcessing empty_context{};
|
|
372
384
|
|
|
373
|
-
const CoarseQuantized cq = {
|
|
385
|
+
const CoarseQuantized cq = {cur_nprobe, centroid_dis, assign};
|
|
374
386
|
search_dispatch_implem(
|
|
375
387
|
n, x, k, distances, labels, cq, empty_context, params);
|
|
376
388
|
}
|
|
@@ -381,45 +393,35 @@ void IndexIVFFastScan::range_search(
|
|
|
381
393
|
float radius,
|
|
382
394
|
RangeSearchResult* result,
|
|
383
395
|
const SearchParameters* params_in) const {
|
|
384
|
-
size_t
|
|
396
|
+
size_t cur_nprobe = this->nprobe;
|
|
385
397
|
const IVFSearchParameters* params = nullptr;
|
|
386
398
|
if (params_in) {
|
|
387
399
|
params = dynamic_cast<const IVFSearchParameters*>(params_in);
|
|
388
400
|
FAISS_THROW_IF_NOT_MSG(
|
|
389
401
|
params, "IndexIVFFastScan params have incorrect type");
|
|
390
|
-
|
|
402
|
+
// k-NN-only options.
|
|
403
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
404
|
+
params->max_lists_num == 0,
|
|
405
|
+
"max_lists_num is a knn knob and is not honored by "
|
|
406
|
+
"fastscan range search");
|
|
407
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
408
|
+
!params->ensure_topk_full,
|
|
409
|
+
"ensure_topk_full is a knn knob and is not honored by "
|
|
410
|
+
"fastscan range search");
|
|
411
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
412
|
+
params->max_codes == 0,
|
|
413
|
+
"max_codes is not honored by fastscan range search");
|
|
414
|
+
cur_nprobe = params->nprobe;
|
|
391
415
|
}
|
|
392
416
|
FastScanDistancePostProcessing empty_context{};
|
|
393
417
|
|
|
394
|
-
const CoarseQuantized cq = {
|
|
418
|
+
const CoarseQuantized cq = {cur_nprobe, nullptr, nullptr};
|
|
395
419
|
range_search_dispatch_implem(
|
|
396
420
|
n, x, radius, *result, cq, empty_context, params);
|
|
397
421
|
}
|
|
398
422
|
|
|
399
423
|
namespace {
|
|
400
424
|
|
|
401
|
-
template <class C>
|
|
402
|
-
ResultHandlerCompare<C, true>* make_knn_handler_fixC(
|
|
403
|
-
int impl,
|
|
404
|
-
idx_t n,
|
|
405
|
-
idx_t k,
|
|
406
|
-
float* distances,
|
|
407
|
-
idx_t* labels,
|
|
408
|
-
const IDSelector* sel,
|
|
409
|
-
const float* normalizers) {
|
|
410
|
-
using HeapHC = HeapHandler<C, true>;
|
|
411
|
-
using ReservoirHC = ReservoirHandler<C, true>;
|
|
412
|
-
using SingleResultHC = SingleResultHandler<C, true>;
|
|
413
|
-
|
|
414
|
-
if (k == 1) {
|
|
415
|
-
return new SingleResultHC(n, 0, distances, labels, sel);
|
|
416
|
-
} else if (impl % 2 == 0) {
|
|
417
|
-
return new HeapHC(n, 0, k, distances, labels, sel, normalizers);
|
|
418
|
-
} else /* if (impl % 2 == 1) */ {
|
|
419
|
-
return new ReservoirHC(n, 0, k, 2 * k, distances, labels, sel);
|
|
420
|
-
}
|
|
421
|
-
}
|
|
422
|
-
|
|
423
425
|
using CoarseQuantized = IndexIVFFastScan::CoarseQuantized;
|
|
424
426
|
|
|
425
427
|
struct CoarseQuantizedWithBuffer : CoarseQuantized {
|
|
@@ -454,8 +456,8 @@ struct CoarseQuantizedWithBuffer : CoarseQuantized {
|
|
|
454
456
|
|
|
455
457
|
struct CoarseQuantizedSlice : CoarseQuantizedWithBuffer {
|
|
456
458
|
const size_t i0, i1;
|
|
457
|
-
CoarseQuantizedSlice(const CoarseQuantized& cq, size_t
|
|
458
|
-
: CoarseQuantizedWithBuffer(cq), i0(
|
|
459
|
+
CoarseQuantizedSlice(const CoarseQuantized& cq, size_t i0_in, size_t i1_in)
|
|
460
|
+
: CoarseQuantizedWithBuffer(cq), i0(i0_in), i1(i1_in) {
|
|
459
461
|
if (done()) {
|
|
460
462
|
dis += nprobe * i0;
|
|
461
463
|
ids += nprobe * i0;
|
|
@@ -473,20 +475,20 @@ struct CoarseQuantizedSlice : CoarseQuantizedWithBuffer {
|
|
|
473
475
|
int compute_search_nslice(
|
|
474
476
|
const IndexIVFFastScan* index,
|
|
475
477
|
size_t n,
|
|
476
|
-
size_t
|
|
478
|
+
size_t cur_nprobe) {
|
|
477
479
|
int nslice;
|
|
478
|
-
if (n <= omp_get_max_threads()) {
|
|
479
|
-
nslice = n;
|
|
480
|
+
if (n <= static_cast<size_t>(omp_get_max_threads())) {
|
|
481
|
+
nslice = static_cast<int>(n);
|
|
480
482
|
} else if (index->lookup_table_is_3d()) {
|
|
481
483
|
// make sure we don't make too big LUT tables
|
|
482
|
-
size_t lut_size_per_query = index->M * index->ksub *
|
|
484
|
+
size_t lut_size_per_query = index->M * index->ksub * cur_nprobe *
|
|
483
485
|
(sizeof(float) + sizeof(uint8_t));
|
|
484
486
|
|
|
485
487
|
size_t max_lut_size = precomputed_table_max_bytes;
|
|
486
488
|
// how many queries we can handle within mem budget
|
|
487
489
|
size_t nq_ok = std::max(max_lut_size / lut_size_per_query, size_t(1));
|
|
488
|
-
nslice = roundup(
|
|
489
|
-
std::max(size_t(n / nq_ok), size_t(1)), omp_get_max_threads());
|
|
490
|
+
nslice = static_cast<int>(roundup(
|
|
491
|
+
std::max(size_t(n / nq_ok), size_t(1)), omp_get_max_threads()));
|
|
490
492
|
} else {
|
|
491
493
|
// LUTs unlikely to be a limiting factor
|
|
492
494
|
nslice = omp_get_max_threads();
|
|
@@ -496,23 +498,25 @@ int compute_search_nslice(
|
|
|
496
498
|
|
|
497
499
|
} // namespace
|
|
498
500
|
|
|
499
|
-
|
|
501
|
+
std::unique_ptr<FastScanCodeScanner> IndexIVFFastScan::make_knn_scanner(
|
|
500
502
|
bool is_max,
|
|
501
|
-
int impl,
|
|
502
503
|
idx_t n,
|
|
503
504
|
idx_t k,
|
|
504
505
|
float* distances,
|
|
505
506
|
idx_t* labels,
|
|
506
507
|
const IDSelector* sel,
|
|
507
|
-
|
|
508
|
-
const
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
508
|
+
int impl,
|
|
509
|
+
const FastScanDistancePostProcessing&) const {
|
|
510
|
+
return make_fast_scan_knn_scanner(
|
|
511
|
+
is_max,
|
|
512
|
+
impl,
|
|
513
|
+
n,
|
|
514
|
+
0,
|
|
515
|
+
k,
|
|
516
|
+
distances,
|
|
517
|
+
labels,
|
|
518
|
+
sel,
|
|
519
|
+
/*with_id_map=*/true);
|
|
516
520
|
}
|
|
517
521
|
|
|
518
522
|
void IndexIVFFastScan::search_dispatch_implem(
|
|
@@ -524,13 +528,12 @@ void IndexIVFFastScan::search_dispatch_implem(
|
|
|
524
528
|
const CoarseQuantized& cq_in,
|
|
525
529
|
const FastScanDistancePostProcessing& context,
|
|
526
530
|
const IVFSearchParameters* params) const {
|
|
527
|
-
const idx_t
|
|
531
|
+
const idx_t cur_nprobe = params ? params->nprobe : this->nprobe;
|
|
528
532
|
const IDSelector* sel = (params) ? params->sel : nullptr;
|
|
529
533
|
const SearchParameters* quantizer_params =
|
|
530
534
|
params ? params->quantizer_params : nullptr;
|
|
531
535
|
|
|
532
536
|
bool is_max = !is_similarity_metric(metric_type);
|
|
533
|
-
using RH = SIMDResultHandlerToFloat;
|
|
534
537
|
|
|
535
538
|
if (n == 0) {
|
|
536
539
|
return;
|
|
@@ -539,8 +542,14 @@ void IndexIVFFastScan::search_dispatch_implem(
|
|
|
539
542
|
// actual implementation used
|
|
540
543
|
int impl = implem;
|
|
541
544
|
|
|
545
|
+
// Early-stop k-NN options require the per-query implementations.
|
|
546
|
+
const bool any_early_term_knob = params &&
|
|
547
|
+
(params->max_codes != 0 || params->max_lists_num != 0 ||
|
|
548
|
+
params->ensure_topk_full);
|
|
549
|
+
|
|
542
550
|
if (impl == 0) {
|
|
543
|
-
|
|
551
|
+
// Auto-select the per-query path when early-stop budgets are used.
|
|
552
|
+
if (bbs == 32 && !any_early_term_knob) {
|
|
544
553
|
impl = 12;
|
|
545
554
|
} else {
|
|
546
555
|
impl = 10;
|
|
@@ -557,15 +566,24 @@ void IndexIVFFastScan::search_dispatch_implem(
|
|
|
557
566
|
impl -= 100;
|
|
558
567
|
}
|
|
559
568
|
|
|
569
|
+
if (any_early_term_knob) {
|
|
570
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
571
|
+
impl == 10 || impl == 11,
|
|
572
|
+
"max_codes / max_lists_num / ensure_topk_full are only "
|
|
573
|
+
"supported by IndexIVFFastScan implem 10/11; set "
|
|
574
|
+
"index.implem = 10 (or 11 for k>20) explicitly, or leave it "
|
|
575
|
+
"at the default 0");
|
|
576
|
+
}
|
|
577
|
+
|
|
560
578
|
CoarseQuantizedWithBuffer cq(cq_in);
|
|
561
|
-
cq.nprobe =
|
|
579
|
+
cq.nprobe = cur_nprobe;
|
|
562
580
|
|
|
563
581
|
if (!cq.done() && !multiple_threads) {
|
|
564
582
|
// we do the coarse quantization here execpt when search is
|
|
565
583
|
// sliced over threads (then it is more efficient to have each thread do
|
|
566
584
|
// its own coarse quantization)
|
|
567
585
|
cq.quantize(quantizer, n, x, quantizer_params);
|
|
568
|
-
invlists->prefetch_lists(cq.ids, n * cq.nprobe);
|
|
586
|
+
invlists->prefetch_lists(cq.ids, static_cast<int>(n * cq.nprobe));
|
|
569
587
|
}
|
|
570
588
|
|
|
571
589
|
if (impl == 1) {
|
|
@@ -588,43 +606,38 @@ void IndexIVFFastScan::search_dispatch_implem(
|
|
|
588
606
|
size_t ndis = 0, nlist_visited = 0;
|
|
589
607
|
|
|
590
608
|
if (!multiple_threads) {
|
|
591
|
-
|
|
592
|
-
if (impl == 12 || impl == 13) {
|
|
593
|
-
std::unique_ptr<RH> handler(
|
|
594
|
-
static_cast<RH*>(this->make_knn_handler(
|
|
595
|
-
is_max,
|
|
596
|
-
impl,
|
|
597
|
-
n,
|
|
598
|
-
k,
|
|
599
|
-
distances,
|
|
600
|
-
labels,
|
|
601
|
-
sel,
|
|
602
|
-
context))
|
|
603
|
-
);
|
|
604
|
-
search_implem_12(
|
|
605
|
-
n, x, *handler.get(),
|
|
606
|
-
cq, &ndis, &nlist_visited, context, params);
|
|
607
|
-
} else if (impl == 14 || impl == 15) {
|
|
609
|
+
if (impl == 14 || impl == 15) {
|
|
608
610
|
search_implem_14(
|
|
609
|
-
n, x, k, distances, labels,
|
|
610
|
-
cq, impl, context, params);
|
|
611
|
+
n, x, k, distances, labels, cq, impl, context, params);
|
|
611
612
|
} else {
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
613
|
+
auto scanner = make_knn_scanner(
|
|
614
|
+
is_max, n, k, distances, labels, sel, impl, context);
|
|
615
|
+
auto* handler = scanner->handler();
|
|
616
|
+
if (impl == 12 || impl == 13) {
|
|
617
|
+
search_implem_12(
|
|
618
|
+
n,
|
|
619
|
+
x,
|
|
620
|
+
*handler,
|
|
621
|
+
cq,
|
|
622
|
+
&ndis,
|
|
623
|
+
&nlist_visited,
|
|
624
|
+
context,
|
|
625
|
+
params,
|
|
626
|
+
*scanner);
|
|
627
|
+
} else {
|
|
628
|
+
search_implem_10(
|
|
629
|
+
n,
|
|
630
|
+
x,
|
|
631
|
+
k,
|
|
632
|
+
*handler,
|
|
633
|
+
cq,
|
|
634
|
+
&ndis,
|
|
635
|
+
&nlist_visited,
|
|
636
|
+
context,
|
|
637
|
+
params,
|
|
638
|
+
*scanner);
|
|
639
|
+
}
|
|
626
640
|
}
|
|
627
|
-
// clang-format on
|
|
628
641
|
} else {
|
|
629
642
|
// explicitly slice over threads
|
|
630
643
|
int nslice = compute_search_nslice(this, n, cq.nprobe);
|
|
@@ -649,30 +662,43 @@ void IndexIVFFastScan::search_dispatch_implem(
|
|
|
649
662
|
// pointer
|
|
650
663
|
FastScanDistancePostProcessing thread_context = context;
|
|
651
664
|
if (thread_context.query_factors != nullptr) {
|
|
652
|
-
thread_context.query_factors += i0 *
|
|
665
|
+
thread_context.query_factors += i0 * cur_nprobe;
|
|
653
666
|
}
|
|
654
667
|
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
// clang-format off
|
|
668
|
+
auto scanner = make_knn_scanner(
|
|
669
|
+
is_max,
|
|
670
|
+
i1 - i0,
|
|
671
|
+
k,
|
|
672
|
+
dis_i,
|
|
673
|
+
lab_i,
|
|
674
|
+
sel,
|
|
675
|
+
impl,
|
|
676
|
+
thread_context);
|
|
677
|
+
auto* handler = scanner->handler();
|
|
666
678
|
if (impl == 12 || impl == 13) {
|
|
667
679
|
search_implem_12(
|
|
668
|
-
i1 - i0,
|
|
669
|
-
|
|
680
|
+
i1 - i0,
|
|
681
|
+
x + i0 * d,
|
|
682
|
+
*handler,
|
|
683
|
+
cq_i,
|
|
684
|
+
&ndis,
|
|
685
|
+
&nlist_visited,
|
|
686
|
+
thread_context,
|
|
687
|
+
params,
|
|
688
|
+
*scanner);
|
|
670
689
|
} else {
|
|
671
690
|
search_implem_10(
|
|
672
|
-
i1 - i0,
|
|
673
|
-
|
|
691
|
+
i1 - i0,
|
|
692
|
+
x + i0 * d,
|
|
693
|
+
k,
|
|
694
|
+
*handler,
|
|
695
|
+
cq_i,
|
|
696
|
+
&ndis,
|
|
697
|
+
&nlist_visited,
|
|
698
|
+
thread_context,
|
|
699
|
+
params,
|
|
700
|
+
*scanner);
|
|
674
701
|
}
|
|
675
|
-
// clang-format on
|
|
676
702
|
}
|
|
677
703
|
}
|
|
678
704
|
}
|
|
@@ -702,11 +728,23 @@ void IndexIVFFastScan::range_search_dispatch_implem(
|
|
|
702
728
|
if (n == 0) {
|
|
703
729
|
return;
|
|
704
730
|
}
|
|
731
|
+
// FastScan range early-stop budget: enabled only for ordered per-query
|
|
732
|
+
// scanning below.
|
|
733
|
+
const bool use_empty_result_early_exit =
|
|
734
|
+
params && params->max_empty_result_buckets != 0;
|
|
735
|
+
const int pmode = this->parallel_mode & ~PARALLEL_MODE_NO_HEAP_INIT;
|
|
736
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
737
|
+
!use_empty_result_early_exit || pmode == 0,
|
|
738
|
+
"max_empty_result_buckets supported only for parallel_mode = 0");
|
|
739
|
+
|
|
705
740
|
// actual implementation used
|
|
706
741
|
int impl = implem;
|
|
707
742
|
|
|
708
743
|
if (impl == 0) {
|
|
709
|
-
if (
|
|
744
|
+
if (use_empty_result_early_exit) {
|
|
745
|
+
// Empty-bucket early stop needs per-query probe order.
|
|
746
|
+
impl = 10;
|
|
747
|
+
} else if (bbs == 32) {
|
|
710
748
|
impl = 12;
|
|
711
749
|
} else {
|
|
712
750
|
impl = 10;
|
|
@@ -722,28 +760,44 @@ void IndexIVFFastScan::range_search_dispatch_implem(
|
|
|
722
760
|
impl -= 100;
|
|
723
761
|
}
|
|
724
762
|
|
|
763
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
764
|
+
!use_empty_result_early_exit || impl == 10,
|
|
765
|
+
"max_empty_result_buckets is only supported by "
|
|
766
|
+
"IndexIVFFastScan range-search implem 10");
|
|
767
|
+
|
|
725
768
|
if (!multiple_threads && !cq.done()) {
|
|
726
769
|
cq.quantize(quantizer, n, x, quantizer_params);
|
|
727
|
-
invlists->prefetch_lists(cq.ids, n * cq.nprobe);
|
|
770
|
+
invlists->prefetch_lists(cq.ids, static_cast<int>(n * cq.nprobe));
|
|
728
771
|
}
|
|
729
772
|
|
|
730
773
|
size_t ndis = 0, nlist_visited = 0;
|
|
731
774
|
|
|
732
775
|
if (!multiple_threads) { // single thread
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
handler.reset(new RangeHandler<CMax<uint16_t, int64_t>, true>(
|
|
736
|
-
rres, radius, 0, sel));
|
|
737
|
-
} else {
|
|
738
|
-
handler.reset(new RangeHandler<CMin<uint16_t, int64_t>, true>(
|
|
739
|
-
rres, radius, 0, sel));
|
|
740
|
-
}
|
|
776
|
+
auto scanner = make_range_scanner(is_max, rres, radius, 0, sel);
|
|
777
|
+
auto* handler = scanner->handler();
|
|
741
778
|
if (impl == 12) {
|
|
742
779
|
search_implem_12(
|
|
743
|
-
n,
|
|
780
|
+
n,
|
|
781
|
+
x,
|
|
782
|
+
*handler,
|
|
783
|
+
cq,
|
|
784
|
+
&ndis,
|
|
785
|
+
&nlist_visited,
|
|
786
|
+
context,
|
|
787
|
+
nullptr,
|
|
788
|
+
*scanner);
|
|
744
789
|
} else if (impl == 10) {
|
|
745
790
|
search_implem_10(
|
|
746
|
-
n,
|
|
791
|
+
n,
|
|
792
|
+
x,
|
|
793
|
+
/*k=*/0, // range search has no k
|
|
794
|
+
*handler,
|
|
795
|
+
cq,
|
|
796
|
+
&ndis,
|
|
797
|
+
&nlist_visited,
|
|
798
|
+
context,
|
|
799
|
+
params,
|
|
800
|
+
*scanner);
|
|
747
801
|
} else {
|
|
748
802
|
FAISS_THROW_FMT("Range search implem %d not implemented", impl);
|
|
749
803
|
}
|
|
@@ -762,35 +816,33 @@ void IndexIVFFastScan::range_search_dispatch_implem(
|
|
|
762
816
|
if (!cq_i.done()) {
|
|
763
817
|
cq_i.quantize_slice(quantizer, x, quantizer_params);
|
|
764
818
|
}
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
CMax<uint16_t, int64_t>,
|
|
769
|
-
true>(pres, radius, 0, i0, i1, sel));
|
|
770
|
-
} else {
|
|
771
|
-
handler.reset(new PartialRangeHandler<
|
|
772
|
-
CMin<uint16_t, int64_t>,
|
|
773
|
-
true>(pres, radius, 0, i0, i1, sel));
|
|
774
|
-
}
|
|
819
|
+
auto scanner = make_partial_range_scanner(
|
|
820
|
+
is_max, pres, radius, 0, i0, i1, sel);
|
|
821
|
+
auto* handler = scanner->handler();
|
|
775
822
|
|
|
776
823
|
if (impl == 12 || impl == 13) {
|
|
777
824
|
search_implem_12(
|
|
778
825
|
i1 - i0,
|
|
779
826
|
x + i0 * d,
|
|
780
|
-
*handler
|
|
827
|
+
*handler,
|
|
781
828
|
cq_i,
|
|
782
829
|
&ndis,
|
|
783
830
|
&nlist_visited,
|
|
784
|
-
context
|
|
831
|
+
context,
|
|
832
|
+
nullptr,
|
|
833
|
+
*scanner);
|
|
785
834
|
} else {
|
|
786
835
|
search_implem_10(
|
|
787
836
|
i1 - i0,
|
|
788
837
|
x + i0 * d,
|
|
789
|
-
|
|
838
|
+
/*k=*/0,
|
|
839
|
+
*handler,
|
|
790
840
|
cq_i,
|
|
791
841
|
&ndis,
|
|
792
842
|
&nlist_visited,
|
|
793
|
-
context
|
|
843
|
+
context,
|
|
844
|
+
params,
|
|
845
|
+
*scanner);
|
|
794
846
|
}
|
|
795
847
|
}
|
|
796
848
|
pres.finalize();
|
|
@@ -811,7 +863,7 @@ void IndexIVFFastScan::search_implem_1(
|
|
|
811
863
|
idx_t* labels,
|
|
812
864
|
const CoarseQuantized& cq,
|
|
813
865
|
const FastScanDistancePostProcessing& context,
|
|
814
|
-
const IVFSearchParameters* params) const {
|
|
866
|
+
const IVFSearchParameters* /* params */) const {
|
|
815
867
|
FAISS_THROW_IF_NOT(orig_invlists);
|
|
816
868
|
|
|
817
869
|
size_t dim12 = ksub * M;
|
|
@@ -824,7 +876,7 @@ void IndexIVFFastScan::search_implem_1(
|
|
|
824
876
|
bool single_LUT = !lookup_table_is_3d();
|
|
825
877
|
|
|
826
878
|
size_t ndis = 0, nlist_visited = 0;
|
|
827
|
-
size_t
|
|
879
|
+
size_t cur_nprobe = cq.nprobe;
|
|
828
880
|
#pragma omp parallel for reduction(+ : ndis, nlist_visited)
|
|
829
881
|
for (idx_t i = 0; i < n; i++) {
|
|
830
882
|
int64_t* heap_ids = labels + i * k;
|
|
@@ -835,11 +887,11 @@ void IndexIVFFastScan::search_implem_1(
|
|
|
835
887
|
if (single_LUT) {
|
|
836
888
|
LUT = dis_tables.get() + i * dim12;
|
|
837
889
|
}
|
|
838
|
-
for (
|
|
890
|
+
for (size_t j = 0; j < cur_nprobe; j++) {
|
|
839
891
|
if (!single_LUT) {
|
|
840
|
-
LUT = dis_tables.get() + (i *
|
|
892
|
+
LUT = dis_tables.get() + (i * cur_nprobe + j) * dim12;
|
|
841
893
|
}
|
|
842
|
-
idx_t list_no = cq.ids[i *
|
|
894
|
+
idx_t list_no = cq.ids[i * cur_nprobe + j];
|
|
843
895
|
if (list_no < 0) {
|
|
844
896
|
continue;
|
|
845
897
|
}
|
|
@@ -850,7 +902,7 @@ void IndexIVFFastScan::search_implem_1(
|
|
|
850
902
|
InvertedLists::ScopedCodes codes(orig_invlists, list_no);
|
|
851
903
|
InvertedLists::ScopedIds ids(orig_invlists, list_no);
|
|
852
904
|
|
|
853
|
-
float bias = biases.get() ? biases[i *
|
|
905
|
+
float bias = biases.get() ? biases[i * cur_nprobe + j] : 0;
|
|
854
906
|
|
|
855
907
|
estimators_from_tables_generic<C>(
|
|
856
908
|
*this,
|
|
@@ -882,7 +934,7 @@ void IndexIVFFastScan::search_implem_2(
|
|
|
882
934
|
idx_t* labels,
|
|
883
935
|
const CoarseQuantized& cq,
|
|
884
936
|
const FastScanDistancePostProcessing& context,
|
|
885
|
-
const IVFSearchParameters* params) const {
|
|
937
|
+
const IVFSearchParameters* /* params */) const {
|
|
886
938
|
FAISS_THROW_IF_NOT(orig_invlists);
|
|
887
939
|
|
|
888
940
|
size_t dim12 = ksub * M2;
|
|
@@ -895,7 +947,7 @@ void IndexIVFFastScan::search_implem_2(
|
|
|
895
947
|
bool single_LUT = !lookup_table_is_3d();
|
|
896
948
|
|
|
897
949
|
size_t ndis = 0, nlist_visited = 0;
|
|
898
|
-
size_t
|
|
950
|
+
size_t cur_nprobe = cq.nprobe;
|
|
899
951
|
|
|
900
952
|
#pragma omp parallel for reduction(+ : ndis, nlist_visited)
|
|
901
953
|
for (idx_t i = 0; i < n; i++) {
|
|
@@ -908,11 +960,11 @@ void IndexIVFFastScan::search_implem_2(
|
|
|
908
960
|
if (single_LUT) {
|
|
909
961
|
LUT = dis_tables.get() + i * dim12;
|
|
910
962
|
}
|
|
911
|
-
for (
|
|
963
|
+
for (size_t j = 0; j < cur_nprobe; j++) {
|
|
912
964
|
if (!single_LUT) {
|
|
913
|
-
LUT = dis_tables.get() + (i *
|
|
965
|
+
LUT = dis_tables.get() + (i * cur_nprobe + j) * dim12;
|
|
914
966
|
}
|
|
915
|
-
idx_t list_no = cq.ids[i *
|
|
967
|
+
idx_t list_no = cq.ids[i * cur_nprobe + j];
|
|
916
968
|
if (list_no < 0) {
|
|
917
969
|
continue;
|
|
918
970
|
}
|
|
@@ -923,7 +975,7 @@ void IndexIVFFastScan::search_implem_2(
|
|
|
923
975
|
InvertedLists::ScopedCodes codes(orig_invlists, list_no);
|
|
924
976
|
InvertedLists::ScopedIds ids(orig_invlists, list_no);
|
|
925
977
|
|
|
926
|
-
uint16_t bias = biases.get() ? biases[i *
|
|
978
|
+
uint16_t bias = biases.get() ? biases[i * cur_nprobe + j] : 0;
|
|
927
979
|
|
|
928
980
|
estimators_from_tables_generic<C>(
|
|
929
981
|
*this,
|
|
@@ -962,12 +1014,14 @@ void IndexIVFFastScan::search_implem_2(
|
|
|
962
1014
|
void IndexIVFFastScan::search_implem_10(
|
|
963
1015
|
idx_t n,
|
|
964
1016
|
const float* x,
|
|
1017
|
+
idx_t k,
|
|
965
1018
|
SIMDResultHandlerToFloat& handler,
|
|
966
1019
|
const CoarseQuantized& cq,
|
|
967
1020
|
size_t* ndis_out,
|
|
968
1021
|
size_t* nlist_out,
|
|
969
1022
|
const FastScanDistancePostProcessing& context,
|
|
970
|
-
const IVFSearchParameters*
|
|
1023
|
+
const IVFSearchParameters* params,
|
|
1024
|
+
FastScanCodeScanner& scanner) const {
|
|
971
1025
|
size_t dim12 = ksub * M2;
|
|
972
1026
|
AlignedTable<uint8_t> dis_tables;
|
|
973
1027
|
AlignedTable<uint16_t> biases;
|
|
@@ -981,7 +1035,28 @@ void IndexIVFFastScan::search_implem_10(
|
|
|
981
1035
|
int qmap1[1];
|
|
982
1036
|
handler.q_map = qmap1;
|
|
983
1037
|
handler.begin(skip & 16 ? nullptr : normalizers.get());
|
|
984
|
-
size_t
|
|
1038
|
+
size_t cur_nprobe = cq.nprobe;
|
|
1039
|
+
|
|
1040
|
+
// Per-query early-stop options from SearchParametersIVF.
|
|
1041
|
+
const size_t param_max_codes = params ? params->max_codes : 0;
|
|
1042
|
+
const size_t param_max_lists_num = params ? params->max_lists_num : 0;
|
|
1043
|
+
const bool ensure_topk_full = params ? params->ensure_topk_full : false;
|
|
1044
|
+
const size_t cur_max_codes = (param_max_codes == 0)
|
|
1045
|
+
? std::numeric_limits<size_t>::max()
|
|
1046
|
+
: param_max_codes;
|
|
1047
|
+
const size_t cur_max_lists_num =
|
|
1048
|
+
(param_max_lists_num == 0) ? cur_nprobe : param_max_lists_num;
|
|
1049
|
+
// Effective budgets are the values tested in the probe loop below.
|
|
1050
|
+
// ensure_topk_full raises small budgets to reduce empty result slots.
|
|
1051
|
+
const size_t effective_max_codes = ensure_topk_full
|
|
1052
|
+
? std::max(cur_max_codes, (size_t)k)
|
|
1053
|
+
: cur_max_codes;
|
|
1054
|
+
const size_t effective_max_lists_num = ensure_topk_full
|
|
1055
|
+
? std::max(cur_max_lists_num, (size_t)k)
|
|
1056
|
+
: cur_max_lists_num;
|
|
1057
|
+
const bool is_range_search = k == 0;
|
|
1058
|
+
const size_t max_empty_result_buckets =
|
|
1059
|
+
(is_range_search && params) ? params->max_empty_result_buckets : 0;
|
|
985
1060
|
|
|
986
1061
|
// Allocate probe_map once and reuse it
|
|
987
1062
|
std::vector<int> probe_map;
|
|
@@ -989,13 +1064,30 @@ void IndexIVFFastScan::search_implem_10(
|
|
|
989
1064
|
|
|
990
1065
|
for (idx_t i = 0; i < n; i++) {
|
|
991
1066
|
const uint8_t* LUT = nullptr;
|
|
992
|
-
qmap1[0] = i;
|
|
1067
|
+
qmap1[0] = static_cast<int>(i);
|
|
993
1068
|
|
|
994
1069
|
if (single_LUT) {
|
|
995
1070
|
LUT = dis_tables.get() + i * dim12;
|
|
996
1071
|
}
|
|
997
|
-
|
|
998
|
-
|
|
1072
|
+
// Per-query counters. For k-NN, the handler count excludes rows
|
|
1073
|
+
// filtered by IDSelector.
|
|
1074
|
+
const size_t scan0 = handler.count_scanned_rows();
|
|
1075
|
+
size_t nscan_q = 0;
|
|
1076
|
+
size_t nlists_visited_q = 0;
|
|
1077
|
+
size_t nempty_result_buckets = 0;
|
|
1078
|
+
for (size_t j = 0; j < cur_nprobe; j++) {
|
|
1079
|
+
if (!is_range_search) {
|
|
1080
|
+
nscan_q = handler.count_scanned_rows() - scan0;
|
|
1081
|
+
}
|
|
1082
|
+
// Early-stop check: apply k-NN max_codes/max_lists_num before
|
|
1083
|
+
// starting the next list. nscan_q excludes IDSelector-filtered
|
|
1084
|
+
// rows.
|
|
1085
|
+
if (nscan_q >= effective_max_codes ||
|
|
1086
|
+
nlists_visited_q >= effective_max_lists_num) {
|
|
1087
|
+
break;
|
|
1088
|
+
}
|
|
1089
|
+
const size_t prev_in_range_num = handler.in_range_num;
|
|
1090
|
+
size_t ij = i * cur_nprobe + j;
|
|
999
1091
|
if (!single_LUT) {
|
|
1000
1092
|
LUT = dis_tables.get() + ij * dim12;
|
|
1001
1093
|
}
|
|
@@ -1005,10 +1097,22 @@ void IndexIVFFastScan::search_implem_10(
|
|
|
1005
1097
|
|
|
1006
1098
|
idx_t list_no = cq.ids[ij];
|
|
1007
1099
|
if (list_no < 0) {
|
|
1100
|
+
// Early-stop check: invalid probes count as empty range
|
|
1101
|
+
// buckets.
|
|
1102
|
+
if (max_empty_result_buckets > 0 &&
|
|
1103
|
+
++nempty_result_buckets >= max_empty_result_buckets) {
|
|
1104
|
+
break;
|
|
1105
|
+
}
|
|
1008
1106
|
continue;
|
|
1009
1107
|
}
|
|
1010
1108
|
size_t ls = invlists->list_size(list_no);
|
|
1011
1109
|
if (ls == 0) {
|
|
1110
|
+
// Early-stop check: empty inverted lists count as empty range
|
|
1111
|
+
// buckets.
|
|
1112
|
+
if (max_empty_result_buckets > 0 &&
|
|
1113
|
+
++nempty_result_buckets >= max_empty_result_buckets) {
|
|
1114
|
+
break;
|
|
1115
|
+
}
|
|
1012
1116
|
continue;
|
|
1013
1117
|
}
|
|
1014
1118
|
|
|
@@ -1023,19 +1127,35 @@ void IndexIVFFastScan::search_implem_10(
|
|
|
1023
1127
|
probe_map[0] = static_cast<int>(j);
|
|
1024
1128
|
handler.set_list_context(list_no, probe_map);
|
|
1025
1129
|
|
|
1026
|
-
|
|
1130
|
+
scanner.accumulate_loop(
|
|
1027
1131
|
1,
|
|
1028
1132
|
roundup(ls, bbs),
|
|
1029
1133
|
bbs,
|
|
1030
1134
|
M2,
|
|
1031
1135
|
codes.get(),
|
|
1032
1136
|
LUT,
|
|
1033
|
-
|
|
1034
|
-
context.norm_scaler,
|
|
1137
|
+
context.pq2x4_scale,
|
|
1035
1138
|
get_block_stride());
|
|
1036
1139
|
|
|
1037
1140
|
ndis += ls;
|
|
1038
1141
|
nlist_visited++;
|
|
1142
|
+
if (is_range_search) {
|
|
1143
|
+
nscan_q += ls;
|
|
1144
|
+
}
|
|
1145
|
+
nlists_visited_q++;
|
|
1146
|
+
|
|
1147
|
+
if (max_empty_result_buckets > 0) {
|
|
1148
|
+
// Early-stop check: apply the range-search empty-bucket
|
|
1149
|
+
// budget after each visited list; any hit resets the counter.
|
|
1150
|
+
if (handler.in_range_num == prev_in_range_num) {
|
|
1151
|
+
nempty_result_buckets++;
|
|
1152
|
+
if (nempty_result_buckets >= max_empty_result_buckets) {
|
|
1153
|
+
break;
|
|
1154
|
+
}
|
|
1155
|
+
} else {
|
|
1156
|
+
nempty_result_buckets = 0;
|
|
1157
|
+
}
|
|
1158
|
+
}
|
|
1039
1159
|
}
|
|
1040
1160
|
}
|
|
1041
1161
|
|
|
@@ -1052,7 +1172,8 @@ void IndexIVFFastScan::search_implem_12(
|
|
|
1052
1172
|
size_t* ndis_out,
|
|
1053
1173
|
size_t* nlist_out,
|
|
1054
1174
|
const FastScanDistancePostProcessing& context,
|
|
1055
|
-
const IVFSearchParameters* /* params
|
|
1175
|
+
const IVFSearchParameters* /* params */,
|
|
1176
|
+
FastScanCodeScanner& scanner) const {
|
|
1056
1177
|
if (n == 0) { // does not work well with reservoir
|
|
1057
1178
|
return;
|
|
1058
1179
|
}
|
|
@@ -1073,15 +1194,15 @@ void IndexIVFFastScan::search_implem_12(
|
|
|
1073
1194
|
int rank; // this is the rank'th result of the coarse quantizer
|
|
1074
1195
|
};
|
|
1075
1196
|
bool single_LUT = !lookup_table_is_3d();
|
|
1076
|
-
size_t
|
|
1197
|
+
size_t cur_nprobe = cq.nprobe;
|
|
1077
1198
|
|
|
1078
1199
|
std::vector<QC> qcs;
|
|
1079
1200
|
{
|
|
1080
|
-
|
|
1081
|
-
for (
|
|
1082
|
-
for (
|
|
1201
|
+
size_t ij = 0;
|
|
1202
|
+
for (idx_t i = 0; i < n; i++) {
|
|
1203
|
+
for (size_t j = 0; j < cur_nprobe; j++) {
|
|
1083
1204
|
if (cq.ids[ij] >= 0) {
|
|
1084
|
-
qcs.push_back(QC{i, int(cq.ids[ij]), int(j)});
|
|
1205
|
+
qcs.push_back(QC{int(i), int(cq.ids[ij]), int(j)});
|
|
1085
1206
|
}
|
|
1086
1207
|
ij++;
|
|
1087
1208
|
}
|
|
@@ -1093,7 +1214,7 @@ void IndexIVFFastScan::search_implem_12(
|
|
|
1093
1214
|
|
|
1094
1215
|
// prepare the result handlers
|
|
1095
1216
|
|
|
1096
|
-
int actual_qbs2 = this->qbs2 ? this->qbs2 : 11;
|
|
1217
|
+
int actual_qbs2 = static_cast<int>(this->qbs2 ? this->qbs2 : 11);
|
|
1097
1218
|
|
|
1098
1219
|
std::vector<uint16_t> tmp_bias;
|
|
1099
1220
|
if (biases.get()) {
|
|
@@ -1130,7 +1251,7 @@ void IndexIVFFastScan::search_implem_12(
|
|
|
1130
1251
|
nlist_visited++;
|
|
1131
1252
|
|
|
1132
1253
|
// re-organize LUTs and biases into the right order
|
|
1133
|
-
int nc = i1 - i0;
|
|
1254
|
+
int nc = static_cast<int>(i1 - i0);
|
|
1134
1255
|
|
|
1135
1256
|
std::vector<int> q_map(nc), lut_entries(nc);
|
|
1136
1257
|
AlignedTable<uint8_t> LUT(nc * dim12);
|
|
@@ -1140,7 +1261,7 @@ void IndexIVFFastScan::search_implem_12(
|
|
|
1140
1261
|
for (size_t i = i0; i < i1; i++) {
|
|
1141
1262
|
const QC& qc = qcs[i];
|
|
1142
1263
|
q_map[i - i0] = qc.qno;
|
|
1143
|
-
int ij = qc.qno *
|
|
1264
|
+
int ij = static_cast<int>(qc.qno * cur_nprobe + qc.rank);
|
|
1144
1265
|
lut_entries[i - i0] = single_LUT ? qc.qno : ij;
|
|
1145
1266
|
if (biases.get()) {
|
|
1146
1267
|
tmp_bias[i - i0] = biases[ij];
|
|
@@ -1148,7 +1269,7 @@ void IndexIVFFastScan::search_implem_12(
|
|
|
1148
1269
|
}
|
|
1149
1270
|
pq4_pack_LUT_qbs_q_map(
|
|
1150
1271
|
qbs_for_list,
|
|
1151
|
-
M2,
|
|
1272
|
+
static_cast<int>(M2),
|
|
1152
1273
|
dis_tables.get(),
|
|
1153
1274
|
lut_entries.data(),
|
|
1154
1275
|
LUT.get());
|
|
@@ -1176,14 +1297,13 @@ void IndexIVFFastScan::search_implem_12(
|
|
|
1176
1297
|
}
|
|
1177
1298
|
handler.set_list_context(list_no, probe_map);
|
|
1178
1299
|
|
|
1179
|
-
|
|
1300
|
+
scanner.accumulate_loop_qbs(
|
|
1180
1301
|
qbs_for_list,
|
|
1181
1302
|
list_size,
|
|
1182
|
-
M2,
|
|
1303
|
+
static_cast<int>(M2),
|
|
1183
1304
|
codes.get(),
|
|
1184
1305
|
LUT.get(),
|
|
1185
|
-
|
|
1186
|
-
context.norm_scaler,
|
|
1306
|
+
context.pq2x4_scale,
|
|
1187
1307
|
get_block_stride());
|
|
1188
1308
|
// prepare for next loop
|
|
1189
1309
|
i0 = i1;
|
|
@@ -1229,15 +1349,15 @@ void IndexIVFFastScan::search_implem_14(
|
|
|
1229
1349
|
int rank; // this is the rank'th result of the coarse quantizer
|
|
1230
1350
|
};
|
|
1231
1351
|
bool single_LUT = !lookup_table_is_3d();
|
|
1232
|
-
size_t
|
|
1352
|
+
size_t cur_nprobe = cq.nprobe;
|
|
1233
1353
|
|
|
1234
1354
|
std::vector<QC> qcs;
|
|
1235
1355
|
{
|
|
1236
|
-
|
|
1237
|
-
for (
|
|
1238
|
-
for (
|
|
1356
|
+
size_t ij = 0;
|
|
1357
|
+
for (idx_t i = 0; i < n; i++) {
|
|
1358
|
+
for (size_t j = 0; j < cur_nprobe; j++) {
|
|
1239
1359
|
if (cq.ids[ij] >= 0) {
|
|
1240
|
-
qcs.push_back(QC{i, int(cq.ids[ij]), int(j)});
|
|
1360
|
+
qcs.push_back(QC{int(i), int(cq.ids[ij]), int(j)});
|
|
1241
1361
|
}
|
|
1242
1362
|
ij++;
|
|
1243
1363
|
}
|
|
@@ -1316,25 +1436,24 @@ void IndexIVFFastScan::search_implem_14(
|
|
|
1316
1436
|
std::vector<idx_t> local_idx(k * n);
|
|
1317
1437
|
std::vector<float> local_dis(k * n);
|
|
1318
1438
|
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
int actual_qbs2 = this->qbs2 ? this->qbs2 : 11;
|
|
1439
|
+
auto scanner = make_knn_scanner(
|
|
1440
|
+
is_max,
|
|
1441
|
+
n,
|
|
1442
|
+
k,
|
|
1443
|
+
local_dis.data(),
|
|
1444
|
+
local_idx.data(),
|
|
1445
|
+
sel,
|
|
1446
|
+
impl,
|
|
1447
|
+
context);
|
|
1448
|
+
SIMDResultHandlerToFloat* handler_ptr = scanner->handler();
|
|
1449
|
+
handler_ptr->begin(normalizers.get());
|
|
1450
|
+
|
|
1451
|
+
int actual_qbs2 = static_cast<int>(this->qbs2 ? this->qbs2 : 11);
|
|
1333
1452
|
|
|
1334
1453
|
std::vector<uint16_t> tmp_bias;
|
|
1335
1454
|
if (biases.get()) {
|
|
1336
1455
|
tmp_bias.resize(actual_qbs2);
|
|
1337
|
-
|
|
1456
|
+
handler_ptr->dbias = tmp_bias.data();
|
|
1338
1457
|
}
|
|
1339
1458
|
|
|
1340
1459
|
std::set<int> q_set;
|
|
@@ -1345,7 +1464,8 @@ void IndexIVFFastScan::search_implem_14(
|
|
|
1345
1464
|
probe_map.reserve(actual_qbs2);
|
|
1346
1465
|
|
|
1347
1466
|
#pragma omp for schedule(dynamic)
|
|
1348
|
-
for (idx_t cluster = 0; cluster < ses.size();
|
|
1467
|
+
for (idx_t cluster = 0; cluster < static_cast<idx_t>(ses.size());
|
|
1468
|
+
cluster++) {
|
|
1349
1469
|
size_t i0 = ses[cluster].start;
|
|
1350
1470
|
size_t i1 = ses[cluster].end;
|
|
1351
1471
|
size_t list_size = ses[cluster].list_size;
|
|
@@ -1353,7 +1473,7 @@ void IndexIVFFastScan::search_implem_14(
|
|
|
1353
1473
|
int list_no = qcs[i0].list_no;
|
|
1354
1474
|
|
|
1355
1475
|
// re-organize LUTs and biases into the right order
|
|
1356
|
-
int nc = i1 - i0;
|
|
1476
|
+
int nc = static_cast<int>(i1 - i0);
|
|
1357
1477
|
|
|
1358
1478
|
std::vector<int> q_map(nc), lut_entries(nc);
|
|
1359
1479
|
AlignedTable<uint8_t> LUT(nc * dim12);
|
|
@@ -1364,7 +1484,7 @@ void IndexIVFFastScan::search_implem_14(
|
|
|
1364
1484
|
const QC& qc = qcs[i];
|
|
1365
1485
|
q_map[i - i0] = qc.qno;
|
|
1366
1486
|
q_set.insert(qc.qno);
|
|
1367
|
-
int ij = qc.qno *
|
|
1487
|
+
int ij = static_cast<int>(qc.qno * cur_nprobe + qc.rank);
|
|
1368
1488
|
lut_entries[i - i0] = single_LUT ? qc.qno : ij;
|
|
1369
1489
|
if (biases.get()) {
|
|
1370
1490
|
tmp_bias[i - i0] = biases[ij];
|
|
@@ -1372,7 +1492,7 @@ void IndexIVFFastScan::search_implem_14(
|
|
|
1372
1492
|
}
|
|
1373
1493
|
pq4_pack_LUT_qbs_q_map(
|
|
1374
1494
|
qbs_for_list,
|
|
1375
|
-
M2,
|
|
1495
|
+
static_cast<int>(M2),
|
|
1376
1496
|
dis_tables.get(),
|
|
1377
1497
|
lut_entries.data(),
|
|
1378
1498
|
LUT.get());
|
|
@@ -1386,9 +1506,9 @@ void IndexIVFFastScan::search_implem_14(
|
|
|
1386
1506
|
|
|
1387
1507
|
// prepare the handler
|
|
1388
1508
|
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1509
|
+
handler_ptr->ntotal = list_size;
|
|
1510
|
+
handler_ptr->q_map = q_map.data();
|
|
1511
|
+
handler_ptr->id_map = ids.get();
|
|
1392
1512
|
|
|
1393
1513
|
// Set context information for handlers that need additional data
|
|
1394
1514
|
// All queries in this batch access the same list_no, but each
|
|
@@ -1398,21 +1518,20 @@ void IndexIVFFastScan::search_implem_14(
|
|
|
1398
1518
|
const QC& qc = qcs[i];
|
|
1399
1519
|
probe_map[i - i0] = qc.rank;
|
|
1400
1520
|
}
|
|
1401
|
-
|
|
1521
|
+
handler_ptr->set_list_context(list_no, probe_map);
|
|
1402
1522
|
|
|
1403
|
-
|
|
1523
|
+
scanner->accumulate_loop_qbs(
|
|
1404
1524
|
qbs_for_list,
|
|
1405
1525
|
list_size,
|
|
1406
|
-
M2,
|
|
1526
|
+
static_cast<int>(M2),
|
|
1407
1527
|
codes.get(),
|
|
1408
1528
|
LUT.get(),
|
|
1409
|
-
|
|
1410
|
-
context.norm_scaler,
|
|
1529
|
+
context.pq2x4_scale,
|
|
1411
1530
|
get_block_stride());
|
|
1412
1531
|
}
|
|
1413
1532
|
|
|
1414
1533
|
// labels is in-place for HeapHC
|
|
1415
|
-
|
|
1534
|
+
handler_ptr->end();
|
|
1416
1535
|
|
|
1417
1536
|
// merge per-thread results
|
|
1418
1537
|
#pragma omp single
|
|
@@ -1466,7 +1585,7 @@ void IndexIVFFastScan::reconstruct_from_offset(
|
|
|
1466
1585
|
for (size_t m = 0; m < M; m++) {
|
|
1467
1586
|
uint8_t c =
|
|
1468
1587
|
pq4_get_packed_element(list_codes.get(), bbs, M2, offset, m);
|
|
1469
|
-
bsw.write(c, nbits);
|
|
1588
|
+
bsw.write(c, static_cast<int>(nbits));
|
|
1470
1589
|
}
|
|
1471
1590
|
|
|
1472
1591
|
sa_decode(1, code.data(), recons);
|
|
@@ -1477,7 +1596,7 @@ void IndexIVFFastScan::reconstruct_orig_invlists() {
|
|
|
1477
1596
|
FAISS_THROW_IF_NOT(orig_invlists->list_size(0) == 0);
|
|
1478
1597
|
|
|
1479
1598
|
#pragma omp parallel for if (nlist > 100)
|
|
1480
|
-
for (idx_t list_no = 0; list_no < nlist; list_no++) {
|
|
1599
|
+
for (idx_t list_no = 0; list_no < static_cast<idx_t>(nlist); list_no++) {
|
|
1481
1600
|
InvertedLists::ScopedCodes codes(invlists, list_no);
|
|
1482
1601
|
InvertedLists::ScopedIds ids(invlists, list_no);
|
|
1483
1602
|
size_t list_size = invlists->list_size(list_no);
|
|
@@ -1489,7 +1608,7 @@ void IndexIVFFastScan::reconstruct_orig_invlists() {
|
|
|
1489
1608
|
for (size_t m = 0; m < M; m++) {
|
|
1490
1609
|
uint8_t c =
|
|
1491
1610
|
pq4_get_packed_element(codes.get(), bbs, M2, offset, m);
|
|
1492
|
-
bsw.write(c, nbits);
|
|
1611
|
+
bsw.write(c, static_cast<int>(nbits));
|
|
1493
1612
|
}
|
|
1494
1613
|
|
|
1495
1614
|
// get id
|
|
@@ -1516,7 +1635,7 @@ void IndexIVFFastScan::sa_decode(idx_t n, const uint8_t* codes, float* x)
|
|
|
1516
1635
|
fine_quantizer->decode(code + coarse_size, xi, 1);
|
|
1517
1636
|
if (by_residual) {
|
|
1518
1637
|
quantizer->reconstruct(list_no, residual.data());
|
|
1519
|
-
for (
|
|
1638
|
+
for (int j = 0; j < d; j++) {
|
|
1520
1639
|
xi[j] += residual[j];
|
|
1521
1640
|
}
|
|
1522
1641
|
}
|