faiss 0.6.0 → 0.6.1
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -9,7 +9,6 @@
|
|
|
9
9
|
|
|
10
10
|
#include <faiss/IndexIVFPQ.h>
|
|
11
11
|
|
|
12
|
-
#include <cassert>
|
|
13
12
|
#include <cinttypes>
|
|
14
13
|
#include <cmath>
|
|
15
14
|
#include <cstdint>
|
|
@@ -17,7 +16,6 @@
|
|
|
17
16
|
|
|
18
17
|
#include <algorithm>
|
|
19
18
|
|
|
20
|
-
#include <faiss/utils/Heap.h>
|
|
21
19
|
#include <faiss/utils/distances_dispatch.h>
|
|
22
20
|
#include <faiss/utils/utils.h>
|
|
23
21
|
|
|
@@ -30,9 +28,15 @@
|
|
|
30
28
|
#include <faiss/impl/IDSelector.h>
|
|
31
29
|
#include <faiss/impl/ProductQuantizer.h>
|
|
32
30
|
#include <faiss/impl/ResultHandler.h>
|
|
33
|
-
#include <faiss/impl/pq_code_distance/pq_code_distance-
|
|
31
|
+
#include <faiss/impl/pq_code_distance/pq_code_distance-generic.h>
|
|
34
32
|
#include <faiss/impl/simd_dispatch.h>
|
|
35
33
|
|
|
34
|
+
// Scalar (NONE) fallback for dynamic dispatch
|
|
35
|
+
#define THE_SIMD_LEVEL SIMDLevel::NONE
|
|
36
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
37
|
+
#include <faiss/impl/pq_code_distance/IVFPQScanner_impl.h>
|
|
38
|
+
#undef THE_SIMD_LEVEL
|
|
39
|
+
|
|
36
40
|
namespace faiss {
|
|
37
41
|
|
|
38
42
|
/*****************************************
|
|
@@ -40,17 +44,17 @@ namespace faiss {
|
|
|
40
44
|
******************************************/
|
|
41
45
|
|
|
42
46
|
IndexIVFPQ::IndexIVFPQ(
|
|
43
|
-
Index*
|
|
44
|
-
size_t
|
|
45
|
-
size_t
|
|
47
|
+
Index* quantizer_in,
|
|
48
|
+
size_t d_in,
|
|
49
|
+
size_t nlist_in,
|
|
46
50
|
size_t M,
|
|
47
51
|
size_t nbits_per_idx,
|
|
48
52
|
MetricType metric,
|
|
49
|
-
bool
|
|
50
|
-
: IndexIVF(
|
|
51
|
-
pq(
|
|
53
|
+
bool own_invlists_in)
|
|
54
|
+
: IndexIVF(quantizer_in, d_in, nlist_in, 0, metric, own_invlists_in),
|
|
55
|
+
pq(d_in, M, nbits_per_idx) {
|
|
52
56
|
code_size = pq.code_size;
|
|
53
|
-
if (
|
|
57
|
+
if (own_invlists_in) {
|
|
54
58
|
invlists->code_size = code_size;
|
|
55
59
|
}
|
|
56
60
|
is_trained = false;
|
|
@@ -66,12 +70,16 @@ IndexIVFPQ::IndexIVFPQ(
|
|
|
66
70
|
/****************************************************************
|
|
67
71
|
* training */
|
|
68
72
|
|
|
69
|
-
void IndexIVFPQ::train_encoder(
|
|
73
|
+
void IndexIVFPQ::train_encoder(
|
|
74
|
+
idx_t n,
|
|
75
|
+
const float* x,
|
|
76
|
+
const idx_t* /*assign*/) {
|
|
70
77
|
pq.train(n, x);
|
|
71
78
|
|
|
72
79
|
if (do_polysemous_training) {
|
|
73
|
-
if (verbose)
|
|
80
|
+
if (verbose) {
|
|
74
81
|
printf("doing polysemous training for PQ\n");
|
|
82
|
+
}
|
|
75
83
|
PolysemousTraining default_pt;
|
|
76
84
|
PolysemousTraining* pt =
|
|
77
85
|
polysemous_training ? polysemous_training : &default_pt;
|
|
@@ -96,8 +104,9 @@ void IndexIVFPQ::encode(idx_t key, const float* x, uint8_t* code) const {
|
|
|
96
104
|
std::vector<float> residual_vec(d);
|
|
97
105
|
quantizer->compute_residual(x, residual_vec.data(), key);
|
|
98
106
|
pq.compute_code(residual_vec.data(), code);
|
|
99
|
-
} else
|
|
107
|
+
} else {
|
|
100
108
|
pq.compute_code(x, code);
|
|
109
|
+
}
|
|
101
110
|
}
|
|
102
111
|
|
|
103
112
|
void IndexIVFPQ::encode_multiple(
|
|
@@ -106,8 +115,9 @@ void IndexIVFPQ::encode_multiple(
|
|
|
106
115
|
const float* x,
|
|
107
116
|
uint8_t* xcodes,
|
|
108
117
|
bool compute_keys) const {
|
|
109
|
-
if (compute_keys)
|
|
118
|
+
if (compute_keys) {
|
|
110
119
|
quantizer->assign(n, x, keys);
|
|
120
|
+
}
|
|
111
121
|
|
|
112
122
|
encode_vectors(n, x, keys, xcodes);
|
|
113
123
|
}
|
|
@@ -123,7 +133,7 @@ void IndexIVFPQ::decode_multiple(
|
|
|
123
133
|
for (size_t i = 0; i < n; i++) {
|
|
124
134
|
quantizer->reconstruct(keys[i], centroid.data());
|
|
125
135
|
float* xi = x + i * d;
|
|
126
|
-
for (
|
|
136
|
+
for (int j = 0; j < d; j++) {
|
|
127
137
|
xi[j] += centroid[j];
|
|
128
138
|
}
|
|
129
139
|
}
|
|
@@ -149,13 +159,15 @@ static std::unique_ptr<float[]> compute_residuals(
|
|
|
149
159
|
const idx_t* list_nos) {
|
|
150
160
|
size_t d = quantizer->d;
|
|
151
161
|
std::unique_ptr<float[]> residuals(new float[n * d]);
|
|
152
|
-
//
|
|
153
|
-
|
|
154
|
-
|
|
162
|
+
// Parallelize with OpenMP (each iteration is independent)
|
|
163
|
+
#pragma omp parallel for if (n > 1000)
|
|
164
|
+
for (idx_t i = 0; i < n; i++) {
|
|
165
|
+
if (list_nos[i] < 0) {
|
|
155
166
|
memset(residuals.get() + i * d, 0, sizeof(float) * d);
|
|
156
|
-
else
|
|
167
|
+
} else {
|
|
157
168
|
quantizer->compute_residual(
|
|
158
169
|
x + i * d, residuals.get() + i * d, list_nos[i]);
|
|
170
|
+
}
|
|
159
171
|
}
|
|
160
172
|
return residuals;
|
|
161
173
|
}
|
|
@@ -207,7 +219,7 @@ void IndexIVFPQ::sa_decode(idx_t n, const uint8_t* codes, float* x) const {
|
|
|
207
219
|
pq.decode(code + coarse_size, xi);
|
|
208
220
|
if (by_residual) {
|
|
209
221
|
quantizer->reconstruct(list_no, residual.data());
|
|
210
|
-
for (
|
|
222
|
+
for (int j = 0; j < d; j++) {
|
|
211
223
|
xi[j] += residual[j];
|
|
212
224
|
}
|
|
213
225
|
}
|
|
@@ -282,14 +294,15 @@ void IndexIVFPQ::add_core_o(
|
|
|
282
294
|
double t2 = getmillisecs();
|
|
283
295
|
// TODO: parallelize?
|
|
284
296
|
size_t n_ignore = 0;
|
|
285
|
-
for (
|
|
297
|
+
for (idx_t i = 0; i < n; i++) {
|
|
286
298
|
idx_t key = idx[i];
|
|
287
299
|
idx_t id = xids ? xids[i] : ntotal + i;
|
|
288
300
|
if (key < 0) {
|
|
289
301
|
direct_map.add_single_id(id, -1, 0);
|
|
290
302
|
n_ignore++;
|
|
291
|
-
if (residuals_2)
|
|
303
|
+
if (residuals_2) {
|
|
292
304
|
memset(residuals_2, 0, sizeof(*residuals_2) * d);
|
|
305
|
+
}
|
|
293
306
|
continue;
|
|
294
307
|
}
|
|
295
308
|
|
|
@@ -301,8 +314,9 @@ void IndexIVFPQ::add_core_o(
|
|
|
301
314
|
float* res2 = residuals_2 + i * d;
|
|
302
315
|
const float* xi = to_encode + i * d;
|
|
303
316
|
pq.decode(code, res2);
|
|
304
|
-
for (int j = 0; j < d; j++)
|
|
317
|
+
for (int j = 0; j < d; j++) {
|
|
305
318
|
res2[j] = xi[j] - res2[j];
|
|
319
|
+
}
|
|
306
320
|
}
|
|
307
321
|
|
|
308
322
|
direct_map.add_single_id(id, key, offset);
|
|
@@ -311,8 +325,9 @@ void IndexIVFPQ::add_core_o(
|
|
|
311
325
|
double t3 = getmillisecs();
|
|
312
326
|
if (verbose) {
|
|
313
327
|
char comment[100] = {0};
|
|
314
|
-
if (n_ignore > 0)
|
|
328
|
+
if (n_ignore > 0) {
|
|
315
329
|
snprintf(comment, 100, "(%zd vectors ignored)", n_ignore);
|
|
330
|
+
}
|
|
316
331
|
printf(" add_core times: %.3f %.3f %.3f %s\n",
|
|
317
332
|
t1 - t0,
|
|
318
333
|
t2 - t1,
|
|
@@ -379,6 +394,7 @@ void initialize_IVFPQ_precomputed_table(
|
|
|
379
394
|
AlignedTable<float>& precomputed_table,
|
|
380
395
|
bool by_residual,
|
|
381
396
|
bool verbose) {
|
|
397
|
+
FAISS_THROW_IF_NOT_MSG(quantizer, "IVF quantizer must not be null");
|
|
382
398
|
size_t nlist = quantizer->ntotal;
|
|
383
399
|
size_t d = quantizer->d;
|
|
384
400
|
FAISS_THROW_IF_NOT(d == pq.d);
|
|
@@ -388,6 +404,9 @@ void initialize_IVFPQ_precomputed_table(
|
|
|
388
404
|
return;
|
|
389
405
|
}
|
|
390
406
|
|
|
407
|
+
const size_t m_ksub =
|
|
408
|
+
mul_no_overflow(pq.M, pq.ksub, "IVFPQ precomputed_table");
|
|
409
|
+
|
|
391
410
|
if (use_precomputed_table == 0) { // then choose the type of table
|
|
392
411
|
if (!(quantizer->metric_type == METRIC_L2 && by_residual)) {
|
|
393
412
|
if (verbose) {
|
|
@@ -399,10 +418,13 @@ void initialize_IVFPQ_precomputed_table(
|
|
|
399
418
|
}
|
|
400
419
|
const MultiIndexQuantizer* miq =
|
|
401
420
|
dynamic_cast<const MultiIndexQuantizer*>(quantizer);
|
|
402
|
-
if (miq && pq.M % miq->pq.M == 0)
|
|
421
|
+
if (miq && pq.M % miq->pq.M == 0) {
|
|
403
422
|
use_precomputed_table = 2;
|
|
404
|
-
else {
|
|
405
|
-
size_t table_size =
|
|
423
|
+
} else {
|
|
424
|
+
size_t table_size = mul_no_overflow(
|
|
425
|
+
mul_no_overflow(m_ksub, nlist, "IVFPQ precomputed_table"),
|
|
426
|
+
sizeof(float),
|
|
427
|
+
"IVFPQ precomputed_table");
|
|
406
428
|
if (table_size > precomputed_table_max_bytes) {
|
|
407
429
|
if (verbose) {
|
|
408
430
|
printf("IndexIVFPQ::precompute_table: not precomputing table, "
|
|
@@ -422,22 +444,25 @@ void initialize_IVFPQ_precomputed_table(
|
|
|
422
444
|
}
|
|
423
445
|
|
|
424
446
|
// squared norms of the PQ centroids
|
|
425
|
-
std::vector<float> r_norms(
|
|
426
|
-
for (
|
|
427
|
-
for (
|
|
447
|
+
std::vector<float> r_norms(m_ksub, NAN);
|
|
448
|
+
for (size_t m = 0; m < pq.M; m++) {
|
|
449
|
+
for (size_t j = 0; j < pq.ksub; j++) {
|
|
428
450
|
r_norms[m * pq.ksub + j] =
|
|
429
451
|
fvec_norm_L2sqr_dispatch(pq.get_centroids(m, j), pq.dsub);
|
|
452
|
+
}
|
|
453
|
+
}
|
|
430
454
|
|
|
431
455
|
if (use_precomputed_table == 1) {
|
|
432
|
-
precomputed_table.resize(
|
|
456
|
+
precomputed_table.resize(
|
|
457
|
+
mul_no_overflow(nlist, m_ksub, "IVFPQ precomputed_table"));
|
|
433
458
|
std::vector<float> centroid(d);
|
|
434
459
|
|
|
435
460
|
for (size_t i = 0; i < nlist; i++) {
|
|
436
461
|
quantizer->reconstruct(i, centroid.data());
|
|
437
462
|
|
|
438
|
-
float* tab = &precomputed_table[i *
|
|
463
|
+
float* tab = &precomputed_table[i * m_ksub];
|
|
439
464
|
pq.compute_inner_prod_table(centroid.data(), tab);
|
|
440
|
-
fvec_madd_dispatch(
|
|
465
|
+
fvec_madd_dispatch(m_ksub, r_norms.data(), 2.0, tab, tab);
|
|
441
466
|
}
|
|
442
467
|
} else if (use_precomputed_table == 2) {
|
|
443
468
|
const MultiIndexQuantizer* miq =
|
|
@@ -446,12 +471,13 @@ void initialize_IVFPQ_precomputed_table(
|
|
|
446
471
|
const ProductQuantizer& cpq = miq->pq;
|
|
447
472
|
FAISS_THROW_IF_NOT(pq.M % cpq.M == 0);
|
|
448
473
|
|
|
449
|
-
precomputed_table.resize(
|
|
474
|
+
precomputed_table.resize(
|
|
475
|
+
mul_no_overflow(cpq.ksub, m_ksub, "IVFPQ precomputed_table"));
|
|
450
476
|
|
|
451
477
|
// reorder PQ centroid table
|
|
452
478
|
std::vector<float> centroids(d * cpq.ksub, NAN);
|
|
453
479
|
|
|
454
|
-
for (
|
|
480
|
+
for (size_t m = 0; m < cpq.M; m++) {
|
|
455
481
|
for (size_t i = 0; i < cpq.ksub; i++) {
|
|
456
482
|
memcpy(centroids.data() + i * d + m * cpq.dsub,
|
|
457
483
|
cpq.get_centroids(m, i),
|
|
@@ -463,8 +489,8 @@ void initialize_IVFPQ_precomputed_table(
|
|
|
463
489
|
cpq.ksub, centroids.data(), precomputed_table.data());
|
|
464
490
|
|
|
465
491
|
for (size_t i = 0; i < cpq.ksub; i++) {
|
|
466
|
-
float* tab = &precomputed_table[i *
|
|
467
|
-
fvec_madd_dispatch(
|
|
492
|
+
float* tab = &precomputed_table[i * m_ksub];
|
|
493
|
+
fvec_madd_dispatch(m_ksub, r_norms.data(), 2.0, tab, tab);
|
|
468
494
|
}
|
|
469
495
|
}
|
|
470
496
|
}
|
|
@@ -479,812 +505,13 @@ void IndexIVFPQ::precompute_table() {
|
|
|
479
505
|
verbose);
|
|
480
506
|
}
|
|
481
507
|
|
|
482
|
-
namespace {
|
|
483
|
-
|
|
484
|
-
#define TIC t0 = get_cycles()
|
|
485
|
-
#define TOC get_cycles() - t0
|
|
486
|
-
|
|
487
|
-
/** QueryTables manages the various ways of searching an
|
|
488
|
-
* IndexIVFPQ. The code contains a lot of branches, depending on:
|
|
489
|
-
* - metric_type: are we computing L2 or Inner product similarity?
|
|
490
|
-
* - by_residual: do we encode raw vectors or residuals?
|
|
491
|
-
* - use_precomputed_table: are x_R|x_C tables precomputed?
|
|
492
|
-
* - polysemous_ht: are we filtering with polysemous codes?
|
|
493
|
-
*/
|
|
494
|
-
struct QueryTables {
|
|
495
|
-
/*****************************************************
|
|
496
|
-
* General data from the IVFPQ
|
|
497
|
-
*****************************************************/
|
|
498
|
-
|
|
499
|
-
const IndexIVFPQ& ivfpq;
|
|
500
|
-
const IVFSearchParameters* params;
|
|
501
|
-
|
|
502
|
-
// copied from IndexIVFPQ for easier access
|
|
503
|
-
int d;
|
|
504
|
-
const ProductQuantizer& pq;
|
|
505
|
-
MetricType metric_type;
|
|
506
|
-
bool by_residual;
|
|
507
|
-
int use_precomputed_table;
|
|
508
|
-
int polysemous_ht;
|
|
509
|
-
|
|
510
|
-
// pre-allocated data buffers
|
|
511
|
-
float *sim_table, *sim_table_2;
|
|
512
|
-
float *residual_vec, *decoded_vec;
|
|
513
|
-
|
|
514
|
-
// single data buffer
|
|
515
|
-
std::vector<float> mem;
|
|
516
|
-
|
|
517
|
-
// for table pointers
|
|
518
|
-
std::vector<const float*> sim_table_ptrs;
|
|
519
|
-
|
|
520
|
-
explicit QueryTables(
|
|
521
|
-
const IndexIVFPQ& ivfpq,
|
|
522
|
-
const IVFSearchParameters* params)
|
|
523
|
-
: ivfpq(ivfpq),
|
|
524
|
-
d(ivfpq.d),
|
|
525
|
-
pq(ivfpq.pq),
|
|
526
|
-
metric_type(ivfpq.metric_type),
|
|
527
|
-
by_residual(ivfpq.by_residual),
|
|
528
|
-
use_precomputed_table(ivfpq.use_precomputed_table) {
|
|
529
|
-
mem.resize(pq.ksub * pq.M * 2 + d * 2);
|
|
530
|
-
sim_table = mem.data();
|
|
531
|
-
sim_table_2 = sim_table + pq.ksub * pq.M;
|
|
532
|
-
residual_vec = sim_table_2 + pq.ksub * pq.M;
|
|
533
|
-
decoded_vec = residual_vec + d;
|
|
534
|
-
|
|
535
|
-
// for polysemous
|
|
536
|
-
polysemous_ht = ivfpq.polysemous_ht;
|
|
537
|
-
if (auto ivfpq_params =
|
|
538
|
-
dynamic_cast<const IVFPQSearchParameters*>(params)) {
|
|
539
|
-
polysemous_ht = ivfpq_params->polysemous_ht;
|
|
540
|
-
}
|
|
541
|
-
if (polysemous_ht != 0) {
|
|
542
|
-
q_code.resize(pq.code_size);
|
|
543
|
-
}
|
|
544
|
-
init_list_cycles = 0;
|
|
545
|
-
sim_table_ptrs.resize(pq.M);
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
/*****************************************************
|
|
549
|
-
* What we do when query is known
|
|
550
|
-
*****************************************************/
|
|
551
|
-
|
|
552
|
-
// field specific to query
|
|
553
|
-
const float* qi;
|
|
554
|
-
|
|
555
|
-
// query-specific initialization
|
|
556
|
-
void init_query(const float* qi) {
|
|
557
|
-
this->qi = qi;
|
|
558
|
-
if (metric_type == METRIC_INNER_PRODUCT)
|
|
559
|
-
init_query_IP();
|
|
560
|
-
else
|
|
561
|
-
init_query_L2();
|
|
562
|
-
if (!by_residual && polysemous_ht != 0)
|
|
563
|
-
pq.compute_code(qi, q_code.data());
|
|
564
|
-
}
|
|
565
|
-
|
|
566
|
-
void init_query_IP() {
|
|
567
|
-
// precompute some tables specific to the query qi
|
|
568
|
-
pq.compute_inner_prod_table(qi, sim_table);
|
|
569
|
-
}
|
|
570
|
-
|
|
571
|
-
void init_query_L2() {
|
|
572
|
-
if (!by_residual) {
|
|
573
|
-
pq.compute_distance_table(qi, sim_table);
|
|
574
|
-
} else if (use_precomputed_table) {
|
|
575
|
-
pq.compute_inner_prod_table(qi, sim_table_2);
|
|
576
|
-
}
|
|
577
|
-
}
|
|
578
|
-
|
|
579
|
-
/*****************************************************
|
|
580
|
-
* When inverted list is known: prepare computations
|
|
581
|
-
*****************************************************/
|
|
582
|
-
|
|
583
|
-
// fields specific to list
|
|
584
|
-
idx_t key;
|
|
585
|
-
float coarse_dis;
|
|
586
|
-
std::vector<uint8_t> q_code;
|
|
587
|
-
|
|
588
|
-
uint64_t init_list_cycles;
|
|
589
|
-
|
|
590
|
-
/// once we know the query and the centroid, we can prepare the
|
|
591
|
-
/// sim_table that will be used for accumulation
|
|
592
|
-
/// and dis0, the initial value
|
|
593
|
-
float precompute_list_tables() {
|
|
594
|
-
float dis0 = 0;
|
|
595
|
-
uint64_t t0;
|
|
596
|
-
TIC;
|
|
597
|
-
if (by_residual) {
|
|
598
|
-
if (metric_type == METRIC_INNER_PRODUCT)
|
|
599
|
-
dis0 = precompute_list_tables_IP();
|
|
600
|
-
else
|
|
601
|
-
dis0 = precompute_list_tables_L2();
|
|
602
|
-
}
|
|
603
|
-
init_list_cycles += TOC;
|
|
604
|
-
return dis0;
|
|
605
|
-
}
|
|
606
|
-
|
|
607
|
-
float precompute_list_table_pointers() {
|
|
608
|
-
float dis0 = 0;
|
|
609
|
-
uint64_t t0;
|
|
610
|
-
TIC;
|
|
611
|
-
if (by_residual) {
|
|
612
|
-
if (metric_type == METRIC_INNER_PRODUCT)
|
|
613
|
-
FAISS_THROW_MSG("not implemented");
|
|
614
|
-
else
|
|
615
|
-
dis0 = precompute_list_table_pointers_L2();
|
|
616
|
-
}
|
|
617
|
-
init_list_cycles += TOC;
|
|
618
|
-
return dis0;
|
|
619
|
-
}
|
|
620
|
-
|
|
621
|
-
/*****************************************************
|
|
622
|
-
* compute tables for inner prod
|
|
623
|
-
*****************************************************/
|
|
624
|
-
|
|
625
|
-
float precompute_list_tables_IP() {
|
|
626
|
-
// prepare the sim_table that will be used for accumulation
|
|
627
|
-
// and dis0, the initial value
|
|
628
|
-
ivfpq.quantizer->reconstruct(key, decoded_vec);
|
|
629
|
-
// decoded_vec = centroid
|
|
630
|
-
float dis0 = fvec_inner_product_dispatch(qi, decoded_vec, d);
|
|
631
|
-
|
|
632
|
-
if (polysemous_ht) {
|
|
633
|
-
for (int i = 0; i < d; i++) {
|
|
634
|
-
residual_vec[i] = qi[i] - decoded_vec[i];
|
|
635
|
-
}
|
|
636
|
-
pq.compute_code(residual_vec, q_code.data());
|
|
637
|
-
}
|
|
638
|
-
return dis0;
|
|
639
|
-
}
|
|
640
|
-
|
|
641
|
-
/*****************************************************
|
|
642
|
-
* compute tables for L2 distance
|
|
643
|
-
*****************************************************/
|
|
644
|
-
|
|
645
|
-
float precompute_list_tables_L2() {
|
|
646
|
-
float dis0 = 0;
|
|
647
|
-
|
|
648
|
-
if (use_precomputed_table == 0 || use_precomputed_table == -1) {
|
|
649
|
-
ivfpq.quantizer->compute_residual(qi, residual_vec, key);
|
|
650
|
-
pq.compute_distance_table(residual_vec, sim_table);
|
|
651
|
-
|
|
652
|
-
if (polysemous_ht != 0) {
|
|
653
|
-
pq.compute_code(residual_vec, q_code.data());
|
|
654
|
-
}
|
|
655
|
-
|
|
656
|
-
} else if (use_precomputed_table == 1) {
|
|
657
|
-
dis0 = coarse_dis;
|
|
658
|
-
|
|
659
|
-
fvec_madd_dispatch(
|
|
660
|
-
pq.M * pq.ksub,
|
|
661
|
-
ivfpq.precomputed_table.data() + key * pq.ksub * pq.M,
|
|
662
|
-
-2.0,
|
|
663
|
-
sim_table_2,
|
|
664
|
-
sim_table);
|
|
665
|
-
|
|
666
|
-
if (polysemous_ht != 0) {
|
|
667
|
-
ivfpq.quantizer->compute_residual(qi, residual_vec, key);
|
|
668
|
-
pq.compute_code(residual_vec, q_code.data());
|
|
669
|
-
}
|
|
670
|
-
|
|
671
|
-
} else if (use_precomputed_table == 2) {
|
|
672
|
-
dis0 = coarse_dis;
|
|
673
|
-
|
|
674
|
-
const MultiIndexQuantizer* miq =
|
|
675
|
-
dynamic_cast<const MultiIndexQuantizer*>(ivfpq.quantizer);
|
|
676
|
-
FAISS_THROW_IF_NOT(miq);
|
|
677
|
-
const ProductQuantizer& cpq = miq->pq;
|
|
678
|
-
int Mf = pq.M / cpq.M;
|
|
679
|
-
|
|
680
|
-
const float* qtab = sim_table_2; // query-specific table
|
|
681
|
-
float* ltab = sim_table; // (output) list-specific table
|
|
682
|
-
|
|
683
|
-
long k = key;
|
|
684
|
-
for (int cm = 0; cm < cpq.M; cm++) {
|
|
685
|
-
// compute PQ index
|
|
686
|
-
int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
|
|
687
|
-
k >>= cpq.nbits;
|
|
688
|
-
|
|
689
|
-
// get corresponding table
|
|
690
|
-
const float* pc = ivfpq.precomputed_table.data() +
|
|
691
|
-
(ki * pq.M + cm * Mf) * pq.ksub;
|
|
692
|
-
|
|
693
|
-
if (polysemous_ht == 0) {
|
|
694
|
-
// sum up with query-specific table
|
|
695
|
-
fvec_madd_dispatch(Mf * pq.ksub, pc, -2.0, qtab, ltab);
|
|
696
|
-
ltab += Mf * pq.ksub;
|
|
697
|
-
qtab += Mf * pq.ksub;
|
|
698
|
-
} else {
|
|
699
|
-
for (int m = cm * Mf; m < (cm + 1) * Mf; m++) {
|
|
700
|
-
q_code[m] = fvec_madd_and_argmin_dispatch(
|
|
701
|
-
pq.ksub, pc, -2, qtab, ltab);
|
|
702
|
-
pc += pq.ksub;
|
|
703
|
-
ltab += pq.ksub;
|
|
704
|
-
qtab += pq.ksub;
|
|
705
|
-
}
|
|
706
|
-
}
|
|
707
|
-
}
|
|
708
|
-
}
|
|
709
|
-
|
|
710
|
-
return dis0;
|
|
711
|
-
}
|
|
712
|
-
|
|
713
|
-
float precompute_list_table_pointers_L2() {
|
|
714
|
-
float dis0 = 0;
|
|
715
|
-
|
|
716
|
-
if (use_precomputed_table == 1) {
|
|
717
|
-
dis0 = coarse_dis;
|
|
718
|
-
|
|
719
|
-
const float* s =
|
|
720
|
-
ivfpq.precomputed_table.data() + key * pq.ksub * pq.M;
|
|
721
|
-
for (int m = 0; m < pq.M; m++) {
|
|
722
|
-
sim_table_ptrs[m] = s;
|
|
723
|
-
s += pq.ksub;
|
|
724
|
-
}
|
|
725
|
-
} else if (use_precomputed_table == 2) {
|
|
726
|
-
dis0 = coarse_dis;
|
|
727
|
-
|
|
728
|
-
const MultiIndexQuantizer* miq =
|
|
729
|
-
dynamic_cast<const MultiIndexQuantizer*>(ivfpq.quantizer);
|
|
730
|
-
FAISS_THROW_IF_NOT(miq);
|
|
731
|
-
const ProductQuantizer& cpq = miq->pq;
|
|
732
|
-
int Mf = pq.M / cpq.M;
|
|
733
|
-
|
|
734
|
-
long k = key;
|
|
735
|
-
int m0 = 0;
|
|
736
|
-
for (int cm = 0; cm < cpq.M; cm++) {
|
|
737
|
-
int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
|
|
738
|
-
k >>= cpq.nbits;
|
|
739
|
-
|
|
740
|
-
const float* pc = ivfpq.precomputed_table.data() +
|
|
741
|
-
(ki * pq.M + cm * Mf) * pq.ksub;
|
|
742
|
-
|
|
743
|
-
for (int m = m0; m < m0 + Mf; m++) {
|
|
744
|
-
sim_table_ptrs[m] = pc;
|
|
745
|
-
pc += pq.ksub;
|
|
746
|
-
}
|
|
747
|
-
m0 += Mf;
|
|
748
|
-
}
|
|
749
|
-
} else {
|
|
750
|
-
FAISS_THROW_MSG("need precomputed tables");
|
|
751
|
-
}
|
|
752
|
-
|
|
753
|
-
if (polysemous_ht) {
|
|
754
|
-
FAISS_THROW_MSG("not implemented");
|
|
755
|
-
// Not clear that it makes sense to implemente this,
|
|
756
|
-
// because it costs M * ksub, which is what we wanted to
|
|
757
|
-
// avoid with the tables pointers.
|
|
758
|
-
}
|
|
759
|
-
|
|
760
|
-
return dis0;
|
|
761
|
-
}
|
|
762
|
-
};
|
|
763
|
-
|
|
764
|
-
template <class C, bool use_sel>
|
|
765
|
-
struct WrappedSearchResult {
|
|
766
|
-
ResultHandler& res;
|
|
767
|
-
size_t nup = 0;
|
|
768
|
-
idx_t list_no;
|
|
769
|
-
|
|
770
|
-
const idx_t* ids;
|
|
771
|
-
const IDSelector* sel;
|
|
772
|
-
|
|
773
|
-
WrappedSearchResult(
|
|
774
|
-
idx_t list_no,
|
|
775
|
-
const idx_t* ids,
|
|
776
|
-
const IDSelector* sel,
|
|
777
|
-
ResultHandler& res)
|
|
778
|
-
: res(res), list_no(list_no), ids(ids), sel(sel) {}
|
|
779
|
-
|
|
780
|
-
inline bool skip_entry(idx_t j) {
|
|
781
|
-
return use_sel && !sel->is_member(ids[j]);
|
|
782
|
-
}
|
|
783
|
-
|
|
784
|
-
inline void add(idx_t j, float dis) {
|
|
785
|
-
if (C::cmp(res.threshold, dis)) {
|
|
786
|
-
idx_t id = ids ? ids[j] : lo_build(this->list_no, j);
|
|
787
|
-
res.add_result(dis, id);
|
|
788
|
-
nup++;
|
|
789
|
-
}
|
|
790
|
-
}
|
|
791
|
-
};
|
|
792
|
-
|
|
793
|
-
/*****************************************************
|
|
794
|
-
* Scaning the codes.
|
|
795
|
-
* The scanning functions call their favorite precompute_*
|
|
796
|
-
* function to precompute the tables they need.
|
|
797
|
-
*****************************************************/
|
|
798
|
-
template <typename IDType, MetricType METRIC_TYPE, class PQCodeDist>
|
|
799
|
-
struct IVFPQScannerT : QueryTables {
|
|
800
|
-
using PQDecoder = typename PQCodeDist::PQDecoder;
|
|
801
|
-
const uint8_t* list_codes;
|
|
802
|
-
const IDType* list_ids;
|
|
803
|
-
size_t list_size;
|
|
804
|
-
|
|
805
|
-
IVFPQScannerT(const IndexIVFPQ& ivfpq, const IVFSearchParameters* params)
|
|
806
|
-
: QueryTables(ivfpq, params) {
|
|
807
|
-
assert(METRIC_TYPE == metric_type);
|
|
808
|
-
}
|
|
809
|
-
|
|
810
|
-
float dis0;
|
|
811
|
-
|
|
812
|
-
void init_list(idx_t list_no, float coarse_dis, int mode) {
|
|
813
|
-
this->key = list_no;
|
|
814
|
-
this->coarse_dis = coarse_dis;
|
|
815
|
-
|
|
816
|
-
if (mode == 2) {
|
|
817
|
-
dis0 = precompute_list_tables();
|
|
818
|
-
} else if (mode == 1) {
|
|
819
|
-
dis0 = precompute_list_table_pointers();
|
|
820
|
-
}
|
|
821
|
-
}
|
|
822
|
-
|
|
823
|
-
/*****************************************************
|
|
824
|
-
* Scaning the codes: simple PQ scan.
|
|
825
|
-
*****************************************************/
|
|
826
|
-
|
|
827
|
-
// This is the baseline version of scan_list_with_tables().
|
|
828
|
-
// It demonstrates what this function actually does.
|
|
829
|
-
//
|
|
830
|
-
// /// version of the scan where we use precomputed tables.
|
|
831
|
-
// template <class SearchResultType>
|
|
832
|
-
// void scan_list_with_table(
|
|
833
|
-
// size_t ncode,
|
|
834
|
-
// const uint8_t* codes,
|
|
835
|
-
// SearchResultType& res) const {
|
|
836
|
-
//
|
|
837
|
-
// for (size_t j = 0; j < ncode; j++, codes += pq.code_size) {
|
|
838
|
-
// if (res.skip_entry(j)) {
|
|
839
|
-
// continue;
|
|
840
|
-
// }
|
|
841
|
-
// float dis = dis0 + PQCodeDist::distance_single_code(
|
|
842
|
-
// pq, sim_table, codes);
|
|
843
|
-
// res.add(j, dis);
|
|
844
|
-
// }
|
|
845
|
-
// }
|
|
846
|
-
|
|
847
|
-
// This is the modified version of scan_list_with_tables().
|
|
848
|
-
// It was observed that doing manual unrolling of the loop that
|
|
849
|
-
// utilizes distance_single_code() speeds up the computations.
|
|
850
|
-
|
|
851
|
-
/// version of the scan where we use precomputed tables.
|
|
852
|
-
template <class SearchResultType>
|
|
853
|
-
void scan_list_with_table(
|
|
854
|
-
size_t ncode,
|
|
855
|
-
const uint8_t* codes,
|
|
856
|
-
SearchResultType& res) const {
|
|
857
|
-
int counter = 0;
|
|
858
|
-
|
|
859
|
-
size_t saved_j[4] = {0, 0, 0, 0};
|
|
860
|
-
for (size_t j = 0; j < ncode; j++) {
|
|
861
|
-
if (res.skip_entry(j)) {
|
|
862
|
-
continue;
|
|
863
|
-
}
|
|
864
|
-
|
|
865
|
-
saved_j[0] = (counter == 0) ? j : saved_j[0];
|
|
866
|
-
saved_j[1] = (counter == 1) ? j : saved_j[1];
|
|
867
|
-
saved_j[2] = (counter == 2) ? j : saved_j[2];
|
|
868
|
-
saved_j[3] = (counter == 3) ? j : saved_j[3];
|
|
869
|
-
|
|
870
|
-
counter += 1;
|
|
871
|
-
if (counter == 4) {
|
|
872
|
-
float distance_0 = 0;
|
|
873
|
-
float distance_1 = 0;
|
|
874
|
-
float distance_2 = 0;
|
|
875
|
-
float distance_3 = 0;
|
|
876
|
-
PQCodeDist::distance_four_codes(
|
|
877
|
-
pq.M,
|
|
878
|
-
pq.nbits,
|
|
879
|
-
sim_table,
|
|
880
|
-
codes + saved_j[0] * pq.code_size,
|
|
881
|
-
codes + saved_j[1] * pq.code_size,
|
|
882
|
-
codes + saved_j[2] * pq.code_size,
|
|
883
|
-
codes + saved_j[3] * pq.code_size,
|
|
884
|
-
distance_0,
|
|
885
|
-
distance_1,
|
|
886
|
-
distance_2,
|
|
887
|
-
distance_3);
|
|
888
|
-
|
|
889
|
-
res.add(saved_j[0], dis0 + distance_0);
|
|
890
|
-
res.add(saved_j[1], dis0 + distance_1);
|
|
891
|
-
res.add(saved_j[2], dis0 + distance_2);
|
|
892
|
-
res.add(saved_j[3], dis0 + distance_3);
|
|
893
|
-
counter = 0;
|
|
894
|
-
}
|
|
895
|
-
}
|
|
896
|
-
|
|
897
|
-
if (counter >= 1) {
|
|
898
|
-
float dis = dis0 +
|
|
899
|
-
PQCodeDist::distance_single_code(
|
|
900
|
-
pq.M,
|
|
901
|
-
pq.nbits,
|
|
902
|
-
sim_table,
|
|
903
|
-
codes + saved_j[0] * pq.code_size);
|
|
904
|
-
res.add(saved_j[0], dis);
|
|
905
|
-
}
|
|
906
|
-
if (counter >= 2) {
|
|
907
|
-
float dis = dis0 +
|
|
908
|
-
PQCodeDist::distance_single_code(
|
|
909
|
-
pq.M,
|
|
910
|
-
pq.nbits,
|
|
911
|
-
sim_table,
|
|
912
|
-
codes + saved_j[1] * pq.code_size);
|
|
913
|
-
res.add(saved_j[1], dis);
|
|
914
|
-
}
|
|
915
|
-
if (counter >= 3) {
|
|
916
|
-
float dis = dis0 +
|
|
917
|
-
PQCodeDist::distance_single_code(
|
|
918
|
-
pq.M,
|
|
919
|
-
pq.nbits,
|
|
920
|
-
sim_table,
|
|
921
|
-
codes + saved_j[2] * pq.code_size);
|
|
922
|
-
res.add(saved_j[2], dis);
|
|
923
|
-
}
|
|
924
|
-
}
|
|
925
|
-
|
|
926
|
-
/// tables are not precomputed, but pointers are provided to the
|
|
927
|
-
/// relevant X_c|x_r tables
|
|
928
|
-
template <class SearchResultType>
|
|
929
|
-
void scan_list_with_pointer(
|
|
930
|
-
size_t ncode,
|
|
931
|
-
const uint8_t* codes,
|
|
932
|
-
SearchResultType& res) const {
|
|
933
|
-
for (size_t j = 0; j < ncode; j++, codes += pq.code_size) {
|
|
934
|
-
if (res.skip_entry(j)) {
|
|
935
|
-
continue;
|
|
936
|
-
}
|
|
937
|
-
PQDecoder decoder(codes, pq.nbits);
|
|
938
|
-
float dis = dis0;
|
|
939
|
-
const float* tab = sim_table_2;
|
|
940
|
-
|
|
941
|
-
for (size_t m = 0; m < pq.M; m++) {
|
|
942
|
-
int ci = decoder.decode();
|
|
943
|
-
dis += sim_table_ptrs[m][ci] - 2 * tab[ci];
|
|
944
|
-
tab += pq.ksub;
|
|
945
|
-
}
|
|
946
|
-
res.add(j, dis);
|
|
947
|
-
}
|
|
948
|
-
}
|
|
949
|
-
|
|
950
|
-
/// nothing is precomputed: access residuals on-the-fly
|
|
951
|
-
template <class SearchResultType>
|
|
952
|
-
void scan_on_the_fly_dist(
|
|
953
|
-
size_t ncode,
|
|
954
|
-
const uint8_t* codes,
|
|
955
|
-
SearchResultType& res) const {
|
|
956
|
-
const float* dvec;
|
|
957
|
-
float dis0 = 0;
|
|
958
|
-
if (by_residual) {
|
|
959
|
-
if (METRIC_TYPE == METRIC_INNER_PRODUCT) {
|
|
960
|
-
ivfpq.quantizer->reconstruct(key, residual_vec);
|
|
961
|
-
dis0 = fvec_inner_product_dispatch(residual_vec, qi, d);
|
|
962
|
-
} else {
|
|
963
|
-
ivfpq.quantizer->compute_residual(qi, residual_vec, key);
|
|
964
|
-
}
|
|
965
|
-
dvec = residual_vec;
|
|
966
|
-
} else {
|
|
967
|
-
dvec = qi;
|
|
968
|
-
dis0 = 0;
|
|
969
|
-
}
|
|
970
|
-
|
|
971
|
-
for (size_t j = 0; j < ncode; j++, codes += pq.code_size) {
|
|
972
|
-
if (res.skip_entry(j)) {
|
|
973
|
-
continue;
|
|
974
|
-
}
|
|
975
|
-
pq.decode(codes, decoded_vec);
|
|
976
|
-
|
|
977
|
-
float dis;
|
|
978
|
-
if (METRIC_TYPE == METRIC_INNER_PRODUCT) {
|
|
979
|
-
dis = dis0 + fvec_inner_product_dispatch(decoded_vec, qi, d);
|
|
980
|
-
} else {
|
|
981
|
-
dis = fvec_L2sqr_dispatch(decoded_vec, dvec, d);
|
|
982
|
-
}
|
|
983
|
-
res.add(j, dis);
|
|
984
|
-
}
|
|
985
|
-
}
|
|
986
|
-
|
|
987
|
-
/*****************************************************
|
|
988
|
-
* Scanning codes with polysemous filtering
|
|
989
|
-
*****************************************************/
|
|
990
|
-
|
|
991
|
-
// This is the baseline version of scan_list_polysemous_hc().
|
|
992
|
-
// It demonstrates what this function actually does.
|
|
993
|
-
|
|
994
|
-
// template <class HammingComputer, class SearchResultType>
|
|
995
|
-
// void scan_list_polysemous_hc(
|
|
996
|
-
// size_t ncode,
|
|
997
|
-
// const uint8_t* codes,
|
|
998
|
-
// SearchResultType& res) const {
|
|
999
|
-
// int ht = ivfpq.polysemous_ht;
|
|
1000
|
-
// size_t n_hamming_pass = 0, nup = 0;
|
|
1001
|
-
//
|
|
1002
|
-
// int code_size = pq.code_size;
|
|
1003
|
-
//
|
|
1004
|
-
// HammingComputer hc(q_code.data(), code_size);
|
|
1005
|
-
//
|
|
1006
|
-
// for (size_t j = 0; j < ncode; j++, codes += code_size) {
|
|
1007
|
-
// if (res.skip_entry(j)) {
|
|
1008
|
-
// continue;
|
|
1009
|
-
// }
|
|
1010
|
-
// const uint8_t* b_code = codes;
|
|
1011
|
-
// int hd = hc.hamming(b_code);
|
|
1012
|
-
// if (hd < ht) {
|
|
1013
|
-
// n_hamming_pass++;
|
|
1014
|
-
//
|
|
1015
|
-
// float dis =
|
|
1016
|
-
// dis0 +
|
|
1017
|
-
// PQCodeDist::distance_single_code(
|
|
1018
|
-
// pq, sim_table, codes);
|
|
1019
|
-
//
|
|
1020
|
-
// res.add(j, dis);
|
|
1021
|
-
// }
|
|
1022
|
-
// }
|
|
1023
|
-
// #pragma omp critical
|
|
1024
|
-
// { indexIVFPQ_stats.n_hamming_pass += n_hamming_pass; }
|
|
1025
|
-
// }
|
|
1026
|
-
|
|
1027
|
-
// This is the modified version of scan_list_with_tables().
|
|
1028
|
-
// It was observed that doing manual unrolling of the loop that
|
|
1029
|
-
// utilizes distance_single_code() speeds up the computations.
|
|
1030
|
-
|
|
1031
|
-
template <class HammingComputer, class SearchResultType>
|
|
1032
|
-
void scan_list_polysemous_hc(
|
|
1033
|
-
size_t ncode,
|
|
1034
|
-
const uint8_t* codes,
|
|
1035
|
-
SearchResultType& res) const {
|
|
1036
|
-
int ht = ivfpq.polysemous_ht;
|
|
1037
|
-
size_t n_hamming_pass = 0;
|
|
1038
|
-
|
|
1039
|
-
int code_size = pq.code_size;
|
|
1040
|
-
|
|
1041
|
-
size_t saved_j[8];
|
|
1042
|
-
int counter = 0;
|
|
1043
|
-
|
|
1044
|
-
HammingComputer hc(q_code.data(), code_size);
|
|
1045
|
-
|
|
1046
|
-
for (size_t j = 0; j < (ncode / 4) * 4; j += 4) {
|
|
1047
|
-
const uint8_t* b_code = codes + j * code_size;
|
|
1048
|
-
|
|
1049
|
-
// Unrolling is a key. Basically, doing multiple popcount
|
|
1050
|
-
// operations one after another speeds things up.
|
|
1051
|
-
|
|
1052
|
-
// 9999999 is just an arbitrary large number
|
|
1053
|
-
int hd0 = (res.skip_entry(j + 0))
|
|
1054
|
-
? 99999999
|
|
1055
|
-
: hc.hamming(b_code + 0 * code_size);
|
|
1056
|
-
int hd1 = (res.skip_entry(j + 1))
|
|
1057
|
-
? 99999999
|
|
1058
|
-
: hc.hamming(b_code + 1 * code_size);
|
|
1059
|
-
int hd2 = (res.skip_entry(j + 2))
|
|
1060
|
-
? 99999999
|
|
1061
|
-
: hc.hamming(b_code + 2 * code_size);
|
|
1062
|
-
int hd3 = (res.skip_entry(j + 3))
|
|
1063
|
-
? 99999999
|
|
1064
|
-
: hc.hamming(b_code + 3 * code_size);
|
|
1065
|
-
|
|
1066
|
-
saved_j[counter] = j + 0;
|
|
1067
|
-
counter = (hd0 < ht) ? (counter + 1) : counter;
|
|
1068
|
-
saved_j[counter] = j + 1;
|
|
1069
|
-
counter = (hd1 < ht) ? (counter + 1) : counter;
|
|
1070
|
-
saved_j[counter] = j + 2;
|
|
1071
|
-
counter = (hd2 < ht) ? (counter + 1) : counter;
|
|
1072
|
-
saved_j[counter] = j + 3;
|
|
1073
|
-
counter = (hd3 < ht) ? (counter + 1) : counter;
|
|
1074
|
-
|
|
1075
|
-
if (counter >= 4) {
|
|
1076
|
-
// process four codes at the same time
|
|
1077
|
-
n_hamming_pass += 4;
|
|
1078
|
-
|
|
1079
|
-
float distance_0 = dis0;
|
|
1080
|
-
float distance_1 = dis0;
|
|
1081
|
-
float distance_2 = dis0;
|
|
1082
|
-
float distance_3 = dis0;
|
|
1083
|
-
PQCodeDist::distance_four_codes(
|
|
1084
|
-
pq.M,
|
|
1085
|
-
pq.nbits,
|
|
1086
|
-
sim_table,
|
|
1087
|
-
codes + saved_j[0] * pq.code_size,
|
|
1088
|
-
codes + saved_j[1] * pq.code_size,
|
|
1089
|
-
codes + saved_j[2] * pq.code_size,
|
|
1090
|
-
codes + saved_j[3] * pq.code_size,
|
|
1091
|
-
distance_0,
|
|
1092
|
-
distance_1,
|
|
1093
|
-
distance_2,
|
|
1094
|
-
distance_3);
|
|
1095
|
-
|
|
1096
|
-
res.add(saved_j[0], dis0 + distance_0);
|
|
1097
|
-
res.add(saved_j[1], dis0 + distance_1);
|
|
1098
|
-
res.add(saved_j[2], dis0 + distance_2);
|
|
1099
|
-
res.add(saved_j[3], dis0 + distance_3);
|
|
1100
|
-
|
|
1101
|
-
//
|
|
1102
|
-
counter -= 4;
|
|
1103
|
-
saved_j[0] = saved_j[4];
|
|
1104
|
-
saved_j[1] = saved_j[5];
|
|
1105
|
-
saved_j[2] = saved_j[6];
|
|
1106
|
-
saved_j[3] = saved_j[7];
|
|
1107
|
-
}
|
|
1108
|
-
}
|
|
1109
|
-
|
|
1110
|
-
for (size_t kk = 0; kk < counter; kk++) {
|
|
1111
|
-
n_hamming_pass++;
|
|
1112
|
-
|
|
1113
|
-
float dis = dis0 +
|
|
1114
|
-
PQCodeDist::distance_single_code(
|
|
1115
|
-
pq.M,
|
|
1116
|
-
pq.nbits,
|
|
1117
|
-
sim_table,
|
|
1118
|
-
codes + saved_j[kk] * pq.code_size);
|
|
1119
|
-
|
|
1120
|
-
res.add(saved_j[kk], dis);
|
|
1121
|
-
}
|
|
1122
|
-
|
|
1123
|
-
// process leftovers
|
|
1124
|
-
for (size_t j = (ncode / 4) * 4; j < ncode; j++) {
|
|
1125
|
-
if (res.skip_entry(j)) {
|
|
1126
|
-
continue;
|
|
1127
|
-
}
|
|
1128
|
-
const uint8_t* b_code = codes + j * code_size;
|
|
1129
|
-
int hd = hc.hamming(b_code);
|
|
1130
|
-
if (hd < ht) {
|
|
1131
|
-
n_hamming_pass++;
|
|
1132
|
-
|
|
1133
|
-
float dis = dis0 +
|
|
1134
|
-
PQCodeDist::distance_single_code(
|
|
1135
|
-
pq.M,
|
|
1136
|
-
pq.nbits,
|
|
1137
|
-
sim_table,
|
|
1138
|
-
codes + j * code_size);
|
|
1139
|
-
|
|
1140
|
-
res.add(j, dis);
|
|
1141
|
-
}
|
|
1142
|
-
}
|
|
1143
|
-
|
|
1144
|
-
#pragma omp critical
|
|
1145
|
-
{
|
|
1146
|
-
indexIVFPQ_stats.n_hamming_pass += n_hamming_pass;
|
|
1147
|
-
}
|
|
1148
|
-
}
|
|
1149
|
-
|
|
1150
|
-
template <class SearchResultType>
|
|
1151
|
-
struct Run_scan_list_polysemous_hc {
|
|
1152
|
-
using T = void;
|
|
1153
|
-
template <class HammingComputer, class... Types>
|
|
1154
|
-
void f(const IVFPQScannerT* scanner, Types... args) {
|
|
1155
|
-
scanner->scan_list_polysemous_hc<HammingComputer, SearchResultType>(
|
|
1156
|
-
args...);
|
|
1157
|
-
}
|
|
1158
|
-
};
|
|
1159
|
-
|
|
1160
|
-
template <class SearchResultType>
|
|
1161
|
-
void scan_list_polysemous(
|
|
1162
|
-
size_t ncode,
|
|
1163
|
-
const uint8_t* codes,
|
|
1164
|
-
SearchResultType& res) const {
|
|
1165
|
-
Run_scan_list_polysemous_hc<SearchResultType> r;
|
|
1166
|
-
dispatch_HammingComputer(pq.code_size, r, this, ncode, codes, res);
|
|
1167
|
-
}
|
|
1168
|
-
};
|
|
1169
|
-
|
|
1170
|
-
/* We put as many parameters as possible in template. Hopefully the
|
|
1171
|
-
* gain in runtime is worth the code bloat.
|
|
1172
|
-
*
|
|
1173
|
-
* C is the comparator < or >, it is directly related to METRIC_TYPE.
|
|
1174
|
-
*
|
|
1175
|
-
* precompute_mode is how much we precompute (2 = precompute distance tables,
|
|
1176
|
-
* 1 = precompute pointers to distances, 0 = compute distances one by one).
|
|
1177
|
-
* Currently only 2 is supported
|
|
1178
|
-
*
|
|
1179
|
-
* use_sel: store or ignore the IDSelector
|
|
1180
|
-
*/
|
|
1181
|
-
template <MetricType METRIC_TYPE, class C, class PQCodeDist, bool use_sel>
|
|
1182
|
-
struct IVFPQScanner : IVFPQScannerT<idx_t, METRIC_TYPE, PQCodeDist>,
|
|
1183
|
-
InvertedListScanner {
|
|
1184
|
-
int precompute_mode;
|
|
1185
|
-
const IDSelector* sel;
|
|
1186
|
-
|
|
1187
|
-
IVFPQScanner(
|
|
1188
|
-
const IndexIVFPQ& ivfpq,
|
|
1189
|
-
bool store_pairs,
|
|
1190
|
-
int precompute_mode,
|
|
1191
|
-
const IDSelector* sel)
|
|
1192
|
-
: IVFPQScannerT<idx_t, METRIC_TYPE, PQCodeDist>(ivfpq, nullptr),
|
|
1193
|
-
precompute_mode(precompute_mode),
|
|
1194
|
-
sel(sel) {
|
|
1195
|
-
this->store_pairs = store_pairs;
|
|
1196
|
-
this->keep_max = is_similarity_metric(METRIC_TYPE);
|
|
1197
|
-
this->code_size = this->pq.code_size;
|
|
1198
|
-
}
|
|
1199
|
-
|
|
1200
|
-
void set_query(const float* query) override {
|
|
1201
|
-
this->init_query(query);
|
|
1202
|
-
}
|
|
1203
|
-
|
|
1204
|
-
void set_list(idx_t list_no, float coarse_dis) override {
|
|
1205
|
-
this->list_no = list_no;
|
|
1206
|
-
this->init_list(list_no, coarse_dis, precompute_mode);
|
|
1207
|
-
}
|
|
1208
|
-
|
|
1209
|
-
float distance_to_code(const uint8_t* code) const override {
|
|
1210
|
-
assert(precompute_mode == 2);
|
|
1211
|
-
float dis = this->dis0 +
|
|
1212
|
-
PQCodeDist::distance_single_code(
|
|
1213
|
-
this->pq.M, this->pq.nbits, this->sim_table, code);
|
|
1214
|
-
return dis;
|
|
1215
|
-
}
|
|
1216
|
-
|
|
1217
|
-
size_t scan_codes(
|
|
1218
|
-
size_t ncode,
|
|
1219
|
-
const uint8_t* codes,
|
|
1220
|
-
const idx_t* ids,
|
|
1221
|
-
ResultHandler& handler) const override {
|
|
1222
|
-
WrappedSearchResult<C, use_sel> res(
|
|
1223
|
-
this->key,
|
|
1224
|
-
this->store_pairs ? nullptr : ids,
|
|
1225
|
-
this->sel,
|
|
1226
|
-
handler);
|
|
1227
|
-
|
|
1228
|
-
if (this->polysemous_ht > 0) {
|
|
1229
|
-
assert(precompute_mode == 2);
|
|
1230
|
-
this->scan_list_polysemous(ncode, codes, res);
|
|
1231
|
-
} else if (precompute_mode == 2) {
|
|
1232
|
-
this->scan_list_with_table(ncode, codes, res);
|
|
1233
|
-
} else if (precompute_mode == 1) {
|
|
1234
|
-
this->scan_list_with_pointer(ncode, codes, res);
|
|
1235
|
-
} else if (precompute_mode == 0) {
|
|
1236
|
-
this->scan_on_the_fly_dist(ncode, codes, res);
|
|
1237
|
-
} else {
|
|
1238
|
-
FAISS_THROW_MSG("bad precomp mode");
|
|
1239
|
-
}
|
|
1240
|
-
return res.nup;
|
|
1241
|
-
}
|
|
1242
|
-
};
|
|
1243
|
-
|
|
1244
|
-
} // anonymous namespace
|
|
1245
|
-
|
|
1246
508
|
InvertedListScanner* IndexIVFPQ::get_InvertedListScanner(
|
|
1247
509
|
bool store_pairs,
|
|
1248
510
|
const IDSelector* sel,
|
|
1249
511
|
const IVFSearchParameters*) const {
|
|
1250
512
|
return with_simd_level([&]<SIMDLevel SL>() -> InvertedListScanner* {
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
if (metric_type == METRIC_INNER_PRODUCT) {
|
|
1254
|
-
return new IVFPQScanner<
|
|
1255
|
-
METRIC_INNER_PRODUCT,
|
|
1256
|
-
CMin<float, idx_t>,
|
|
1257
|
-
PQCodeDist,
|
|
1258
|
-
use_sel>(*this, store_pairs, 2, sel);
|
|
1259
|
-
} else if (metric_type == METRIC_L2) {
|
|
1260
|
-
return new IVFPQScanner<
|
|
1261
|
-
METRIC_L2,
|
|
1262
|
-
CMax<float, idx_t>,
|
|
1263
|
-
PQCodeDist,
|
|
1264
|
-
use_sel>(*this, store_pairs, 2, sel);
|
|
1265
|
-
} else {
|
|
1266
|
-
FAISS_THROW_MSG("unsupported metric type");
|
|
1267
|
-
}
|
|
1268
|
-
};
|
|
1269
|
-
|
|
1270
|
-
auto with_decoder = [&]<bool use_sel>() -> InvertedListScanner* {
|
|
1271
|
-
if (pq.nbits == 8) {
|
|
1272
|
-
return make.template
|
|
1273
|
-
operator()<PQCodeDistance<PQDecoder8, SL>, use_sel>();
|
|
1274
|
-
} else if (pq.nbits == 16) {
|
|
1275
|
-
return make.template
|
|
1276
|
-
operator()<PQCodeDistance<PQDecoder16, SL>, use_sel>();
|
|
1277
|
-
} else {
|
|
1278
|
-
return make.template
|
|
1279
|
-
operator()<PQCodeDistance<PQDecoderGeneric, SL>, use_sel>();
|
|
1280
|
-
}
|
|
1281
|
-
};
|
|
1282
|
-
|
|
1283
|
-
if (sel) {
|
|
1284
|
-
return with_decoder.template operator()<true>();
|
|
1285
|
-
} else {
|
|
1286
|
-
return with_decoder.template operator()<false>();
|
|
1287
|
-
}
|
|
513
|
+
return pq_code_distance::make_IVFPQInvertedListScanner<SL>(
|
|
514
|
+
*this, store_pairs, sel);
|
|
1288
515
|
});
|
|
1289
516
|
}
|
|
1290
517
|
|
|
@@ -1320,25 +547,26 @@ size_t IndexIVFPQ::find_duplicates(idx_t* dup_ids, size_t* lims) const {
|
|
|
1320
547
|
for (size_t list_no = 0; list_no < nlist; list_no++) {
|
|
1321
548
|
size_t n = invlists->list_size(list_no);
|
|
1322
549
|
std::vector<int> ord(n);
|
|
1323
|
-
for (
|
|
1324
|
-
ord[i] = i;
|
|
550
|
+
for (size_t i = 0; i < n; i++) {
|
|
551
|
+
ord[i] = static_cast<int>(i);
|
|
552
|
+
}
|
|
1325
553
|
InvertedLists::ScopedCodes codes(invlists, list_no);
|
|
1326
554
|
CodeCmp cs = {codes.get(), code_size};
|
|
1327
555
|
std::sort(ord.begin(), ord.end(), cs);
|
|
1328
556
|
|
|
1329
557
|
InvertedLists::ScopedIds list_ids(invlists, list_no);
|
|
1330
558
|
int prev = -1; // all elements from prev to i-1 are equal
|
|
1331
|
-
for (
|
|
559
|
+
for (size_t i = 0; i < n; i++) {
|
|
1332
560
|
if (prev >= 0 && cs.cmp(ord[prev], ord[i]) == 0) {
|
|
1333
561
|
// same as previous => remember
|
|
1334
|
-
if (prev + 1 == i) { // start new group
|
|
562
|
+
if (static_cast<size_t>(prev + 1) == i) { // start new group
|
|
1335
563
|
ngroup++;
|
|
1336
564
|
lims[ngroup] = lims[ngroup - 1];
|
|
1337
565
|
dup_ids[lims[ngroup]++] = list_ids[ord[prev]];
|
|
1338
566
|
}
|
|
1339
567
|
dup_ids[lims[ngroup]++] = list_ids[ord[i]];
|
|
1340
568
|
} else { // not same as previous.
|
|
1341
|
-
prev = i;
|
|
569
|
+
prev = static_cast<int>(i);
|
|
1342
570
|
}
|
|
1343
571
|
}
|
|
1344
572
|
}
|