faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
#include <cinttypes>
|
|
11
11
|
#include <cstddef>
|
|
12
|
+
#include <cstdlib>
|
|
12
13
|
|
|
13
14
|
#include <faiss/IndexHNSW.h>
|
|
14
15
|
|
|
@@ -16,13 +17,7 @@
|
|
|
16
17
|
#include <faiss/impl/IDSelector.h>
|
|
17
18
|
#include <faiss/impl/ResultHandler.h>
|
|
18
19
|
#include <faiss/impl/VisitedTable.h>
|
|
19
|
-
|
|
20
|
-
#ifdef __AVX2__
|
|
21
|
-
#include <immintrin.h>
|
|
22
|
-
|
|
23
|
-
#include <limits>
|
|
24
|
-
#include <type_traits>
|
|
25
|
-
#endif
|
|
20
|
+
#include <faiss/impl/hnsw/MinimaxHeap.h>
|
|
26
21
|
|
|
27
22
|
namespace faiss {
|
|
28
23
|
|
|
@@ -31,7 +26,8 @@ namespace faiss {
|
|
|
31
26
|
**************************************************************/
|
|
32
27
|
|
|
33
28
|
int HNSW::nb_neighbors(int layer_no) const {
|
|
34
|
-
FAISS_THROW_IF_NOT(
|
|
29
|
+
FAISS_THROW_IF_NOT(
|
|
30
|
+
static_cast<size_t>(layer_no + 1) < cum_nneighbor_per_level.size());
|
|
35
31
|
return cum_nneighbor_per_level[layer_no + 1] -
|
|
36
32
|
cum_nneighbor_per_level[layer_no];
|
|
37
33
|
}
|
|
@@ -39,7 +35,7 @@ int HNSW::nb_neighbors(int layer_no) const {
|
|
|
39
35
|
void HNSW::set_nb_neighbors(int level_no, int n) {
|
|
40
36
|
FAISS_THROW_IF_NOT(levels.size() == 0);
|
|
41
37
|
int cur_n = nb_neighbors(level_no);
|
|
42
|
-
for (
|
|
38
|
+
for (size_t i = level_no + 1; i < cum_nneighbor_per_level.size(); i++) {
|
|
43
39
|
cum_nneighbor_per_level[i] += n - cur_n;
|
|
44
40
|
}
|
|
45
41
|
}
|
|
@@ -67,7 +63,7 @@ HNSW::HNSW(int M) : rng(12345) {
|
|
|
67
63
|
int HNSW::random_level() {
|
|
68
64
|
double f = rng.rand_float();
|
|
69
65
|
// could be a bit faster with bisection
|
|
70
|
-
for (
|
|
66
|
+
for (size_t level = 0; level < assign_probas.size(); level++) {
|
|
71
67
|
if (f < assign_probas[level]) {
|
|
72
68
|
return level;
|
|
73
69
|
}
|
|
@@ -92,7 +88,7 @@ void HNSW::set_default_probas(int M, float levelMult) {
|
|
|
92
88
|
}
|
|
93
89
|
|
|
94
90
|
void HNSW::clear_neighbor_tables(int level) {
|
|
95
|
-
for (
|
|
91
|
+
for (size_t i = 0; i < levels.size(); i++) {
|
|
96
92
|
size_t begin, end;
|
|
97
93
|
neighbor_range(i, level, &begin, &end);
|
|
98
94
|
for (size_t j = begin; j < end; j++) {
|
|
@@ -111,14 +107,15 @@ void HNSW::reset() {
|
|
|
111
107
|
}
|
|
112
108
|
|
|
113
109
|
void HNSW::print_neighbor_stats(int level) const {
|
|
114
|
-
FAISS_THROW_IF_NOT(
|
|
110
|
+
FAISS_THROW_IF_NOT(
|
|
111
|
+
static_cast<size_t>(level) < cum_nneighbor_per_level.size());
|
|
115
112
|
printf("stats on level %d, max %d neighbors per vertex:\n",
|
|
116
113
|
level,
|
|
117
114
|
nb_neighbors(level));
|
|
118
115
|
size_t tot_neigh = 0, tot_common = 0, tot_reciprocal = 0, n_node = 0;
|
|
119
116
|
#pragma omp parallel for reduction(+ : tot_neigh) reduction(+ : tot_common) \
|
|
120
117
|
reduction(+ : tot_reciprocal) reduction(+ : n_node)
|
|
121
|
-
for (
|
|
118
|
+
for (idx_t i = 0; i < static_cast<idx_t>(levels.size()); i++) {
|
|
122
119
|
if (levels[i] > level) {
|
|
123
120
|
n_node++;
|
|
124
121
|
size_t begin, end;
|
|
@@ -130,7 +127,7 @@ void HNSW::print_neighbor_stats(int level) const {
|
|
|
130
127
|
}
|
|
131
128
|
neighset.insert(neighbors[j]);
|
|
132
129
|
}
|
|
133
|
-
|
|
130
|
+
size_t n_neigh = neighset.size();
|
|
134
131
|
int n_common = 0;
|
|
135
132
|
int n_reciprocal = 0;
|
|
136
133
|
for (size_t j = begin; j < end; j++) {
|
|
@@ -179,7 +176,7 @@ void HNSW::fill_with_random_links(size_t n) {
|
|
|
179
176
|
|
|
180
177
|
for (int level = max_level_2 - 1; level >= 0; --level) {
|
|
181
178
|
std::vector<int> elts;
|
|
182
|
-
for (
|
|
179
|
+
for (size_t i = 0; i < n; i++) {
|
|
183
180
|
if (levels[i] > level) {
|
|
184
181
|
elts.push_back(i);
|
|
185
182
|
}
|
|
@@ -190,10 +187,10 @@ void HNSW::fill_with_random_links(size_t n) {
|
|
|
190
187
|
continue;
|
|
191
188
|
}
|
|
192
189
|
|
|
193
|
-
for (
|
|
190
|
+
for (size_t ii = 0; ii < elts.size(); ii++) {
|
|
194
191
|
int i = elts[ii];
|
|
195
192
|
size_t begin, end;
|
|
196
|
-
neighbor_range(i,
|
|
193
|
+
neighbor_range(i, level, &begin, &end);
|
|
197
194
|
for (size_t j = begin; j < end; j++) {
|
|
198
195
|
int other = 0;
|
|
199
196
|
do {
|
|
@@ -213,14 +210,14 @@ int HNSW::prepare_level_tab(size_t n, bool preset_levels) {
|
|
|
213
210
|
FAISS_ASSERT(n0 + n == levels.size());
|
|
214
211
|
} else {
|
|
215
212
|
FAISS_ASSERT(n0 == levels.size());
|
|
216
|
-
for (
|
|
213
|
+
for (size_t i = 0; i < n; i++) {
|
|
217
214
|
int pt_level = random_level();
|
|
218
215
|
levels.push_back(pt_level + 1);
|
|
219
216
|
}
|
|
220
217
|
}
|
|
221
218
|
|
|
222
219
|
int max_level_2 = 0;
|
|
223
|
-
for (
|
|
220
|
+
for (size_t i = 0; i < n; i++) {
|
|
224
221
|
int pt_level = levels[i + n0] - 1;
|
|
225
222
|
if (pt_level > max_level_2) {
|
|
226
223
|
max_level_2 = pt_level;
|
|
@@ -240,7 +237,7 @@ void HNSW::shrink_neighbor_list(
|
|
|
240
237
|
DistanceComputer& qdis,
|
|
241
238
|
std::priority_queue<NodeDistFarther>& input,
|
|
242
239
|
std::vector<NodeDistFarther>& output,
|
|
243
|
-
|
|
240
|
+
size_t max_size,
|
|
244
241
|
bool keep_max_size_level0) {
|
|
245
242
|
// This prevents number of neighbors at
|
|
246
243
|
// level 0 from being shrunk to less than 2 * M.
|
|
@@ -265,7 +262,7 @@ void HNSW::shrink_neighbor_list(
|
|
|
265
262
|
|
|
266
263
|
if (good) {
|
|
267
264
|
output.push_back(v1);
|
|
268
|
-
if (output.size() >= max_size) {
|
|
265
|
+
if (output.size() >= static_cast<size_t>(max_size)) {
|
|
269
266
|
return;
|
|
270
267
|
}
|
|
271
268
|
} else if (keep_max_size_level0) {
|
|
@@ -273,7 +270,8 @@ void HNSW::shrink_neighbor_list(
|
|
|
273
270
|
}
|
|
274
271
|
}
|
|
275
272
|
size_t idx = 0;
|
|
276
|
-
while (keep_max_size_level0 &&
|
|
273
|
+
while (keep_max_size_level0 &&
|
|
274
|
+
(output.size() < static_cast<size_t>(max_size)) &&
|
|
277
275
|
(idx < outsiders.size())) {
|
|
278
276
|
output.push_back(outsiders[idx++]);
|
|
279
277
|
}
|
|
@@ -293,9 +291,9 @@ using NodeDistFarther = HNSW::NodeDistFarther;
|
|
|
293
291
|
void shrink_neighbor_list(
|
|
294
292
|
DistanceComputer& qdis,
|
|
295
293
|
std::priority_queue<NodeDistCloser>& resultSet1,
|
|
296
|
-
|
|
294
|
+
size_t max_size,
|
|
297
295
|
bool keep_max_size_level0 = false) {
|
|
298
|
-
if (resultSet1.size() < max_size) {
|
|
296
|
+
if (resultSet1.size() < static_cast<size_t>(max_size)) {
|
|
299
297
|
return;
|
|
300
298
|
}
|
|
301
299
|
std::priority_queue<NodeDistFarther> resultSet;
|
|
@@ -348,7 +346,9 @@ void add_link(
|
|
|
348
346
|
resultSet.emplace(qdis.symmetric_dis(src, neigh), neigh);
|
|
349
347
|
}
|
|
350
348
|
|
|
351
|
-
|
|
349
|
+
size_t max_size = end - begin;
|
|
350
|
+
max_size -= max_size * std::clamp(hnsw.prune_headroom, 0.0f, 0.5f);
|
|
351
|
+
shrink_neighbor_list(qdis, resultSet, max_size, keep_max_size_level0);
|
|
352
352
|
|
|
353
353
|
// ...and back
|
|
354
354
|
size_t i = begin;
|
|
@@ -418,11 +418,12 @@ void search_neighbors_to_add(
|
|
|
418
418
|
float dis = qdis(nodeId);
|
|
419
419
|
NodeDistFarther evE1(dis, nodeId);
|
|
420
420
|
|
|
421
|
-
if (results.size() < hnsw.efConstruction ||
|
|
421
|
+
if (results.size() < static_cast<size_t>(hnsw.efConstruction) ||
|
|
422
422
|
results.top().d > dis) {
|
|
423
423
|
results.emplace(dis, nodeId);
|
|
424
424
|
candidates.emplace(dis, nodeId);
|
|
425
|
-
if (results.size() >
|
|
425
|
+
if (results.size() >
|
|
426
|
+
static_cast<size_t>(hnsw.efConstruction)) {
|
|
426
427
|
results.pop();
|
|
427
428
|
}
|
|
428
429
|
}
|
|
@@ -433,11 +434,12 @@ void search_neighbors_to_add(
|
|
|
433
434
|
// the following version processes 4 neighbors at a time
|
|
434
435
|
auto update_with_candidate = [&](const storage_idx_t idx,
|
|
435
436
|
const float dis) {
|
|
436
|
-
if (results.size() < hnsw.efConstruction ||
|
|
437
|
+
if (results.size() < static_cast<size_t>(hnsw.efConstruction) ||
|
|
437
438
|
results.top().d > dis) {
|
|
438
439
|
results.emplace(dis, idx);
|
|
439
440
|
candidates.emplace(dis, idx);
|
|
440
|
-
if (results.size() >
|
|
441
|
+
if (results.size() >
|
|
442
|
+
static_cast<size_t>(hnsw.efConstruction)) {
|
|
441
443
|
results.pop();
|
|
442
444
|
}
|
|
443
445
|
}
|
|
@@ -479,7 +481,7 @@ void search_neighbors_to_add(
|
|
|
479
481
|
}
|
|
480
482
|
|
|
481
483
|
// process leftovers
|
|
482
|
-
for (
|
|
484
|
+
for (int icnt = 0; icnt < n_buffered; icnt++) {
|
|
483
485
|
float dis = qdis(buffered_ids[icnt]);
|
|
484
486
|
update_with_candidate(buffered_ids[icnt], dis);
|
|
485
487
|
}
|
|
@@ -497,7 +499,7 @@ void HNSW::add_links_starting_from(
|
|
|
497
499
|
storage_idx_t nearest,
|
|
498
500
|
float d_nearest,
|
|
499
501
|
int level,
|
|
500
|
-
|
|
502
|
+
LockVector& locks,
|
|
501
503
|
VisitedTable& vt,
|
|
502
504
|
bool keep_max_size_level0) {
|
|
503
505
|
std::priority_queue<NodeDistCloser> link_targets;
|
|
@@ -519,13 +521,13 @@ void HNSW::add_links_starting_from(
|
|
|
519
521
|
link_targets.pop();
|
|
520
522
|
}
|
|
521
523
|
|
|
522
|
-
|
|
524
|
+
locks.unlock(pt_id);
|
|
523
525
|
for (storage_idx_t other_id : neighbors_to_add) {
|
|
524
|
-
|
|
526
|
+
locks.lock(other_id);
|
|
525
527
|
add_link(*this, ptdis, other_id, pt_id, level, keep_max_size_level0);
|
|
526
|
-
|
|
528
|
+
locks.unlock(other_id);
|
|
527
529
|
}
|
|
528
|
-
|
|
530
|
+
locks.lock(pt_id);
|
|
529
531
|
}
|
|
530
532
|
|
|
531
533
|
/**************************************************************
|
|
@@ -536,19 +538,19 @@ void HNSW::add_with_locks(
|
|
|
536
538
|
DistanceComputer& ptdis,
|
|
537
539
|
int pt_level,
|
|
538
540
|
int pt_id,
|
|
539
|
-
|
|
541
|
+
LockVector& locks,
|
|
540
542
|
VisitedTable& vt,
|
|
541
543
|
bool keep_max_size_level0) {
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
storage_idx_t nearest;
|
|
544
|
+
storage_idx_t nearest = entry_point;
|
|
545
|
+
if (nearest == -1) { // avoid locking after the first point.
|
|
545
546
|
#pragma omp critical
|
|
546
|
-
|
|
547
|
-
nearest = entry_point;
|
|
548
|
-
|
|
549
|
-
if (nearest == -1) {
|
|
547
|
+
if (entry_point == -1) { // double-check under lock.
|
|
550
548
|
max_level = pt_level;
|
|
551
549
|
entry_point = pt_id;
|
|
550
|
+
// leave nearest = -1 to trigger early exit after critical block.
|
|
551
|
+
} else {
|
|
552
|
+
// else: Another thread set the entry point.
|
|
553
|
+
nearest = entry_point;
|
|
552
554
|
}
|
|
553
555
|
}
|
|
554
556
|
|
|
@@ -556,11 +558,12 @@ void HNSW::add_with_locks(
|
|
|
556
558
|
return;
|
|
557
559
|
}
|
|
558
560
|
|
|
559
|
-
|
|
561
|
+
locks.lock(pt_id);
|
|
560
562
|
|
|
561
563
|
int level = max_level; // level at which we start adding neighbors
|
|
562
564
|
float d_nearest = ptdis(nearest);
|
|
563
565
|
|
|
566
|
+
// greedy search on upper levels
|
|
564
567
|
for (; level > pt_level; level--) {
|
|
565
568
|
greedy_update_nearest(*this, ptdis, level, nearest, d_nearest);
|
|
566
569
|
}
|
|
@@ -572,16 +575,19 @@ void HNSW::add_with_locks(
|
|
|
572
575
|
nearest,
|
|
573
576
|
d_nearest,
|
|
574
577
|
level,
|
|
575
|
-
locks
|
|
578
|
+
locks,
|
|
576
579
|
vt,
|
|
577
580
|
keep_max_size_level0);
|
|
578
581
|
}
|
|
579
582
|
|
|
580
|
-
|
|
583
|
+
locks.unlock(pt_id);
|
|
581
584
|
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
+
#pragma omp critical
|
|
586
|
+
{
|
|
587
|
+
if (pt_level > max_level) {
|
|
588
|
+
max_level = pt_level;
|
|
589
|
+
entry_point = pt_id;
|
|
590
|
+
}
|
|
585
591
|
}
|
|
586
592
|
}
|
|
587
593
|
|
|
@@ -589,7 +595,6 @@ void HNSW::add_with_locks(
|
|
|
589
595
|
* Searching
|
|
590
596
|
**************************************************************/
|
|
591
597
|
|
|
592
|
-
using MinimaxHeap = HNSW::MinimaxHeap;
|
|
593
598
|
using Node = HNSW::Node;
|
|
594
599
|
using C = HNSW::C;
|
|
595
600
|
|
|
@@ -726,7 +731,7 @@ int search_from_candidates(
|
|
|
726
731
|
}
|
|
727
732
|
}
|
|
728
733
|
|
|
729
|
-
for (
|
|
734
|
+
for (int icnt = 0; icnt < counter; icnt++) {
|
|
730
735
|
float dis = qdis(saved_j[icnt]);
|
|
731
736
|
add_to_heap(saved_j[icnt], dis);
|
|
732
737
|
|
|
@@ -807,6 +812,10 @@ int search_from_candidates_panorama(
|
|
|
807
812
|
float query_norm_sq = query_cum_sums[0] * query_cum_sums[0];
|
|
808
813
|
|
|
809
814
|
int nstep = 0;
|
|
815
|
+
const size_t d = static_cast<size_t>(panorama_index->d);
|
|
816
|
+
|
|
817
|
+
PanoramaStats local_pano_stats;
|
|
818
|
+
local_pano_stats.reset();
|
|
810
819
|
|
|
811
820
|
while (candidates.size() > 0) {
|
|
812
821
|
float d0 = 0;
|
|
@@ -845,6 +854,7 @@ int search_from_candidates_panorama(
|
|
|
845
854
|
initial_size += is_selected && vt.set(v1) ? 1 : 0;
|
|
846
855
|
}
|
|
847
856
|
|
|
857
|
+
local_pano_stats.total_dims += initial_size * d;
|
|
848
858
|
size_t batch_size = initial_size;
|
|
849
859
|
size_t curr_panorama_level = 0;
|
|
850
860
|
const size_t num_panorama_levels = panorama_index->pano.n_levels;
|
|
@@ -960,6 +970,8 @@ int search_from_candidates_panorama(
|
|
|
960
970
|
}
|
|
961
971
|
}
|
|
962
972
|
|
|
973
|
+
local_pano_stats.total_dims_scanned +=
|
|
974
|
+
batch_size * (end_dim - start_dim);
|
|
963
975
|
batch_size = next_batch_size;
|
|
964
976
|
curr_panorama_level++;
|
|
965
977
|
}
|
|
@@ -968,6 +980,7 @@ int search_from_candidates_panorama(
|
|
|
968
980
|
for (size_t i = 0; i < batch_size; i++) {
|
|
969
981
|
idx_t idx = index_array[i];
|
|
970
982
|
if (res.add_result(exact_distances[i], idx)) {
|
|
983
|
+
threshold = res.threshold;
|
|
971
984
|
nres += 1;
|
|
972
985
|
}
|
|
973
986
|
candidates.push(idx, exact_distances[i]);
|
|
@@ -988,9 +1001,22 @@ int search_from_candidates_panorama(
|
|
|
988
1001
|
stats.nhops += nstep;
|
|
989
1002
|
}
|
|
990
1003
|
|
|
1004
|
+
indexPanorama_stats.add(local_pano_stats);
|
|
991
1005
|
return nres;
|
|
992
1006
|
}
|
|
993
1007
|
|
|
1008
|
+
template <typename T, typename Container, typename Compare>
|
|
1009
|
+
void reservePriorityQueue(
|
|
1010
|
+
std::priority_queue<T, Container, Compare>& q,
|
|
1011
|
+
std::size_t size) {
|
|
1012
|
+
struct Access : std::priority_queue<T, Container, Compare> {
|
|
1013
|
+
using std::priority_queue<T, Container, Compare>::c;
|
|
1014
|
+
};
|
|
1015
|
+
Access access{std::move(q)};
|
|
1016
|
+
access.c.reserve(size);
|
|
1017
|
+
q = std::move(access);
|
|
1018
|
+
}
|
|
1019
|
+
|
|
994
1020
|
std::priority_queue<HNSW::Node> search_from_candidate_unbounded(
|
|
995
1021
|
const HNSW& hnsw,
|
|
996
1022
|
const Node& node,
|
|
@@ -1000,7 +1026,10 @@ std::priority_queue<HNSW::Node> search_from_candidate_unbounded(
|
|
|
1000
1026
|
HNSWStats& stats) {
|
|
1001
1027
|
int ndis = 0;
|
|
1002
1028
|
std::priority_queue<Node> top_candidates;
|
|
1029
|
+
reservePriorityQueue(top_candidates, ef);
|
|
1030
|
+
|
|
1003
1031
|
std::priority_queue<Node, std::vector<Node>, std::greater<Node>> candidates;
|
|
1032
|
+
reservePriorityQueue(candidates, ef);
|
|
1004
1033
|
|
|
1005
1034
|
top_candidates.push(node);
|
|
1006
1035
|
candidates.push(node);
|
|
@@ -1077,7 +1106,7 @@ std::priority_queue<HNSW::Node> search_from_candidate_unbounded(
|
|
|
1077
1106
|
}
|
|
1078
1107
|
}
|
|
1079
1108
|
|
|
1080
|
-
for (
|
|
1109
|
+
for (int icnt = 0; icnt < counter; icnt++) {
|
|
1081
1110
|
float dis = qdis(saved_j[icnt]);
|
|
1082
1111
|
add_to_heap(saved_j[icnt], dis);
|
|
1083
1112
|
|
|
@@ -1157,7 +1186,7 @@ HNSWStats greedy_update_nearest(
|
|
|
1157
1186
|
}
|
|
1158
1187
|
|
|
1159
1188
|
// process leftovers
|
|
1160
|
-
for (
|
|
1189
|
+
for (int icnt = 0; icnt < n_buffered; icnt++) {
|
|
1161
1190
|
float dis = qdis(buffered_ids[icnt]);
|
|
1162
1191
|
update_with_candidate(buffered_ids[icnt], dis);
|
|
1163
1192
|
}
|
|
@@ -1173,7 +1202,6 @@ HNSWStats greedy_update_nearest(
|
|
|
1173
1202
|
}
|
|
1174
1203
|
|
|
1175
1204
|
namespace {
|
|
1176
|
-
using MinimaxHeap = HNSW::MinimaxHeap;
|
|
1177
1205
|
using Node = HNSW::Node;
|
|
1178
1206
|
using C = HNSW::C;
|
|
1179
1207
|
|
|
@@ -1201,12 +1229,12 @@ HNSWStats HNSW::search(
|
|
|
1201
1229
|
int k = extract_k_from_ResultHandler(res);
|
|
1202
1230
|
|
|
1203
1231
|
bool bounded_queue = this->search_bounded_queue;
|
|
1204
|
-
int
|
|
1232
|
+
int cur_efSearch = this->efSearch;
|
|
1205
1233
|
if (params) {
|
|
1206
1234
|
if (const SearchParametersHNSW* hnsw_params =
|
|
1207
1235
|
dynamic_cast<const SearchParametersHNSW*>(params)) {
|
|
1208
1236
|
bounded_queue = hnsw_params->bounded_queue;
|
|
1209
|
-
|
|
1237
|
+
cur_efSearch = hnsw_params->efSearch;
|
|
1210
1238
|
}
|
|
1211
1239
|
}
|
|
1212
1240
|
|
|
@@ -1220,7 +1248,7 @@ HNSWStats HNSW::search(
|
|
|
1220
1248
|
stats.combine(local_stats);
|
|
1221
1249
|
}
|
|
1222
1250
|
|
|
1223
|
-
int ef = std::max(
|
|
1251
|
+
int ef = std::max(cur_efSearch, k);
|
|
1224
1252
|
if (bounded_queue) { // this is the most common branch, for now we only
|
|
1225
1253
|
// support Panorama search in this branch
|
|
1226
1254
|
MinimaxHeap candidates(ef);
|
|
@@ -1248,7 +1276,7 @@ HNSWStats HNSW::search(
|
|
|
1248
1276
|
search_from_candidate_unbounded(
|
|
1249
1277
|
*this, Node(d_nearest, nearest), qdis, ef, &vt, stats);
|
|
1250
1278
|
|
|
1251
|
-
while (top_candidates.size() > k) {
|
|
1279
|
+
while (top_candidates.size() > static_cast<size_t>(k)) {
|
|
1252
1280
|
top_candidates.pop();
|
|
1253
1281
|
}
|
|
1254
1282
|
|
|
@@ -1278,11 +1306,11 @@ void HNSW::search_level_0(
|
|
|
1278
1306
|
const SearchParameters* params) const {
|
|
1279
1307
|
const HNSW& hnsw = *this;
|
|
1280
1308
|
|
|
1281
|
-
auto
|
|
1309
|
+
auto cur_efSearch = hnsw.efSearch;
|
|
1282
1310
|
if (params) {
|
|
1283
1311
|
if (const SearchParametersHNSW* hnsw_params =
|
|
1284
1312
|
dynamic_cast<const SearchParametersHNSW*>(params)) {
|
|
1285
|
-
|
|
1313
|
+
cur_efSearch = hnsw_params->efSearch;
|
|
1286
1314
|
}
|
|
1287
1315
|
}
|
|
1288
1316
|
|
|
@@ -1291,7 +1319,7 @@ void HNSW::search_level_0(
|
|
|
1291
1319
|
if (search_type == 1) {
|
|
1292
1320
|
int nres = 0;
|
|
1293
1321
|
|
|
1294
|
-
for (
|
|
1322
|
+
for (idx_t j = 0; j < nprobe; j++) {
|
|
1295
1323
|
storage_idx_t cj = nearest_i[j];
|
|
1296
1324
|
|
|
1297
1325
|
if (cj < 0) {
|
|
@@ -1302,7 +1330,7 @@ void HNSW::search_level_0(
|
|
|
1302
1330
|
continue;
|
|
1303
1331
|
}
|
|
1304
1332
|
|
|
1305
|
-
int candidates_size = std::max(
|
|
1333
|
+
int candidates_size = std::max(cur_efSearch, k);
|
|
1306
1334
|
MinimaxHeap candidates(candidates_size);
|
|
1307
1335
|
|
|
1308
1336
|
candidates.push(cj, nearest_d[j]);
|
|
@@ -1320,11 +1348,11 @@ void HNSW::search_level_0(
|
|
|
1320
1348
|
nres = std::min(nres, candidates_size);
|
|
1321
1349
|
}
|
|
1322
1350
|
} else if (search_type == 2) {
|
|
1323
|
-
int candidates_size = std::max(
|
|
1351
|
+
int candidates_size = std::max(cur_efSearch, int(k));
|
|
1324
1352
|
candidates_size = std::max(candidates_size, int(nprobe));
|
|
1325
1353
|
|
|
1326
1354
|
MinimaxHeap candidates(candidates_size);
|
|
1327
|
-
for (
|
|
1355
|
+
for (idx_t j = 0; j < nprobe; j++) {
|
|
1328
1356
|
storage_idx_t cj = nearest_i[j];
|
|
1329
1357
|
|
|
1330
1358
|
if (cj < 0) {
|
|
@@ -1371,257 +1399,4 @@ void HNSW::permute_entries(const idx_t* map) {
|
|
|
1371
1399
|
neighbors = std::move(new_neighbors);
|
|
1372
1400
|
}
|
|
1373
1401
|
|
|
1374
|
-
/**************************************************************
|
|
1375
|
-
* MinimaxHeap
|
|
1376
|
-
**************************************************************/
|
|
1377
|
-
|
|
1378
|
-
void HNSW::MinimaxHeap::push(storage_idx_t i, float v) {
|
|
1379
|
-
if (k == n) {
|
|
1380
|
-
if (v >= dis[0]) {
|
|
1381
|
-
return;
|
|
1382
|
-
}
|
|
1383
|
-
if (ids[0] != -1) {
|
|
1384
|
-
--nvalid;
|
|
1385
|
-
}
|
|
1386
|
-
faiss::heap_pop<HC>(k--, dis.data(), ids.data());
|
|
1387
|
-
}
|
|
1388
|
-
faiss::heap_push<HC>(++k, dis.data(), ids.data(), v, i);
|
|
1389
|
-
++nvalid;
|
|
1390
|
-
}
|
|
1391
|
-
|
|
1392
|
-
float HNSW::MinimaxHeap::max() const {
|
|
1393
|
-
return dis[0];
|
|
1394
|
-
}
|
|
1395
|
-
|
|
1396
|
-
int HNSW::MinimaxHeap::size() const {
|
|
1397
|
-
return nvalid;
|
|
1398
|
-
}
|
|
1399
|
-
|
|
1400
|
-
void HNSW::MinimaxHeap::clear() {
|
|
1401
|
-
nvalid = k = 0;
|
|
1402
|
-
}
|
|
1403
|
-
|
|
1404
|
-
#ifdef __AVX512F__
|
|
1405
|
-
|
|
1406
|
-
int HNSW::MinimaxHeap::pop_min(float* vmin_out) {
|
|
1407
|
-
assert(k > 0);
|
|
1408
|
-
static_assert(
|
|
1409
|
-
std::is_same<storage_idx_t, int32_t>::value,
|
|
1410
|
-
"This code expects storage_idx_t to be int32_t");
|
|
1411
|
-
|
|
1412
|
-
int32_t min_idx = -1;
|
|
1413
|
-
float min_dis = std::numeric_limits<float>::infinity();
|
|
1414
|
-
|
|
1415
|
-
__m512i min_indices = _mm512_set1_epi32(-1);
|
|
1416
|
-
__m512 min_distances =
|
|
1417
|
-
_mm512_set1_ps(std::numeric_limits<float>::infinity());
|
|
1418
|
-
__m512i current_indices = _mm512_setr_epi32(
|
|
1419
|
-
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
|
1420
|
-
__m512i offset = _mm512_set1_epi32(16);
|
|
1421
|
-
|
|
1422
|
-
// The following loop tracks the rightmost index with the min distance.
|
|
1423
|
-
// -1 index values are ignored.
|
|
1424
|
-
const int k16 = (k / 16) * 16;
|
|
1425
|
-
for (size_t iii = 0; iii < k16; iii += 16) {
|
|
1426
|
-
__m512i indices =
|
|
1427
|
-
_mm512_loadu_si512((const __m512i*)(ids.data() + iii));
|
|
1428
|
-
__m512 distances = _mm512_loadu_ps(dis.data() + iii);
|
|
1429
|
-
|
|
1430
|
-
// This mask filters out -1 values among indices.
|
|
1431
|
-
__mmask16 m1mask =
|
|
1432
|
-
_mm512_cmpgt_epi32_mask(_mm512_setzero_si512(), indices);
|
|
1433
|
-
|
|
1434
|
-
__mmask16 dmask =
|
|
1435
|
-
_mm512_cmp_ps_mask(min_distances, distances, _CMP_LT_OS);
|
|
1436
|
-
__mmask16 finalmask = m1mask | dmask;
|
|
1437
|
-
|
|
1438
|
-
const __m512i min_indices_new = _mm512_mask_blend_epi32(
|
|
1439
|
-
finalmask, current_indices, min_indices);
|
|
1440
|
-
const __m512 min_distances_new =
|
|
1441
|
-
_mm512_mask_blend_ps(finalmask, distances, min_distances);
|
|
1442
|
-
|
|
1443
|
-
min_indices = min_indices_new;
|
|
1444
|
-
min_distances = min_distances_new;
|
|
1445
|
-
|
|
1446
|
-
current_indices = _mm512_add_epi32(current_indices, offset);
|
|
1447
|
-
}
|
|
1448
|
-
|
|
1449
|
-
// leftovers
|
|
1450
|
-
if (k16 != k) {
|
|
1451
|
-
const __mmask16 kmask = (1 << (k - k16)) - 1;
|
|
1452
|
-
|
|
1453
|
-
__m512i indices = _mm512_mask_loadu_epi32(
|
|
1454
|
-
_mm512_set1_epi32(-1), kmask, ids.data() + k16);
|
|
1455
|
-
__m512 distances = _mm512_maskz_loadu_ps(kmask, dis.data() + k16);
|
|
1456
|
-
|
|
1457
|
-
// This mask filters out -1 values among indices.
|
|
1458
|
-
__mmask16 m1mask =
|
|
1459
|
-
_mm512_cmpgt_epi32_mask(_mm512_setzero_si512(), indices);
|
|
1460
|
-
|
|
1461
|
-
__mmask16 dmask =
|
|
1462
|
-
_mm512_cmp_ps_mask(min_distances, distances, _CMP_LT_OS);
|
|
1463
|
-
__mmask16 finalmask = m1mask | dmask;
|
|
1464
|
-
|
|
1465
|
-
const __m512i min_indices_new = _mm512_mask_blend_epi32(
|
|
1466
|
-
finalmask, current_indices, min_indices);
|
|
1467
|
-
const __m512 min_distances_new =
|
|
1468
|
-
_mm512_mask_blend_ps(finalmask, distances, min_distances);
|
|
1469
|
-
|
|
1470
|
-
min_indices = min_indices_new;
|
|
1471
|
-
min_distances = min_distances_new;
|
|
1472
|
-
}
|
|
1473
|
-
|
|
1474
|
-
// grab min distance
|
|
1475
|
-
min_dis = _mm512_reduce_min_ps(min_distances);
|
|
1476
|
-
// blend
|
|
1477
|
-
__mmask16 mindmask =
|
|
1478
|
-
_mm512_cmpeq_ps_mask(min_distances, _mm512_set1_ps(min_dis));
|
|
1479
|
-
// pick the max one
|
|
1480
|
-
min_idx = _mm512_mask_reduce_max_epi32(mindmask, min_indices);
|
|
1481
|
-
|
|
1482
|
-
if (min_idx == -1) {
|
|
1483
|
-
return -1;
|
|
1484
|
-
}
|
|
1485
|
-
|
|
1486
|
-
if (vmin_out) {
|
|
1487
|
-
*vmin_out = min_dis;
|
|
1488
|
-
}
|
|
1489
|
-
int ret = ids[min_idx];
|
|
1490
|
-
ids[min_idx] = -1;
|
|
1491
|
-
--nvalid;
|
|
1492
|
-
return ret;
|
|
1493
|
-
}
|
|
1494
|
-
|
|
1495
|
-
#elif __AVX2__
|
|
1496
|
-
|
|
1497
|
-
int HNSW::MinimaxHeap::pop_min(float* vmin_out) {
|
|
1498
|
-
assert(k > 0);
|
|
1499
|
-
static_assert(
|
|
1500
|
-
std::is_same<storage_idx_t, int32_t>::value,
|
|
1501
|
-
"This code expects storage_idx_t to be int32_t");
|
|
1502
|
-
|
|
1503
|
-
int32_t min_idx = -1;
|
|
1504
|
-
float min_dis = std::numeric_limits<float>::infinity();
|
|
1505
|
-
|
|
1506
|
-
size_t iii = 0;
|
|
1507
|
-
|
|
1508
|
-
__m256i min_indices = _mm256_setr_epi32(-1, -1, -1, -1, -1, -1, -1, -1);
|
|
1509
|
-
__m256 min_distances =
|
|
1510
|
-
_mm256_set1_ps(std::numeric_limits<float>::infinity());
|
|
1511
|
-
__m256i current_indices = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
|
1512
|
-
__m256i offset = _mm256_set1_epi32(8);
|
|
1513
|
-
|
|
1514
|
-
// The baseline version is available in non-AVX2 branch.
|
|
1515
|
-
|
|
1516
|
-
// The following loop tracks the rightmost index with the min distance.
|
|
1517
|
-
// -1 index values are ignored.
|
|
1518
|
-
const int k8 = (k / 8) * 8;
|
|
1519
|
-
for (; iii < k8; iii += 8) {
|
|
1520
|
-
__m256i indices =
|
|
1521
|
-
_mm256_loadu_si256((const __m256i*)(ids.data() + iii));
|
|
1522
|
-
__m256 distances = _mm256_loadu_ps(dis.data() + iii);
|
|
1523
|
-
|
|
1524
|
-
// This mask filters out -1 values among indices.
|
|
1525
|
-
__m256i m1mask = _mm256_cmpgt_epi32(_mm256_setzero_si256(), indices);
|
|
1526
|
-
|
|
1527
|
-
__m256i dmask = _mm256_castps_si256(
|
|
1528
|
-
_mm256_cmp_ps(min_distances, distances, _CMP_LT_OS));
|
|
1529
|
-
__m256 finalmask = _mm256_castsi256_ps(_mm256_or_si256(m1mask, dmask));
|
|
1530
|
-
|
|
1531
|
-
const __m256i min_indices_new = _mm256_castps_si256(_mm256_blendv_ps(
|
|
1532
|
-
_mm256_castsi256_ps(current_indices),
|
|
1533
|
-
_mm256_castsi256_ps(min_indices),
|
|
1534
|
-
finalmask));
|
|
1535
|
-
|
|
1536
|
-
const __m256 min_distances_new =
|
|
1537
|
-
_mm256_blendv_ps(distances, min_distances, finalmask);
|
|
1538
|
-
|
|
1539
|
-
min_indices = min_indices_new;
|
|
1540
|
-
min_distances = min_distances_new;
|
|
1541
|
-
|
|
1542
|
-
current_indices = _mm256_add_epi32(current_indices, offset);
|
|
1543
|
-
}
|
|
1544
|
-
|
|
1545
|
-
// Vectorizing is doable, but is not practical
|
|
1546
|
-
int32_t vidx8[8];
|
|
1547
|
-
float vdis8[8];
|
|
1548
|
-
_mm256_storeu_ps(vdis8, min_distances);
|
|
1549
|
-
_mm256_storeu_si256((__m256i*)vidx8, min_indices);
|
|
1550
|
-
|
|
1551
|
-
for (size_t j = 0; j < 8; j++) {
|
|
1552
|
-
if (min_dis > vdis8[j] || (min_dis == vdis8[j] && min_idx < vidx8[j])) {
|
|
1553
|
-
min_idx = vidx8[j];
|
|
1554
|
-
min_dis = vdis8[j];
|
|
1555
|
-
}
|
|
1556
|
-
}
|
|
1557
|
-
|
|
1558
|
-
// process last values. Vectorizing is doable, but is not practical
|
|
1559
|
-
for (; iii < k; iii++) {
|
|
1560
|
-
if (ids[iii] != -1 && dis[iii] <= min_dis) {
|
|
1561
|
-
min_dis = dis[iii];
|
|
1562
|
-
min_idx = iii;
|
|
1563
|
-
}
|
|
1564
|
-
}
|
|
1565
|
-
|
|
1566
|
-
if (min_idx == -1) {
|
|
1567
|
-
return -1;
|
|
1568
|
-
}
|
|
1569
|
-
|
|
1570
|
-
if (vmin_out) {
|
|
1571
|
-
*vmin_out = min_dis;
|
|
1572
|
-
}
|
|
1573
|
-
int ret = ids[min_idx];
|
|
1574
|
-
ids[min_idx] = -1;
|
|
1575
|
-
--nvalid;
|
|
1576
|
-
return ret;
|
|
1577
|
-
}
|
|
1578
|
-
|
|
1579
|
-
#else
|
|
1580
|
-
|
|
1581
|
-
// baseline non-vectorized version
|
|
1582
|
-
int HNSW::MinimaxHeap::pop_min(float* vmin_out) {
|
|
1583
|
-
assert(k > 0);
|
|
1584
|
-
// returns min. This is an O(n) operation
|
|
1585
|
-
int i = k - 1;
|
|
1586
|
-
while (i >= 0) {
|
|
1587
|
-
if (ids[i] != -1) {
|
|
1588
|
-
break;
|
|
1589
|
-
}
|
|
1590
|
-
i--;
|
|
1591
|
-
}
|
|
1592
|
-
if (i == -1) {
|
|
1593
|
-
return -1;
|
|
1594
|
-
}
|
|
1595
|
-
int imin = i;
|
|
1596
|
-
float vmin = dis[i];
|
|
1597
|
-
i--;
|
|
1598
|
-
while (i >= 0) {
|
|
1599
|
-
if (ids[i] != -1 && dis[i] < vmin) {
|
|
1600
|
-
vmin = dis[i];
|
|
1601
|
-
imin = i;
|
|
1602
|
-
}
|
|
1603
|
-
i--;
|
|
1604
|
-
}
|
|
1605
|
-
if (vmin_out) {
|
|
1606
|
-
*vmin_out = vmin;
|
|
1607
|
-
}
|
|
1608
|
-
int ret = ids[imin];
|
|
1609
|
-
ids[imin] = -1;
|
|
1610
|
-
--nvalid;
|
|
1611
|
-
|
|
1612
|
-
return ret;
|
|
1613
|
-
}
|
|
1614
|
-
#endif
|
|
1615
|
-
|
|
1616
|
-
int HNSW::MinimaxHeap::count_below(float thresh) {
|
|
1617
|
-
int n_below = 0;
|
|
1618
|
-
for (int i = 0; i < k; i++) {
|
|
1619
|
-
if (dis[i] < thresh) {
|
|
1620
|
-
n_below++;
|
|
1621
|
-
}
|
|
1622
|
-
}
|
|
1623
|
-
|
|
1624
|
-
return n_below;
|
|
1625
|
-
}
|
|
1626
|
-
|
|
1627
1402
|
} // namespace faiss
|