faiss 0.5.3 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +4 -4
- data/ext/faiss/index.cpp +63 -45
- data/ext/faiss/index_binary.cpp +37 -27
- data/ext/faiss/kmeans.cpp +9 -8
- data/ext/faiss/pca_matrix.cpp +9 -7
- data/ext/faiss/product_quantizer.cpp +13 -11
- data/ext/faiss/utils.cpp +4 -2
- data/ext/faiss/utils.h +4 -0
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +214 -82
- data/vendor/faiss/faiss/AutoTune.h +14 -1
- data/vendor/faiss/faiss/Clustering.cpp +97 -249
- data/vendor/faiss/faiss/Clustering.h +18 -0
- data/vendor/faiss/faiss/IVFlib.cpp +67 -44
- data/vendor/faiss/faiss/Index.cpp +25 -12
- data/vendor/faiss/faiss/Index.h +26 -4
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +68 -61
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +6 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +92 -95
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +120 -414
- data/vendor/faiss/faiss/IndexFastScan.cpp +105 -129
- data/vendor/faiss/faiss/IndexFastScan.h +35 -24
- data/vendor/faiss/faiss/IndexFlat.cpp +216 -152
- data/vendor/faiss/faiss/IndexFlat.h +32 -14
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +88 -41
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +299 -187
- data/vendor/faiss/faiss/IndexHNSW.h +30 -14
- data/vendor/faiss/faiss/IndexIDMap.cpp +26 -22
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +535 -405
- data/vendor/faiss/faiss/IndexIVF.h +47 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +105 -99
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +6 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +379 -249
- data/vendor/faiss/faiss/IndexIVFFastScan.h +65 -60
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +41 -124
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +89 -138
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +77 -907
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +184 -122
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -18
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +59 -60
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +564 -416
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +269 -111
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +44 -25
- data/vendor/faiss/faiss/IndexLattice.cpp +41 -36
- data/vendor/faiss/faiss/IndexNNDescent.cpp +37 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +40 -23
- data/vendor/faiss/faiss/IndexNSG.h +0 -2
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +32 -12
- data/vendor/faiss/faiss/IndexPQ.cpp +129 -213
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +31 -43
- data/vendor/faiss/faiss/IndexRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +135 -317
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +192 -34
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -55
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +13 -13
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +29 -6
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +349 -141
- data/vendor/faiss/faiss/VectorTransform.h +39 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +55 -51
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +6 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +64 -34
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -28
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
- data/vendor/faiss/faiss/impl/CodePacker.cpp +7 -3
- data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +64 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +117 -351
- data/vendor/faiss/faiss/impl/HNSW.h +21 -40
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +114 -102
- data/vendor/faiss/faiss/impl/NNDescent.cpp +63 -26
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +44 -26
- data/vendor/faiss/faiss/impl/NSG.h +20 -10
- data/vendor/faiss/faiss/impl/Panorama.cpp +76 -52
- data/vendor/faiss/faiss/impl/Panorama.h +265 -78
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +62 -37
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +99 -80
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +135 -37
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +148 -21
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +298 -301
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +40 -32
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +218 -113
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +119 -2362
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
- data/vendor/faiss/faiss/impl/VisitedTable.h +76 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +163 -0
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +176 -4
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -348
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +290 -142
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1950 -505
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -2
- data/vendor/faiss/faiss/impl/index_write.cpp +112 -21
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +81 -40
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +15 -8
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.h} +43 -220
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.h} +25 -112
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +59 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +256 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -146
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +320 -483
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +137 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +371 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +190 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +603 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +597 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +388 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +630 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +387 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +54 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +173 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +274 -171
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +275 -217
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +115 -28
- data/vendor/faiss/faiss/index_io.h +53 -3
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +73 -20
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +14 -14
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +14 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +56 -10
- data/vendor/faiss/faiss/utils/Heap.h +21 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +54 -40
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +507 -559
- data/vendor/faiss/faiss/utils/distances.h +118 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +250 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +72 -3681
- data/vendor/faiss/faiss/utils/extra_distances.cpp +60 -102
- data/vendor/faiss/faiss/utils/extra_distances.h +79 -7
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +124 -343
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +154 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +777 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +306 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1431 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1095 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +392 -0
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +334 -0
- data/vendor/faiss/faiss/utils/simd_levels.h +183 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +21 -14
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +156 -42
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -216
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -224
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -228
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -450
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -296
- data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
|
@@ -7,22 +7,17 @@
|
|
|
7
7
|
|
|
8
8
|
#include <faiss/impl/HNSW.h>
|
|
9
9
|
|
|
10
|
+
#include <cinttypes>
|
|
10
11
|
#include <cstddef>
|
|
12
|
+
#include <cstdlib>
|
|
11
13
|
|
|
12
14
|
#include <faiss/IndexHNSW.h>
|
|
13
15
|
|
|
14
|
-
#include <faiss/impl/AuxIndexStructures.h>
|
|
15
16
|
#include <faiss/impl/DistanceComputer.h>
|
|
16
17
|
#include <faiss/impl/IDSelector.h>
|
|
17
18
|
#include <faiss/impl/ResultHandler.h>
|
|
18
|
-
#include <faiss/
|
|
19
|
-
|
|
20
|
-
#ifdef __AVX2__
|
|
21
|
-
#include <immintrin.h>
|
|
22
|
-
|
|
23
|
-
#include <limits>
|
|
24
|
-
#include <type_traits>
|
|
25
|
-
#endif
|
|
19
|
+
#include <faiss/impl/VisitedTable.h>
|
|
20
|
+
#include <faiss/impl/hnsw/MinimaxHeap.h>
|
|
26
21
|
|
|
27
22
|
namespace faiss {
|
|
28
23
|
|
|
@@ -31,7 +26,8 @@ namespace faiss {
|
|
|
31
26
|
**************************************************************/
|
|
32
27
|
|
|
33
28
|
int HNSW::nb_neighbors(int layer_no) const {
|
|
34
|
-
FAISS_THROW_IF_NOT(
|
|
29
|
+
FAISS_THROW_IF_NOT(
|
|
30
|
+
static_cast<size_t>(layer_no + 1) < cum_nneighbor_per_level.size());
|
|
35
31
|
return cum_nneighbor_per_level[layer_no + 1] -
|
|
36
32
|
cum_nneighbor_per_level[layer_no];
|
|
37
33
|
}
|
|
@@ -39,17 +35,21 @@ int HNSW::nb_neighbors(int layer_no) const {
|
|
|
39
35
|
void HNSW::set_nb_neighbors(int level_no, int n) {
|
|
40
36
|
FAISS_THROW_IF_NOT(levels.size() == 0);
|
|
41
37
|
int cur_n = nb_neighbors(level_no);
|
|
42
|
-
for (
|
|
38
|
+
for (size_t i = level_no + 1; i < cum_nneighbor_per_level.size(); i++) {
|
|
43
39
|
cum_nneighbor_per_level[i] += n - cur_n;
|
|
44
40
|
}
|
|
45
41
|
}
|
|
46
42
|
|
|
47
43
|
int HNSW::cum_nb_neighbors(int layer_no) const {
|
|
44
|
+
FAISS_CHECK_RANGE_DEBUG(layer_no, 0, (int)cum_nneighbor_per_level.size());
|
|
48
45
|
return cum_nneighbor_per_level[layer_no];
|
|
49
46
|
}
|
|
50
47
|
|
|
51
48
|
void HNSW::neighbor_range(idx_t no, int layer_no, size_t* begin, size_t* end)
|
|
52
49
|
const {
|
|
50
|
+
FAISS_CHECK_RANGE_DEBUG(no, 0, (idx_t)offsets.size());
|
|
51
|
+
FAISS_CHECK_RANGE_DEBUG(
|
|
52
|
+
layer_no, 0, (int)cum_nneighbor_per_level.size() - 1);
|
|
53
53
|
size_t o = offsets[no];
|
|
54
54
|
*begin = o + cum_nb_neighbors(layer_no);
|
|
55
55
|
*end = o + cum_nb_neighbors(layer_no + 1);
|
|
@@ -63,7 +63,7 @@ HNSW::HNSW(int M) : rng(12345) {
|
|
|
63
63
|
int HNSW::random_level() {
|
|
64
64
|
double f = rng.rand_float();
|
|
65
65
|
// could be a bit faster with bisection
|
|
66
|
-
for (
|
|
66
|
+
for (size_t level = 0; level < assign_probas.size(); level++) {
|
|
67
67
|
if (f < assign_probas[level]) {
|
|
68
68
|
return level;
|
|
69
69
|
}
|
|
@@ -88,7 +88,7 @@ void HNSW::set_default_probas(int M, float levelMult) {
|
|
|
88
88
|
}
|
|
89
89
|
|
|
90
90
|
void HNSW::clear_neighbor_tables(int level) {
|
|
91
|
-
for (
|
|
91
|
+
for (size_t i = 0; i < levels.size(); i++) {
|
|
92
92
|
size_t begin, end;
|
|
93
93
|
neighbor_range(i, level, &begin, &end);
|
|
94
94
|
for (size_t j = begin; j < end; j++) {
|
|
@@ -107,14 +107,15 @@ void HNSW::reset() {
|
|
|
107
107
|
}
|
|
108
108
|
|
|
109
109
|
void HNSW::print_neighbor_stats(int level) const {
|
|
110
|
-
FAISS_THROW_IF_NOT(
|
|
110
|
+
FAISS_THROW_IF_NOT(
|
|
111
|
+
static_cast<size_t>(level) < cum_nneighbor_per_level.size());
|
|
111
112
|
printf("stats on level %d, max %d neighbors per vertex:\n",
|
|
112
113
|
level,
|
|
113
114
|
nb_neighbors(level));
|
|
114
115
|
size_t tot_neigh = 0, tot_common = 0, tot_reciprocal = 0, n_node = 0;
|
|
115
116
|
#pragma omp parallel for reduction(+ : tot_neigh) reduction(+ : tot_common) \
|
|
116
117
|
reduction(+ : tot_reciprocal) reduction(+ : n_node)
|
|
117
|
-
for (
|
|
118
|
+
for (idx_t i = 0; i < static_cast<idx_t>(levels.size()); i++) {
|
|
118
119
|
if (levels[i] > level) {
|
|
119
120
|
n_node++;
|
|
120
121
|
size_t begin, end;
|
|
@@ -126,7 +127,7 @@ void HNSW::print_neighbor_stats(int level) const {
|
|
|
126
127
|
}
|
|
127
128
|
neighset.insert(neighbors[j]);
|
|
128
129
|
}
|
|
129
|
-
|
|
130
|
+
size_t n_neigh = neighset.size();
|
|
130
131
|
int n_common = 0;
|
|
131
132
|
int n_reciprocal = 0;
|
|
132
133
|
for (size_t j = begin; j < end; j++) {
|
|
@@ -175,7 +176,7 @@ void HNSW::fill_with_random_links(size_t n) {
|
|
|
175
176
|
|
|
176
177
|
for (int level = max_level_2 - 1; level >= 0; --level) {
|
|
177
178
|
std::vector<int> elts;
|
|
178
|
-
for (
|
|
179
|
+
for (size_t i = 0; i < n; i++) {
|
|
179
180
|
if (levels[i] > level) {
|
|
180
181
|
elts.push_back(i);
|
|
181
182
|
}
|
|
@@ -186,10 +187,10 @@ void HNSW::fill_with_random_links(size_t n) {
|
|
|
186
187
|
continue;
|
|
187
188
|
}
|
|
188
189
|
|
|
189
|
-
for (
|
|
190
|
+
for (size_t ii = 0; ii < elts.size(); ii++) {
|
|
190
191
|
int i = elts[ii];
|
|
191
192
|
size_t begin, end;
|
|
192
|
-
neighbor_range(i,
|
|
193
|
+
neighbor_range(i, level, &begin, &end);
|
|
193
194
|
for (size_t j = begin; j < end; j++) {
|
|
194
195
|
int other = 0;
|
|
195
196
|
do {
|
|
@@ -209,14 +210,14 @@ int HNSW::prepare_level_tab(size_t n, bool preset_levels) {
|
|
|
209
210
|
FAISS_ASSERT(n0 + n == levels.size());
|
|
210
211
|
} else {
|
|
211
212
|
FAISS_ASSERT(n0 == levels.size());
|
|
212
|
-
for (
|
|
213
|
+
for (size_t i = 0; i < n; i++) {
|
|
213
214
|
int pt_level = random_level();
|
|
214
215
|
levels.push_back(pt_level + 1);
|
|
215
216
|
}
|
|
216
217
|
}
|
|
217
218
|
|
|
218
219
|
int max_level_2 = 0;
|
|
219
|
-
for (
|
|
220
|
+
for (size_t i = 0; i < n; i++) {
|
|
220
221
|
int pt_level = levels[i + n0] - 1;
|
|
221
222
|
if (pt_level > max_level_2) {
|
|
222
223
|
max_level_2 = pt_level;
|
|
@@ -236,7 +237,7 @@ void HNSW::shrink_neighbor_list(
|
|
|
236
237
|
DistanceComputer& qdis,
|
|
237
238
|
std::priority_queue<NodeDistFarther>& input,
|
|
238
239
|
std::vector<NodeDistFarther>& output,
|
|
239
|
-
|
|
240
|
+
size_t max_size,
|
|
240
241
|
bool keep_max_size_level0) {
|
|
241
242
|
// This prevents number of neighbors at
|
|
242
243
|
// level 0 from being shrunk to less than 2 * M.
|
|
@@ -261,7 +262,7 @@ void HNSW::shrink_neighbor_list(
|
|
|
261
262
|
|
|
262
263
|
if (good) {
|
|
263
264
|
output.push_back(v1);
|
|
264
|
-
if (output.size() >= max_size) {
|
|
265
|
+
if (output.size() >= static_cast<size_t>(max_size)) {
|
|
265
266
|
return;
|
|
266
267
|
}
|
|
267
268
|
} else if (keep_max_size_level0) {
|
|
@@ -269,7 +270,8 @@ void HNSW::shrink_neighbor_list(
|
|
|
269
270
|
}
|
|
270
271
|
}
|
|
271
272
|
size_t idx = 0;
|
|
272
|
-
while (keep_max_size_level0 &&
|
|
273
|
+
while (keep_max_size_level0 &&
|
|
274
|
+
(output.size() < static_cast<size_t>(max_size)) &&
|
|
273
275
|
(idx < outsiders.size())) {
|
|
274
276
|
output.push_back(outsiders[idx++]);
|
|
275
277
|
}
|
|
@@ -289,9 +291,9 @@ using NodeDistFarther = HNSW::NodeDistFarther;
|
|
|
289
291
|
void shrink_neighbor_list(
|
|
290
292
|
DistanceComputer& qdis,
|
|
291
293
|
std::priority_queue<NodeDistCloser>& resultSet1,
|
|
292
|
-
|
|
294
|
+
size_t max_size,
|
|
293
295
|
bool keep_max_size_level0 = false) {
|
|
294
|
-
if (resultSet1.size() < max_size) {
|
|
296
|
+
if (resultSet1.size() < static_cast<size_t>(max_size)) {
|
|
295
297
|
return;
|
|
296
298
|
}
|
|
297
299
|
std::priority_queue<NodeDistFarther> resultSet;
|
|
@@ -344,7 +346,9 @@ void add_link(
|
|
|
344
346
|
resultSet.emplace(qdis.symmetric_dis(src, neigh), neigh);
|
|
345
347
|
}
|
|
346
348
|
|
|
347
|
-
|
|
349
|
+
size_t max_size = end - begin;
|
|
350
|
+
max_size -= max_size * std::clamp(hnsw.prune_headroom, 0.0f, 0.5f);
|
|
351
|
+
shrink_neighbor_list(qdis, resultSet, max_size, keep_max_size_level0);
|
|
348
352
|
|
|
349
353
|
// ...and back
|
|
350
354
|
size_t i = begin;
|
|
@@ -407,19 +411,19 @@ void search_neighbors_to_add(
|
|
|
407
411
|
if (nodeId < 0) {
|
|
408
412
|
break;
|
|
409
413
|
}
|
|
410
|
-
if (vt.
|
|
414
|
+
if (!vt.set(nodeId)) {
|
|
411
415
|
continue;
|
|
412
416
|
}
|
|
413
|
-
vt.set(nodeId);
|
|
414
417
|
|
|
415
418
|
float dis = qdis(nodeId);
|
|
416
419
|
NodeDistFarther evE1(dis, nodeId);
|
|
417
420
|
|
|
418
|
-
if (results.size() < hnsw.efConstruction ||
|
|
421
|
+
if (results.size() < static_cast<size_t>(hnsw.efConstruction) ||
|
|
419
422
|
results.top().d > dis) {
|
|
420
423
|
results.emplace(dis, nodeId);
|
|
421
424
|
candidates.emplace(dis, nodeId);
|
|
422
|
-
if (results.size() >
|
|
425
|
+
if (results.size() >
|
|
426
|
+
static_cast<size_t>(hnsw.efConstruction)) {
|
|
423
427
|
results.pop();
|
|
424
428
|
}
|
|
425
429
|
}
|
|
@@ -430,11 +434,12 @@ void search_neighbors_to_add(
|
|
|
430
434
|
// the following version processes 4 neighbors at a time
|
|
431
435
|
auto update_with_candidate = [&](const storage_idx_t idx,
|
|
432
436
|
const float dis) {
|
|
433
|
-
if (results.size() < hnsw.efConstruction ||
|
|
437
|
+
if (results.size() < static_cast<size_t>(hnsw.efConstruction) ||
|
|
434
438
|
results.top().d > dis) {
|
|
435
439
|
results.emplace(dis, idx);
|
|
436
440
|
candidates.emplace(dis, idx);
|
|
437
|
-
if (results.size() >
|
|
441
|
+
if (results.size() >
|
|
442
|
+
static_cast<size_t>(hnsw.efConstruction)) {
|
|
438
443
|
results.pop();
|
|
439
444
|
}
|
|
440
445
|
}
|
|
@@ -448,10 +453,9 @@ void search_neighbors_to_add(
|
|
|
448
453
|
if (nodeId < 0) {
|
|
449
454
|
break;
|
|
450
455
|
}
|
|
451
|
-
if (vt.
|
|
456
|
+
if (!vt.set(nodeId)) {
|
|
452
457
|
continue;
|
|
453
458
|
}
|
|
454
|
-
vt.set(nodeId);
|
|
455
459
|
|
|
456
460
|
buffered_ids[n_buffered] = nodeId;
|
|
457
461
|
n_buffered += 1;
|
|
@@ -477,7 +481,7 @@ void search_neighbors_to_add(
|
|
|
477
481
|
}
|
|
478
482
|
|
|
479
483
|
// process leftovers
|
|
480
|
-
for (
|
|
484
|
+
for (int icnt = 0; icnt < n_buffered; icnt++) {
|
|
481
485
|
float dis = qdis(buffered_ids[icnt]);
|
|
482
486
|
update_with_candidate(buffered_ids[icnt], dis);
|
|
483
487
|
}
|
|
@@ -495,7 +499,7 @@ void HNSW::add_links_starting_from(
|
|
|
495
499
|
storage_idx_t nearest,
|
|
496
500
|
float d_nearest,
|
|
497
501
|
int level,
|
|
498
|
-
|
|
502
|
+
LockVector& locks,
|
|
499
503
|
VisitedTable& vt,
|
|
500
504
|
bool keep_max_size_level0) {
|
|
501
505
|
std::priority_queue<NodeDistCloser> link_targets;
|
|
@@ -517,13 +521,13 @@ void HNSW::add_links_starting_from(
|
|
|
517
521
|
link_targets.pop();
|
|
518
522
|
}
|
|
519
523
|
|
|
520
|
-
|
|
524
|
+
locks.unlock(pt_id);
|
|
521
525
|
for (storage_idx_t other_id : neighbors_to_add) {
|
|
522
|
-
|
|
526
|
+
locks.lock(other_id);
|
|
523
527
|
add_link(*this, ptdis, other_id, pt_id, level, keep_max_size_level0);
|
|
524
|
-
|
|
528
|
+
locks.unlock(other_id);
|
|
525
529
|
}
|
|
526
|
-
|
|
530
|
+
locks.lock(pt_id);
|
|
527
531
|
}
|
|
528
532
|
|
|
529
533
|
/**************************************************************
|
|
@@ -534,19 +538,19 @@ void HNSW::add_with_locks(
|
|
|
534
538
|
DistanceComputer& ptdis,
|
|
535
539
|
int pt_level,
|
|
536
540
|
int pt_id,
|
|
537
|
-
|
|
541
|
+
LockVector& locks,
|
|
538
542
|
VisitedTable& vt,
|
|
539
543
|
bool keep_max_size_level0) {
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
storage_idx_t nearest;
|
|
544
|
+
storage_idx_t nearest = entry_point;
|
|
545
|
+
if (nearest == -1) { // avoid locking after the first point.
|
|
543
546
|
#pragma omp critical
|
|
544
|
-
|
|
545
|
-
nearest = entry_point;
|
|
546
|
-
|
|
547
|
-
if (nearest == -1) {
|
|
547
|
+
if (entry_point == -1) { // double-check under lock.
|
|
548
548
|
max_level = pt_level;
|
|
549
549
|
entry_point = pt_id;
|
|
550
|
+
// leave nearest = -1 to trigger early exit after critical block.
|
|
551
|
+
} else {
|
|
552
|
+
// else: Another thread set the entry point.
|
|
553
|
+
nearest = entry_point;
|
|
550
554
|
}
|
|
551
555
|
}
|
|
552
556
|
|
|
@@ -554,11 +558,12 @@ void HNSW::add_with_locks(
|
|
|
554
558
|
return;
|
|
555
559
|
}
|
|
556
560
|
|
|
557
|
-
|
|
561
|
+
locks.lock(pt_id);
|
|
558
562
|
|
|
559
563
|
int level = max_level; // level at which we start adding neighbors
|
|
560
564
|
float d_nearest = ptdis(nearest);
|
|
561
565
|
|
|
566
|
+
// greedy search on upper levels
|
|
562
567
|
for (; level > pt_level; level--) {
|
|
563
568
|
greedy_update_nearest(*this, ptdis, level, nearest, d_nearest);
|
|
564
569
|
}
|
|
@@ -570,16 +575,19 @@ void HNSW::add_with_locks(
|
|
|
570
575
|
nearest,
|
|
571
576
|
d_nearest,
|
|
572
577
|
level,
|
|
573
|
-
locks
|
|
578
|
+
locks,
|
|
574
579
|
vt,
|
|
575
580
|
keep_max_size_level0);
|
|
576
581
|
}
|
|
577
582
|
|
|
578
|
-
|
|
583
|
+
locks.unlock(pt_id);
|
|
579
584
|
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
585
|
+
#pragma omp critical
|
|
586
|
+
{
|
|
587
|
+
if (pt_level > max_level) {
|
|
588
|
+
max_level = pt_level;
|
|
589
|
+
entry_point = pt_id;
|
|
590
|
+
}
|
|
583
591
|
}
|
|
584
592
|
}
|
|
585
593
|
|
|
@@ -587,7 +595,6 @@ void HNSW::add_with_locks(
|
|
|
587
595
|
* Searching
|
|
588
596
|
**************************************************************/
|
|
589
597
|
|
|
590
|
-
using MinimaxHeap = HNSW::MinimaxHeap;
|
|
591
598
|
using Node = HNSW::Node;
|
|
592
599
|
using C = HNSW::C;
|
|
593
600
|
|
|
@@ -616,7 +623,7 @@ static inline void extract_search_params(
|
|
|
616
623
|
int search_from_candidates(
|
|
617
624
|
const HNSW& hnsw,
|
|
618
625
|
DistanceComputer& qdis,
|
|
619
|
-
ResultHandler
|
|
626
|
+
ResultHandler& res,
|
|
620
627
|
MinimaxHeap& candidates,
|
|
621
628
|
VisitedTable& vt,
|
|
622
629
|
HNSWStats& stats,
|
|
@@ -675,7 +682,7 @@ int search_from_candidates(
|
|
|
675
682
|
break;
|
|
676
683
|
}
|
|
677
684
|
|
|
678
|
-
|
|
685
|
+
vt.prefetch(v1);
|
|
679
686
|
jmax += 1;
|
|
680
687
|
}
|
|
681
688
|
|
|
@@ -699,10 +706,8 @@ int search_from_candidates(
|
|
|
699
706
|
for (size_t j = begin; j < jmax; j++) {
|
|
700
707
|
int v1 = hnsw.neighbors[j];
|
|
701
708
|
|
|
702
|
-
bool vget = vt.get(v1);
|
|
703
|
-
vt.set(v1);
|
|
704
709
|
saved_j[counter] = v1;
|
|
705
|
-
counter +=
|
|
710
|
+
counter += vt.set(v1) ? 1 : 0;
|
|
706
711
|
|
|
707
712
|
if (counter == 4) {
|
|
708
713
|
float dis[4];
|
|
@@ -726,7 +731,7 @@ int search_from_candidates(
|
|
|
726
731
|
}
|
|
727
732
|
}
|
|
728
733
|
|
|
729
|
-
for (
|
|
734
|
+
for (int icnt = 0; icnt < counter; icnt++) {
|
|
730
735
|
float dis = qdis(saved_j[icnt]);
|
|
731
736
|
add_to_heap(saved_j[icnt], dis);
|
|
732
737
|
|
|
@@ -755,7 +760,7 @@ int search_from_candidates_panorama(
|
|
|
755
760
|
const HNSW& hnsw,
|
|
756
761
|
const IndexHNSW* index,
|
|
757
762
|
DistanceComputer& qdis,
|
|
758
|
-
ResultHandler
|
|
763
|
+
ResultHandler& res,
|
|
759
764
|
MinimaxHeap& candidates,
|
|
760
765
|
VisitedTable& vt,
|
|
761
766
|
HNSWStats& stats,
|
|
@@ -802,16 +807,15 @@ int search_from_candidates_panorama(
|
|
|
802
807
|
std::vector<float> exact_distances(M);
|
|
803
808
|
|
|
804
809
|
const float* query = flat_codes_qdis->q;
|
|
805
|
-
std::vector<float> query_cum_sums(panorama_index->
|
|
806
|
-
|
|
807
|
-
query,
|
|
808
|
-
query_cum_sums.data(),
|
|
809
|
-
panorama_index->d,
|
|
810
|
-
panorama_index->num_panorama_levels,
|
|
811
|
-
panorama_index->panorama_level_width);
|
|
810
|
+
std::vector<float> query_cum_sums(panorama_index->pano.n_levels + 1);
|
|
811
|
+
panorama_index->pano.compute_query_cum_sums(query, query_cum_sums.data());
|
|
812
812
|
float query_norm_sq = query_cum_sums[0] * query_cum_sums[0];
|
|
813
813
|
|
|
814
814
|
int nstep = 0;
|
|
815
|
+
const size_t d = static_cast<size_t>(panorama_index->d);
|
|
816
|
+
|
|
817
|
+
PanoramaStats local_pano_stats;
|
|
818
|
+
local_pano_stats.reset();
|
|
815
819
|
|
|
816
820
|
while (candidates.size() > 0) {
|
|
817
821
|
float d0 = 0;
|
|
@@ -847,21 +851,20 @@ int search_from_candidates_panorama(
|
|
|
847
851
|
query_norm_sq + cum_sums_v1[0] * cum_sums_v1[0];
|
|
848
852
|
|
|
849
853
|
bool is_selected = !sel || sel->is_member(v1);
|
|
850
|
-
initial_size += is_selected &&
|
|
851
|
-
|
|
852
|
-
vt.set(v1);
|
|
854
|
+
initial_size += is_selected && vt.set(v1) ? 1 : 0;
|
|
853
855
|
}
|
|
854
856
|
|
|
857
|
+
local_pano_stats.total_dims += initial_size * d;
|
|
855
858
|
size_t batch_size = initial_size;
|
|
856
859
|
size_t curr_panorama_level = 0;
|
|
857
|
-
const size_t num_panorama_levels = panorama_index->
|
|
860
|
+
const size_t num_panorama_levels = panorama_index->pano.n_levels;
|
|
858
861
|
while (curr_panorama_level < num_panorama_levels && batch_size > 0) {
|
|
859
862
|
float query_cum_norm = query_cum_sums[curr_panorama_level + 1];
|
|
860
863
|
|
|
861
|
-
|
|
862
|
-
panorama_index->
|
|
863
|
-
size_t
|
|
864
|
-
|
|
864
|
+
size_t start_dim = curr_panorama_level *
|
|
865
|
+
panorama_index->pano.level_width_floats;
|
|
866
|
+
size_t end_dim = (curr_panorama_level + 1) *
|
|
867
|
+
panorama_index->pano.level_width_floats;
|
|
865
868
|
end_dim = std::min(end_dim, static_cast<size_t>(panorama_index->d));
|
|
866
869
|
|
|
867
870
|
size_t i = 0;
|
|
@@ -967,6 +970,8 @@ int search_from_candidates_panorama(
|
|
|
967
970
|
}
|
|
968
971
|
}
|
|
969
972
|
|
|
973
|
+
local_pano_stats.total_dims_scanned +=
|
|
974
|
+
batch_size * (end_dim - start_dim);
|
|
970
975
|
batch_size = next_batch_size;
|
|
971
976
|
curr_panorama_level++;
|
|
972
977
|
}
|
|
@@ -975,6 +980,7 @@ int search_from_candidates_panorama(
|
|
|
975
980
|
for (size_t i = 0; i < batch_size; i++) {
|
|
976
981
|
idx_t idx = index_array[i];
|
|
977
982
|
if (res.add_result(exact_distances[i], idx)) {
|
|
983
|
+
threshold = res.threshold;
|
|
978
984
|
nres += 1;
|
|
979
985
|
}
|
|
980
986
|
candidates.push(idx, exact_distances[i]);
|
|
@@ -995,9 +1001,22 @@ int search_from_candidates_panorama(
|
|
|
995
1001
|
stats.nhops += nstep;
|
|
996
1002
|
}
|
|
997
1003
|
|
|
1004
|
+
indexPanorama_stats.add(local_pano_stats);
|
|
998
1005
|
return nres;
|
|
999
1006
|
}
|
|
1000
1007
|
|
|
1008
|
+
template <typename T, typename Container, typename Compare>
|
|
1009
|
+
void reservePriorityQueue(
|
|
1010
|
+
std::priority_queue<T, Container, Compare>& q,
|
|
1011
|
+
std::size_t size) {
|
|
1012
|
+
struct Access : std::priority_queue<T, Container, Compare> {
|
|
1013
|
+
using std::priority_queue<T, Container, Compare>::c;
|
|
1014
|
+
};
|
|
1015
|
+
Access access{std::move(q)};
|
|
1016
|
+
access.c.reserve(size);
|
|
1017
|
+
q = std::move(access);
|
|
1018
|
+
}
|
|
1019
|
+
|
|
1001
1020
|
std::priority_queue<HNSW::Node> search_from_candidate_unbounded(
|
|
1002
1021
|
const HNSW& hnsw,
|
|
1003
1022
|
const Node& node,
|
|
@@ -1007,7 +1026,10 @@ std::priority_queue<HNSW::Node> search_from_candidate_unbounded(
|
|
|
1007
1026
|
HNSWStats& stats) {
|
|
1008
1027
|
int ndis = 0;
|
|
1009
1028
|
std::priority_queue<Node> top_candidates;
|
|
1029
|
+
reservePriorityQueue(top_candidates, ef);
|
|
1030
|
+
|
|
1010
1031
|
std::priority_queue<Node, std::vector<Node>, std::greater<Node>> candidates;
|
|
1032
|
+
reservePriorityQueue(candidates, ef);
|
|
1011
1033
|
|
|
1012
1034
|
top_candidates.push(node);
|
|
1013
1035
|
candidates.push(node);
|
|
@@ -1037,7 +1059,7 @@ std::priority_queue<HNSW::Node> search_from_candidate_unbounded(
|
|
|
1037
1059
|
break;
|
|
1038
1060
|
}
|
|
1039
1061
|
|
|
1040
|
-
|
|
1062
|
+
vt->prefetch(v1);
|
|
1041
1063
|
jmax += 1;
|
|
1042
1064
|
}
|
|
1043
1065
|
|
|
@@ -1059,10 +1081,8 @@ std::priority_queue<HNSW::Node> search_from_candidate_unbounded(
|
|
|
1059
1081
|
for (size_t j = begin; j < jmax; j++) {
|
|
1060
1082
|
int v1 = hnsw.neighbors[j];
|
|
1061
1083
|
|
|
1062
|
-
bool vget = vt->get(v1);
|
|
1063
|
-
vt->set(v1);
|
|
1064
1084
|
saved_j[counter] = v1;
|
|
1065
|
-
counter +=
|
|
1085
|
+
counter += vt->set(v1) ? 1 : 0;
|
|
1066
1086
|
|
|
1067
1087
|
if (counter == 4) {
|
|
1068
1088
|
float dis[4];
|
|
@@ -1086,7 +1106,7 @@ std::priority_queue<HNSW::Node> search_from_candidate_unbounded(
|
|
|
1086
1106
|
}
|
|
1087
1107
|
}
|
|
1088
1108
|
|
|
1089
|
-
for (
|
|
1109
|
+
for (int icnt = 0; icnt < counter; icnt++) {
|
|
1090
1110
|
float dis = qdis(saved_j[icnt]);
|
|
1091
1111
|
add_to_heap(saved_j[icnt], dis);
|
|
1092
1112
|
|
|
@@ -1166,7 +1186,7 @@ HNSWStats greedy_update_nearest(
|
|
|
1166
1186
|
}
|
|
1167
1187
|
|
|
1168
1188
|
// process leftovers
|
|
1169
|
-
for (
|
|
1189
|
+
for (int icnt = 0; icnt < n_buffered; icnt++) {
|
|
1170
1190
|
float dis = qdis(buffered_ids[icnt]);
|
|
1171
1191
|
update_with_candidate(buffered_ids[icnt], dis);
|
|
1172
1192
|
}
|
|
@@ -1182,12 +1202,11 @@ HNSWStats greedy_update_nearest(
|
|
|
1182
1202
|
}
|
|
1183
1203
|
|
|
1184
1204
|
namespace {
|
|
1185
|
-
using MinimaxHeap = HNSW::MinimaxHeap;
|
|
1186
1205
|
using Node = HNSW::Node;
|
|
1187
1206
|
using C = HNSW::C;
|
|
1188
1207
|
|
|
1189
1208
|
// just used as a lower bound for the minmaxheap, but it is set for heap search
|
|
1190
|
-
int extract_k_from_ResultHandler(ResultHandler
|
|
1209
|
+
int extract_k_from_ResultHandler(ResultHandler& res) {
|
|
1191
1210
|
using RH = HeapBlockResultHandler<C>;
|
|
1192
1211
|
if (auto hres = dynamic_cast<RH::SingleResultHandler*>(&res)) {
|
|
1193
1212
|
return hres->k;
|
|
@@ -1200,7 +1219,7 @@ int extract_k_from_ResultHandler(ResultHandler<C>& res) {
|
|
|
1200
1219
|
HNSWStats HNSW::search(
|
|
1201
1220
|
DistanceComputer& qdis,
|
|
1202
1221
|
const IndexHNSW* index,
|
|
1203
|
-
ResultHandler
|
|
1222
|
+
ResultHandler& res,
|
|
1204
1223
|
VisitedTable& vt,
|
|
1205
1224
|
const SearchParameters* params) const {
|
|
1206
1225
|
HNSWStats stats;
|
|
@@ -1210,12 +1229,12 @@ HNSWStats HNSW::search(
|
|
|
1210
1229
|
int k = extract_k_from_ResultHandler(res);
|
|
1211
1230
|
|
|
1212
1231
|
bool bounded_queue = this->search_bounded_queue;
|
|
1213
|
-
int
|
|
1232
|
+
int cur_efSearch = this->efSearch;
|
|
1214
1233
|
if (params) {
|
|
1215
1234
|
if (const SearchParametersHNSW* hnsw_params =
|
|
1216
1235
|
dynamic_cast<const SearchParametersHNSW*>(params)) {
|
|
1217
1236
|
bounded_queue = hnsw_params->bounded_queue;
|
|
1218
|
-
|
|
1237
|
+
cur_efSearch = hnsw_params->efSearch;
|
|
1219
1238
|
}
|
|
1220
1239
|
}
|
|
1221
1240
|
|
|
@@ -1229,7 +1248,7 @@ HNSWStats HNSW::search(
|
|
|
1229
1248
|
stats.combine(local_stats);
|
|
1230
1249
|
}
|
|
1231
1250
|
|
|
1232
|
-
int ef = std::max(
|
|
1251
|
+
int ef = std::max(cur_efSearch, k);
|
|
1233
1252
|
if (bounded_queue) { // this is the most common branch, for now we only
|
|
1234
1253
|
// support Panorama search in this branch
|
|
1235
1254
|
MinimaxHeap candidates(ef);
|
|
@@ -1257,7 +1276,7 @@ HNSWStats HNSW::search(
|
|
|
1257
1276
|
search_from_candidate_unbounded(
|
|
1258
1277
|
*this, Node(d_nearest, nearest), qdis, ef, &vt, stats);
|
|
1259
1278
|
|
|
1260
|
-
while (top_candidates.size() > k) {
|
|
1279
|
+
while (top_candidates.size() > static_cast<size_t>(k)) {
|
|
1261
1280
|
top_candidates.pop();
|
|
1262
1281
|
}
|
|
1263
1282
|
|
|
@@ -1277,7 +1296,7 @@ HNSWStats HNSW::search(
|
|
|
1277
1296
|
|
|
1278
1297
|
void HNSW::search_level_0(
|
|
1279
1298
|
DistanceComputer& qdis,
|
|
1280
|
-
ResultHandler
|
|
1299
|
+
ResultHandler& res,
|
|
1281
1300
|
idx_t nprobe,
|
|
1282
1301
|
const storage_idx_t* nearest_i,
|
|
1283
1302
|
const float* nearest_d,
|
|
@@ -1287,11 +1306,11 @@ void HNSW::search_level_0(
|
|
|
1287
1306
|
const SearchParameters* params) const {
|
|
1288
1307
|
const HNSW& hnsw = *this;
|
|
1289
1308
|
|
|
1290
|
-
auto
|
|
1309
|
+
auto cur_efSearch = hnsw.efSearch;
|
|
1291
1310
|
if (params) {
|
|
1292
1311
|
if (const SearchParametersHNSW* hnsw_params =
|
|
1293
1312
|
dynamic_cast<const SearchParametersHNSW*>(params)) {
|
|
1294
|
-
|
|
1313
|
+
cur_efSearch = hnsw_params->efSearch;
|
|
1295
1314
|
}
|
|
1296
1315
|
}
|
|
1297
1316
|
|
|
@@ -1300,7 +1319,7 @@ void HNSW::search_level_0(
|
|
|
1300
1319
|
if (search_type == 1) {
|
|
1301
1320
|
int nres = 0;
|
|
1302
1321
|
|
|
1303
|
-
for (
|
|
1322
|
+
for (idx_t j = 0; j < nprobe; j++) {
|
|
1304
1323
|
storage_idx_t cj = nearest_i[j];
|
|
1305
1324
|
|
|
1306
1325
|
if (cj < 0) {
|
|
@@ -1311,7 +1330,7 @@ void HNSW::search_level_0(
|
|
|
1311
1330
|
continue;
|
|
1312
1331
|
}
|
|
1313
1332
|
|
|
1314
|
-
int candidates_size = std::max(
|
|
1333
|
+
int candidates_size = std::max(cur_efSearch, k);
|
|
1315
1334
|
MinimaxHeap candidates(candidates_size);
|
|
1316
1335
|
|
|
1317
1336
|
candidates.push(cj, nearest_d[j]);
|
|
@@ -1329,11 +1348,11 @@ void HNSW::search_level_0(
|
|
|
1329
1348
|
nres = std::min(nres, candidates_size);
|
|
1330
1349
|
}
|
|
1331
1350
|
} else if (search_type == 2) {
|
|
1332
|
-
int candidates_size = std::max(
|
|
1351
|
+
int candidates_size = std::max(cur_efSearch, int(k));
|
|
1333
1352
|
candidates_size = std::max(candidates_size, int(nprobe));
|
|
1334
1353
|
|
|
1335
1354
|
MinimaxHeap candidates(candidates_size);
|
|
1336
|
-
for (
|
|
1355
|
+
for (idx_t j = 0; j < nprobe; j++) {
|
|
1337
1356
|
storage_idx_t cj = nearest_i[j];
|
|
1338
1357
|
|
|
1339
1358
|
if (cj < 0) {
|
|
@@ -1380,257 +1399,4 @@ void HNSW::permute_entries(const idx_t* map) {
|
|
|
1380
1399
|
neighbors = std::move(new_neighbors);
|
|
1381
1400
|
}
|
|
1382
1401
|
|
|
1383
|
-
/**************************************************************
|
|
1384
|
-
* MinimaxHeap
|
|
1385
|
-
**************************************************************/
|
|
1386
|
-
|
|
1387
|
-
void HNSW::MinimaxHeap::push(storage_idx_t i, float v) {
|
|
1388
|
-
if (k == n) {
|
|
1389
|
-
if (v >= dis[0]) {
|
|
1390
|
-
return;
|
|
1391
|
-
}
|
|
1392
|
-
if (ids[0] != -1) {
|
|
1393
|
-
--nvalid;
|
|
1394
|
-
}
|
|
1395
|
-
faiss::heap_pop<HC>(k--, dis.data(), ids.data());
|
|
1396
|
-
}
|
|
1397
|
-
faiss::heap_push<HC>(++k, dis.data(), ids.data(), v, i);
|
|
1398
|
-
++nvalid;
|
|
1399
|
-
}
|
|
1400
|
-
|
|
1401
|
-
float HNSW::MinimaxHeap::max() const {
|
|
1402
|
-
return dis[0];
|
|
1403
|
-
}
|
|
1404
|
-
|
|
1405
|
-
int HNSW::MinimaxHeap::size() const {
|
|
1406
|
-
return nvalid;
|
|
1407
|
-
}
|
|
1408
|
-
|
|
1409
|
-
void HNSW::MinimaxHeap::clear() {
|
|
1410
|
-
nvalid = k = 0;
|
|
1411
|
-
}
|
|
1412
|
-
|
|
1413
|
-
#ifdef __AVX512F__
|
|
1414
|
-
|
|
1415
|
-
int HNSW::MinimaxHeap::pop_min(float* vmin_out) {
|
|
1416
|
-
assert(k > 0);
|
|
1417
|
-
static_assert(
|
|
1418
|
-
std::is_same<storage_idx_t, int32_t>::value,
|
|
1419
|
-
"This code expects storage_idx_t to be int32_t");
|
|
1420
|
-
|
|
1421
|
-
int32_t min_idx = -1;
|
|
1422
|
-
float min_dis = std::numeric_limits<float>::infinity();
|
|
1423
|
-
|
|
1424
|
-
__m512i min_indices = _mm512_set1_epi32(-1);
|
|
1425
|
-
__m512 min_distances =
|
|
1426
|
-
_mm512_set1_ps(std::numeric_limits<float>::infinity());
|
|
1427
|
-
__m512i current_indices = _mm512_setr_epi32(
|
|
1428
|
-
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
|
1429
|
-
__m512i offset = _mm512_set1_epi32(16);
|
|
1430
|
-
|
|
1431
|
-
// The following loop tracks the rightmost index with the min distance.
|
|
1432
|
-
// -1 index values are ignored.
|
|
1433
|
-
const int k16 = (k / 16) * 16;
|
|
1434
|
-
for (size_t iii = 0; iii < k16; iii += 16) {
|
|
1435
|
-
__m512i indices =
|
|
1436
|
-
_mm512_loadu_si512((const __m512i*)(ids.data() + iii));
|
|
1437
|
-
__m512 distances = _mm512_loadu_ps(dis.data() + iii);
|
|
1438
|
-
|
|
1439
|
-
// This mask filters out -1 values among indices.
|
|
1440
|
-
__mmask16 m1mask =
|
|
1441
|
-
_mm512_cmpgt_epi32_mask(_mm512_setzero_si512(), indices);
|
|
1442
|
-
|
|
1443
|
-
__mmask16 dmask =
|
|
1444
|
-
_mm512_cmp_ps_mask(min_distances, distances, _CMP_LT_OS);
|
|
1445
|
-
__mmask16 finalmask = m1mask | dmask;
|
|
1446
|
-
|
|
1447
|
-
const __m512i min_indices_new = _mm512_mask_blend_epi32(
|
|
1448
|
-
finalmask, current_indices, min_indices);
|
|
1449
|
-
const __m512 min_distances_new =
|
|
1450
|
-
_mm512_mask_blend_ps(finalmask, distances, min_distances);
|
|
1451
|
-
|
|
1452
|
-
min_indices = min_indices_new;
|
|
1453
|
-
min_distances = min_distances_new;
|
|
1454
|
-
|
|
1455
|
-
current_indices = _mm512_add_epi32(current_indices, offset);
|
|
1456
|
-
}
|
|
1457
|
-
|
|
1458
|
-
// leftovers
|
|
1459
|
-
if (k16 != k) {
|
|
1460
|
-
const __mmask16 kmask = (1 << (k - k16)) - 1;
|
|
1461
|
-
|
|
1462
|
-
__m512i indices = _mm512_mask_loadu_epi32(
|
|
1463
|
-
_mm512_set1_epi32(-1), kmask, ids.data() + k16);
|
|
1464
|
-
__m512 distances = _mm512_maskz_loadu_ps(kmask, dis.data() + k16);
|
|
1465
|
-
|
|
1466
|
-
// This mask filters out -1 values among indices.
|
|
1467
|
-
__mmask16 m1mask =
|
|
1468
|
-
_mm512_cmpgt_epi32_mask(_mm512_setzero_si512(), indices);
|
|
1469
|
-
|
|
1470
|
-
__mmask16 dmask =
|
|
1471
|
-
_mm512_cmp_ps_mask(min_distances, distances, _CMP_LT_OS);
|
|
1472
|
-
__mmask16 finalmask = m1mask | dmask;
|
|
1473
|
-
|
|
1474
|
-
const __m512i min_indices_new = _mm512_mask_blend_epi32(
|
|
1475
|
-
finalmask, current_indices, min_indices);
|
|
1476
|
-
const __m512 min_distances_new =
|
|
1477
|
-
_mm512_mask_blend_ps(finalmask, distances, min_distances);
|
|
1478
|
-
|
|
1479
|
-
min_indices = min_indices_new;
|
|
1480
|
-
min_distances = min_distances_new;
|
|
1481
|
-
}
|
|
1482
|
-
|
|
1483
|
-
// grab min distance
|
|
1484
|
-
min_dis = _mm512_reduce_min_ps(min_distances);
|
|
1485
|
-
// blend
|
|
1486
|
-
__mmask16 mindmask =
|
|
1487
|
-
_mm512_cmpeq_ps_mask(min_distances, _mm512_set1_ps(min_dis));
|
|
1488
|
-
// pick the max one
|
|
1489
|
-
min_idx = _mm512_mask_reduce_max_epi32(mindmask, min_indices);
|
|
1490
|
-
|
|
1491
|
-
if (min_idx == -1) {
|
|
1492
|
-
return -1;
|
|
1493
|
-
}
|
|
1494
|
-
|
|
1495
|
-
if (vmin_out) {
|
|
1496
|
-
*vmin_out = min_dis;
|
|
1497
|
-
}
|
|
1498
|
-
int ret = ids[min_idx];
|
|
1499
|
-
ids[min_idx] = -1;
|
|
1500
|
-
--nvalid;
|
|
1501
|
-
return ret;
|
|
1502
|
-
}
|
|
1503
|
-
|
|
1504
|
-
#elif __AVX2__
|
|
1505
|
-
|
|
1506
|
-
int HNSW::MinimaxHeap::pop_min(float* vmin_out) {
|
|
1507
|
-
assert(k > 0);
|
|
1508
|
-
static_assert(
|
|
1509
|
-
std::is_same<storage_idx_t, int32_t>::value,
|
|
1510
|
-
"This code expects storage_idx_t to be int32_t");
|
|
1511
|
-
|
|
1512
|
-
int32_t min_idx = -1;
|
|
1513
|
-
float min_dis = std::numeric_limits<float>::infinity();
|
|
1514
|
-
|
|
1515
|
-
size_t iii = 0;
|
|
1516
|
-
|
|
1517
|
-
__m256i min_indices = _mm256_setr_epi32(-1, -1, -1, -1, -1, -1, -1, -1);
|
|
1518
|
-
__m256 min_distances =
|
|
1519
|
-
_mm256_set1_ps(std::numeric_limits<float>::infinity());
|
|
1520
|
-
__m256i current_indices = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
|
1521
|
-
__m256i offset = _mm256_set1_epi32(8);
|
|
1522
|
-
|
|
1523
|
-
// The baseline version is available in non-AVX2 branch.
|
|
1524
|
-
|
|
1525
|
-
// The following loop tracks the rightmost index with the min distance.
|
|
1526
|
-
// -1 index values are ignored.
|
|
1527
|
-
const int k8 = (k / 8) * 8;
|
|
1528
|
-
for (; iii < k8; iii += 8) {
|
|
1529
|
-
__m256i indices =
|
|
1530
|
-
_mm256_loadu_si256((const __m256i*)(ids.data() + iii));
|
|
1531
|
-
__m256 distances = _mm256_loadu_ps(dis.data() + iii);
|
|
1532
|
-
|
|
1533
|
-
// This mask filters out -1 values among indices.
|
|
1534
|
-
__m256i m1mask = _mm256_cmpgt_epi32(_mm256_setzero_si256(), indices);
|
|
1535
|
-
|
|
1536
|
-
__m256i dmask = _mm256_castps_si256(
|
|
1537
|
-
_mm256_cmp_ps(min_distances, distances, _CMP_LT_OS));
|
|
1538
|
-
__m256 finalmask = _mm256_castsi256_ps(_mm256_or_si256(m1mask, dmask));
|
|
1539
|
-
|
|
1540
|
-
const __m256i min_indices_new = _mm256_castps_si256(_mm256_blendv_ps(
|
|
1541
|
-
_mm256_castsi256_ps(current_indices),
|
|
1542
|
-
_mm256_castsi256_ps(min_indices),
|
|
1543
|
-
finalmask));
|
|
1544
|
-
|
|
1545
|
-
const __m256 min_distances_new =
|
|
1546
|
-
_mm256_blendv_ps(distances, min_distances, finalmask);
|
|
1547
|
-
|
|
1548
|
-
min_indices = min_indices_new;
|
|
1549
|
-
min_distances = min_distances_new;
|
|
1550
|
-
|
|
1551
|
-
current_indices = _mm256_add_epi32(current_indices, offset);
|
|
1552
|
-
}
|
|
1553
|
-
|
|
1554
|
-
// Vectorizing is doable, but is not practical
|
|
1555
|
-
int32_t vidx8[8];
|
|
1556
|
-
float vdis8[8];
|
|
1557
|
-
_mm256_storeu_ps(vdis8, min_distances);
|
|
1558
|
-
_mm256_storeu_si256((__m256i*)vidx8, min_indices);
|
|
1559
|
-
|
|
1560
|
-
for (size_t j = 0; j < 8; j++) {
|
|
1561
|
-
if (min_dis > vdis8[j] || (min_dis == vdis8[j] && min_idx < vidx8[j])) {
|
|
1562
|
-
min_idx = vidx8[j];
|
|
1563
|
-
min_dis = vdis8[j];
|
|
1564
|
-
}
|
|
1565
|
-
}
|
|
1566
|
-
|
|
1567
|
-
// process last values. Vectorizing is doable, but is not practical
|
|
1568
|
-
for (; iii < k; iii++) {
|
|
1569
|
-
if (ids[iii] != -1 && dis[iii] <= min_dis) {
|
|
1570
|
-
min_dis = dis[iii];
|
|
1571
|
-
min_idx = iii;
|
|
1572
|
-
}
|
|
1573
|
-
}
|
|
1574
|
-
|
|
1575
|
-
if (min_idx == -1) {
|
|
1576
|
-
return -1;
|
|
1577
|
-
}
|
|
1578
|
-
|
|
1579
|
-
if (vmin_out) {
|
|
1580
|
-
*vmin_out = min_dis;
|
|
1581
|
-
}
|
|
1582
|
-
int ret = ids[min_idx];
|
|
1583
|
-
ids[min_idx] = -1;
|
|
1584
|
-
--nvalid;
|
|
1585
|
-
return ret;
|
|
1586
|
-
}
|
|
1587
|
-
|
|
1588
|
-
#else
|
|
1589
|
-
|
|
1590
|
-
// baseline non-vectorized version
|
|
1591
|
-
int HNSW::MinimaxHeap::pop_min(float* vmin_out) {
|
|
1592
|
-
assert(k > 0);
|
|
1593
|
-
// returns min. This is an O(n) operation
|
|
1594
|
-
int i = k - 1;
|
|
1595
|
-
while (i >= 0) {
|
|
1596
|
-
if (ids[i] != -1) {
|
|
1597
|
-
break;
|
|
1598
|
-
}
|
|
1599
|
-
i--;
|
|
1600
|
-
}
|
|
1601
|
-
if (i == -1) {
|
|
1602
|
-
return -1;
|
|
1603
|
-
}
|
|
1604
|
-
int imin = i;
|
|
1605
|
-
float vmin = dis[i];
|
|
1606
|
-
i--;
|
|
1607
|
-
while (i >= 0) {
|
|
1608
|
-
if (ids[i] != -1 && dis[i] < vmin) {
|
|
1609
|
-
vmin = dis[i];
|
|
1610
|
-
imin = i;
|
|
1611
|
-
}
|
|
1612
|
-
i--;
|
|
1613
|
-
}
|
|
1614
|
-
if (vmin_out) {
|
|
1615
|
-
*vmin_out = vmin;
|
|
1616
|
-
}
|
|
1617
|
-
int ret = ids[imin];
|
|
1618
|
-
ids[imin] = -1;
|
|
1619
|
-
--nvalid;
|
|
1620
|
-
|
|
1621
|
-
return ret;
|
|
1622
|
-
}
|
|
1623
|
-
#endif
|
|
1624
|
-
|
|
1625
|
-
int HNSW::MinimaxHeap::count_below(float thresh) {
|
|
1626
|
-
int n_below = 0;
|
|
1627
|
-
for (int i = 0; i < k; i++) {
|
|
1628
|
-
if (dis[i] < thresh) {
|
|
1629
|
-
n_below++;
|
|
1630
|
-
}
|
|
1631
|
-
}
|
|
1632
|
-
|
|
1633
|
-
return n_below;
|
|
1634
|
-
}
|
|
1635
|
-
|
|
1636
1402
|
} // namespace faiss
|