faiss 0.5.3 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +4 -4
- data/ext/faiss/index.cpp +63 -45
- data/ext/faiss/index_binary.cpp +37 -27
- data/ext/faiss/kmeans.cpp +9 -8
- data/ext/faiss/pca_matrix.cpp +9 -7
- data/ext/faiss/product_quantizer.cpp +13 -11
- data/ext/faiss/utils.cpp +4 -2
- data/ext/faiss/utils.h +4 -0
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +214 -82
- data/vendor/faiss/faiss/AutoTune.h +14 -1
- data/vendor/faiss/faiss/Clustering.cpp +97 -249
- data/vendor/faiss/faiss/Clustering.h +18 -0
- data/vendor/faiss/faiss/IVFlib.cpp +67 -44
- data/vendor/faiss/faiss/Index.cpp +25 -12
- data/vendor/faiss/faiss/Index.h +26 -4
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +68 -61
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +6 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +92 -95
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +120 -414
- data/vendor/faiss/faiss/IndexFastScan.cpp +105 -129
- data/vendor/faiss/faiss/IndexFastScan.h +35 -24
- data/vendor/faiss/faiss/IndexFlat.cpp +216 -152
- data/vendor/faiss/faiss/IndexFlat.h +32 -14
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +88 -41
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +299 -187
- data/vendor/faiss/faiss/IndexHNSW.h +30 -14
- data/vendor/faiss/faiss/IndexIDMap.cpp +26 -22
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +535 -405
- data/vendor/faiss/faiss/IndexIVF.h +47 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +105 -99
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +6 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +379 -249
- data/vendor/faiss/faiss/IndexIVFFastScan.h +65 -60
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +41 -124
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +89 -138
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +77 -907
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +184 -122
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -18
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +59 -60
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +564 -416
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +269 -111
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +44 -25
- data/vendor/faiss/faiss/IndexLattice.cpp +41 -36
- data/vendor/faiss/faiss/IndexNNDescent.cpp +37 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +40 -23
- data/vendor/faiss/faiss/IndexNSG.h +0 -2
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +32 -12
- data/vendor/faiss/faiss/IndexPQ.cpp +129 -213
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +31 -43
- data/vendor/faiss/faiss/IndexRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +135 -317
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +192 -34
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -55
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +13 -13
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +29 -6
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +349 -141
- data/vendor/faiss/faiss/VectorTransform.h +39 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +55 -51
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +6 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +64 -34
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -28
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
- data/vendor/faiss/faiss/impl/CodePacker.cpp +7 -3
- data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +64 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +117 -351
- data/vendor/faiss/faiss/impl/HNSW.h +21 -40
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +114 -102
- data/vendor/faiss/faiss/impl/NNDescent.cpp +63 -26
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +44 -26
- data/vendor/faiss/faiss/impl/NSG.h +20 -10
- data/vendor/faiss/faiss/impl/Panorama.cpp +76 -52
- data/vendor/faiss/faiss/impl/Panorama.h +265 -78
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +62 -37
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +99 -80
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +135 -37
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +148 -21
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +298 -301
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +40 -32
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +218 -113
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +119 -2362
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
- data/vendor/faiss/faiss/impl/VisitedTable.h +76 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +163 -0
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +176 -4
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -348
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +290 -142
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1950 -505
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -2
- data/vendor/faiss/faiss/impl/index_write.cpp +112 -21
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +81 -40
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +15 -8
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.h} +43 -220
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.h} +25 -112
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +59 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +256 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -146
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +320 -483
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +137 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +371 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +190 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +603 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +597 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +388 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +630 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +387 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +54 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +173 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +274 -171
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +275 -217
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +115 -28
- data/vendor/faiss/faiss/index_io.h +53 -3
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +73 -20
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +14 -14
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +14 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +56 -10
- data/vendor/faiss/faiss/utils/Heap.h +21 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +54 -40
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +507 -559
- data/vendor/faiss/faiss/utils/distances.h +118 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +250 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +72 -3681
- data/vendor/faiss/faiss/utils/extra_distances.cpp +60 -102
- data/vendor/faiss/faiss/utils/extra_distances.h +79 -7
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +124 -343
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +154 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +777 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +306 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1431 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1095 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +392 -0
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +334 -0
- data/vendor/faiss/faiss/utils/simd_levels.h +183 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +21 -14
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +156 -42
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -216
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -224
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -228
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -450
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -296
- /data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
#include <faiss/IndexHNSW.h>
|
|
9
9
|
|
|
10
10
|
#include <omp.h>
|
|
11
|
+
#include <atomic>
|
|
11
12
|
#include <cinttypes>
|
|
12
13
|
#include <cstdio>
|
|
13
14
|
#include <cstdlib>
|
|
@@ -26,13 +27,15 @@
|
|
|
26
27
|
#include <faiss/IndexIVFPQ.h>
|
|
27
28
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
28
29
|
#include <faiss/impl/FaissAssert.h>
|
|
30
|
+
#include <faiss/impl/FaissException.h>
|
|
29
31
|
#include <faiss/impl/ResultHandler.h>
|
|
32
|
+
#include <faiss/impl/VisitedTable.h>
|
|
33
|
+
#include <faiss/impl/hnsw/MinimaxHeap.h>
|
|
30
34
|
#include <faiss/utils/random.h>
|
|
31
35
|
#include <faiss/utils/sorting.h>
|
|
32
36
|
|
|
33
37
|
namespace faiss {
|
|
34
38
|
|
|
35
|
-
using MinimaxHeap = HNSW::MinimaxHeap;
|
|
36
39
|
using storage_idx_t = HNSW::storage_idx_t;
|
|
37
40
|
using NodeDistFarther = HNSW::NodeDistFarther;
|
|
38
41
|
|
|
@@ -81,10 +84,8 @@ void hnsw_add_vertices(
|
|
|
81
84
|
printf(" max_level = %d\n", max_level);
|
|
82
85
|
}
|
|
83
86
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
omp_init_lock(&locks[i]);
|
|
87
|
-
}
|
|
87
|
+
auto& locks = index_hnsw.locks;
|
|
88
|
+
locks.prepare(ntotal);
|
|
88
89
|
|
|
89
90
|
// add vectors from highest to lowest level
|
|
90
91
|
std::vector<int> hist;
|
|
@@ -93,10 +94,10 @@ void hnsw_add_vertices(
|
|
|
93
94
|
{ // make buckets with vectors of the same level
|
|
94
95
|
|
|
95
96
|
// build histogram
|
|
96
|
-
for (
|
|
97
|
-
storage_idx_t pt_id = i + n0;
|
|
97
|
+
for (size_t i = 0; i < n; i++) {
|
|
98
|
+
storage_idx_t pt_id = static_cast<storage_idx_t>(i + n0);
|
|
98
99
|
int pt_level = hnsw.levels[pt_id] - 1;
|
|
99
|
-
while (pt_level >= hist.size()) {
|
|
100
|
+
while (pt_level >= static_cast<int>(hist.size())) {
|
|
100
101
|
hist.push_back(0);
|
|
101
102
|
}
|
|
102
103
|
hist[pt_level]++;
|
|
@@ -104,13 +105,13 @@ void hnsw_add_vertices(
|
|
|
104
105
|
|
|
105
106
|
// accumulate
|
|
106
107
|
std::vector<int> offsets(hist.size() + 1, 0);
|
|
107
|
-
for (
|
|
108
|
+
for (size_t i = 0; i < hist.size() - 1; i++) {
|
|
108
109
|
offsets[i + 1] = offsets[i] + hist[i];
|
|
109
110
|
}
|
|
110
111
|
|
|
111
112
|
// bucket sort
|
|
112
|
-
for (
|
|
113
|
-
storage_idx_t pt_id = i + n0;
|
|
113
|
+
for (size_t i = 0; i < n; i++) {
|
|
114
|
+
storage_idx_t pt_id = static_cast<storage_idx_t>(i + n0);
|
|
114
115
|
int pt_level = hnsw.levels[pt_id] - 1;
|
|
115
116
|
order[offsets[pt_level]++] = pt_id;
|
|
116
117
|
}
|
|
@@ -122,39 +123,41 @@ void hnsw_add_vertices(
|
|
|
122
123
|
{ // perform add
|
|
123
124
|
RandomGenerator rng2(789);
|
|
124
125
|
|
|
125
|
-
|
|
126
|
+
size_t i1 = static_cast<int>(n);
|
|
126
127
|
|
|
127
|
-
for (int pt_level = hist.size() - 1;
|
|
128
|
+
for (int pt_level = static_cast<int>(hist.size()) - 1;
|
|
128
129
|
pt_level >= int(!index_hnsw.init_level0);
|
|
129
130
|
pt_level--) {
|
|
130
|
-
|
|
131
|
+
size_t i0 = i1 - hist[pt_level];
|
|
131
132
|
|
|
132
133
|
if (verbose) {
|
|
133
|
-
printf("Adding %
|
|
134
|
+
printf("Adding %zu elements at level %d\n", i1 - i0, pt_level);
|
|
134
135
|
}
|
|
135
136
|
|
|
136
137
|
// random permutation to get rid of dataset order bias
|
|
137
|
-
for (
|
|
138
|
-
std::swap(
|
|
138
|
+
for (size_t j = i0; j < i1; j++) {
|
|
139
|
+
std::swap(
|
|
140
|
+
order[j],
|
|
141
|
+
order[j + rng2.rand_int(static_cast<int>(i1 - j))]);
|
|
139
142
|
}
|
|
140
143
|
|
|
141
144
|
bool interrupt = false;
|
|
142
145
|
|
|
143
146
|
#pragma omp parallel if (i1 > i0 + 100)
|
|
144
147
|
{
|
|
145
|
-
VisitedTable vt(ntotal);
|
|
148
|
+
VisitedTable vt(ntotal, hnsw.use_visited_hashset);
|
|
146
149
|
|
|
147
150
|
std::unique_ptr<DistanceComputer> dis(
|
|
148
151
|
storage_distance_computer(index_hnsw.storage));
|
|
149
|
-
|
|
150
|
-
|
|
152
|
+
bool do_display = verbose && omp_get_thread_num() == 0;
|
|
153
|
+
size_t prev_display = 0;
|
|
151
154
|
size_t counter = 0;
|
|
152
155
|
|
|
153
156
|
// here we should do schedule(dynamic) but this segfaults for
|
|
154
157
|
// some versions of LLVM. The performance impact should not be
|
|
155
158
|
// too large when (i1 - i0) / num_threads >> 1
|
|
156
159
|
#pragma omp for schedule(static)
|
|
157
|
-
for (
|
|
160
|
+
for (int64_t i = i0; i < i1; i++) {
|
|
158
161
|
storage_idx_t pt_id = order[i];
|
|
159
162
|
dis->set_query(x + (pt_id - n0) * d);
|
|
160
163
|
|
|
@@ -171,9 +174,9 @@ void hnsw_add_vertices(
|
|
|
171
174
|
vt,
|
|
172
175
|
index_hnsw.keep_max_size_level0 && (pt_level == 0));
|
|
173
176
|
|
|
174
|
-
if (
|
|
177
|
+
if (do_display && i - i0 > prev_display + 10000) {
|
|
175
178
|
prev_display = i - i0;
|
|
176
|
-
printf(" %
|
|
179
|
+
printf(" %zu / %zu\r", i - i0, i1 - i0);
|
|
177
180
|
fflush(stdout);
|
|
178
181
|
}
|
|
179
182
|
if (counter % check_period == 0) {
|
|
@@ -198,9 +201,8 @@ void hnsw_add_vertices(
|
|
|
198
201
|
if (verbose) {
|
|
199
202
|
printf("Done in %.3f ms\n", getmillisecs() - t0);
|
|
200
203
|
}
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
omp_destroy_lock(&locks[i]);
|
|
204
|
+
if (!index_hnsw.retain_locks) {
|
|
205
|
+
locks.clear();
|
|
204
206
|
}
|
|
205
207
|
}
|
|
206
208
|
|
|
@@ -210,11 +212,13 @@ void hnsw_add_vertices(
|
|
|
210
212
|
* IndexHNSW implementation
|
|
211
213
|
**************************************************************/
|
|
212
214
|
|
|
213
|
-
IndexHNSW::IndexHNSW(int
|
|
214
|
-
: Index(
|
|
215
|
+
IndexHNSW::IndexHNSW(int d_in, int M, MetricType metric)
|
|
216
|
+
: Index(d_in, metric), hnsw(M) {}
|
|
215
217
|
|
|
216
|
-
IndexHNSW::IndexHNSW(Index*
|
|
217
|
-
: Index(
|
|
218
|
+
IndexHNSW::IndexHNSW(Index* storage_in, int M)
|
|
219
|
+
: Index(storage_in->d, storage_in->metric_type),
|
|
220
|
+
hnsw(M),
|
|
221
|
+
storage(storage_in) {
|
|
218
222
|
metric_arg = storage->metric_arg;
|
|
219
223
|
}
|
|
220
224
|
|
|
@@ -262,28 +266,48 @@ void hnsw_search(
|
|
|
262
266
|
|
|
263
267
|
for (idx_t i0 = 0; i0 < n; i0 += check_period) {
|
|
264
268
|
idx_t i1 = std::min(i0 + check_period, n);
|
|
269
|
+
std::exception_ptr ex;
|
|
270
|
+
std::atomic<bool> interrupt{false};
|
|
265
271
|
|
|
266
272
|
#pragma omp parallel if (i1 - i0 > 1)
|
|
267
273
|
{
|
|
268
|
-
VisitedTable vt
|
|
269
|
-
typename BlockResultHandler::SingleResultHandler
|
|
270
|
-
|
|
271
|
-
std::unique_ptr<DistanceComputer> dis
|
|
272
|
-
|
|
274
|
+
std::unique_ptr<VisitedTable> vt;
|
|
275
|
+
std::unique_ptr<typename BlockResultHandler::SingleResultHandler>
|
|
276
|
+
res;
|
|
277
|
+
std::unique_ptr<DistanceComputer> dis;
|
|
278
|
+
try {
|
|
279
|
+
vt = std::make_unique<VisitedTable>(
|
|
280
|
+
index->ntotal, hnsw.use_visited_hashset);
|
|
281
|
+
res = std::make_unique<
|
|
282
|
+
typename BlockResultHandler::SingleResultHandler>(bres);
|
|
283
|
+
dis.reset(storage_distance_computer(index->storage));
|
|
284
|
+
} catch (...) {
|
|
285
|
+
omp_capture_exception(ex, [&] { interrupt = true; });
|
|
286
|
+
}
|
|
273
287
|
|
|
274
288
|
#pragma omp for reduction(+ : n1, n2, ndis, nhops) schedule(guided)
|
|
275
289
|
for (idx_t i = i0; i < i1; i++) {
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
290
|
+
if (interrupt.load(std::memory_order_relaxed)) {
|
|
291
|
+
continue;
|
|
292
|
+
}
|
|
293
|
+
try {
|
|
294
|
+
res->begin(i);
|
|
295
|
+
dis->set_query(x + i * index->d);
|
|
296
|
+
|
|
297
|
+
HNSWStats stats =
|
|
298
|
+
hnsw.search(*dis, index, *res, *vt, params);
|
|
299
|
+
n1 += stats.n1;
|
|
300
|
+
n2 += stats.n2;
|
|
301
|
+
ndis += stats.ndis;
|
|
302
|
+
nhops += stats.nhops;
|
|
303
|
+
res->end();
|
|
304
|
+
vt->advance();
|
|
305
|
+
} catch (...) {
|
|
306
|
+
omp_capture_exception(ex, [&] { interrupt = true; });
|
|
307
|
+
}
|
|
285
308
|
}
|
|
286
309
|
}
|
|
310
|
+
omp_rethrow_if_exception(ex);
|
|
287
311
|
InterruptCallback::check();
|
|
288
312
|
}
|
|
289
313
|
|
|
@@ -308,7 +332,7 @@ void IndexHNSW::search(
|
|
|
308
332
|
|
|
309
333
|
if (is_similarity_metric(this->metric_type)) {
|
|
310
334
|
// we need to revert the negated distances
|
|
311
|
-
for (
|
|
335
|
+
for (idx_t i = 0; i < k * n; i++) {
|
|
312
336
|
distances[i] = -distances[i];
|
|
313
337
|
}
|
|
314
338
|
}
|
|
@@ -333,20 +357,35 @@ void IndexHNSW::range_search(
|
|
|
333
357
|
}
|
|
334
358
|
}
|
|
335
359
|
|
|
360
|
+
void IndexHNSW::search1(
|
|
361
|
+
const float* x,
|
|
362
|
+
ResultHandler& handler,
|
|
363
|
+
SearchParameters* params) const {
|
|
364
|
+
SingleQueryBlockResultHandler<HNSW::C, false> bres(handler);
|
|
365
|
+
hnsw_search(this, 1, x, bres, params);
|
|
366
|
+
}
|
|
367
|
+
|
|
336
368
|
void IndexHNSW::add(idx_t n, const float* x) {
|
|
337
369
|
FAISS_THROW_IF_NOT_MSG(
|
|
338
370
|
storage,
|
|
339
371
|
"Please use IndexHNSWFlat (or variants) instead of IndexHNSW directly");
|
|
340
372
|
FAISS_THROW_IF_NOT(is_trained);
|
|
341
|
-
|
|
373
|
+
size_t n0 = ntotal;
|
|
342
374
|
storage->add(n, x);
|
|
343
375
|
ntotal = storage->ntotal;
|
|
344
376
|
|
|
345
|
-
hnsw_add_vertices(
|
|
377
|
+
hnsw_add_vertices(
|
|
378
|
+
*this,
|
|
379
|
+
n0,
|
|
380
|
+
n,
|
|
381
|
+
x,
|
|
382
|
+
verbose,
|
|
383
|
+
hnsw.levels.size() == static_cast<size_t>(ntotal));
|
|
346
384
|
}
|
|
347
385
|
|
|
348
386
|
void IndexHNSW::reset() {
|
|
349
387
|
hnsw.reset();
|
|
388
|
+
locks.clear();
|
|
350
389
|
storage->reset();
|
|
351
390
|
ntotal = 0;
|
|
352
391
|
}
|
|
@@ -418,42 +457,59 @@ void IndexHNSW::search_level_0(
|
|
|
418
457
|
FAISS_THROW_IF_NOT(k > 0);
|
|
419
458
|
FAISS_THROW_IF_NOT(nprobe > 0);
|
|
420
459
|
|
|
421
|
-
|
|
460
|
+
size_t hnsw_ntotal = hnsw.levels.size();
|
|
422
461
|
|
|
423
462
|
using RH = HeapBlockResultHandler<HNSW::C>;
|
|
424
463
|
RH bres(n, distances, labels, k);
|
|
425
464
|
|
|
465
|
+
std::exception_ptr ex;
|
|
466
|
+
std::atomic<bool> interrupt{false};
|
|
426
467
|
#pragma omp parallel
|
|
427
468
|
{
|
|
428
|
-
std::unique_ptr<DistanceComputer> qdis
|
|
429
|
-
storage_distance_computer(storage));
|
|
469
|
+
std::unique_ptr<DistanceComputer> qdis;
|
|
430
470
|
HNSWStats search_stats;
|
|
431
|
-
VisitedTable vt
|
|
432
|
-
RH::SingleResultHandler res
|
|
471
|
+
std::unique_ptr<VisitedTable> vt;
|
|
472
|
+
std::unique_ptr<RH::SingleResultHandler> res;
|
|
473
|
+
try {
|
|
474
|
+
qdis.reset(storage_distance_computer(storage));
|
|
475
|
+
vt = std::make_unique<VisitedTable>(
|
|
476
|
+
hnsw_ntotal, hnsw.use_visited_hashset);
|
|
477
|
+
res = std::make_unique<RH::SingleResultHandler>(bres);
|
|
478
|
+
} catch (...) {
|
|
479
|
+
omp_capture_exception(ex, [&] { interrupt = true; });
|
|
480
|
+
}
|
|
433
481
|
|
|
434
482
|
#pragma omp for
|
|
435
483
|
for (idx_t i = 0; i < n; i++) {
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
484
|
+
if (interrupt.load(std::memory_order_relaxed)) {
|
|
485
|
+
continue;
|
|
486
|
+
}
|
|
487
|
+
try {
|
|
488
|
+
res->begin(i);
|
|
489
|
+
qdis->set_query(x + i * d);
|
|
490
|
+
|
|
491
|
+
hnsw.search_level_0(
|
|
492
|
+
*qdis.get(),
|
|
493
|
+
*res,
|
|
494
|
+
nprobe,
|
|
495
|
+
nearest + i * nprobe,
|
|
496
|
+
nearest_d + i * nprobe,
|
|
497
|
+
search_type,
|
|
498
|
+
search_stats,
|
|
499
|
+
*vt,
|
|
500
|
+
params);
|
|
501
|
+
res->end();
|
|
502
|
+
vt->advance();
|
|
503
|
+
} catch (...) {
|
|
504
|
+
omp_capture_exception(ex, [&] { interrupt = true; });
|
|
505
|
+
}
|
|
451
506
|
}
|
|
452
507
|
#pragma omp critical
|
|
453
508
|
{
|
|
454
509
|
hnsw_stats.combine(search_stats);
|
|
455
510
|
}
|
|
456
511
|
}
|
|
512
|
+
omp_rethrow_if_exception(ex);
|
|
457
513
|
if (is_similarity_metric(this->metric_type)) {
|
|
458
514
|
// we need to revert the negated distances
|
|
459
515
|
#pragma omp parallel for
|
|
@@ -478,8 +534,8 @@ void IndexHNSW::init_level_0_from_knngraph(
|
|
|
478
534
|
|
|
479
535
|
std::priority_queue<NodeDistFarther> initial_list;
|
|
480
536
|
|
|
481
|
-
for (
|
|
482
|
-
int v1 = I[i * k + j];
|
|
537
|
+
for (int j = 0; j < k; j++) {
|
|
538
|
+
int v1 = static_cast<int>(I[i * k + j]);
|
|
483
539
|
if (v1 == i) {
|
|
484
540
|
continue;
|
|
485
541
|
}
|
|
@@ -509,14 +565,11 @@ void IndexHNSW::init_level_0_from_entry_points(
|
|
|
509
565
|
int n,
|
|
510
566
|
const storage_idx_t* points,
|
|
511
567
|
const storage_idx_t* nearests) {
|
|
512
|
-
|
|
513
|
-
for (int i = 0; i < ntotal; i++) {
|
|
514
|
-
omp_init_lock(&locks[i]);
|
|
515
|
-
}
|
|
568
|
+
locks.prepare(ntotal);
|
|
516
569
|
|
|
517
570
|
#pragma omp parallel
|
|
518
571
|
{
|
|
519
|
-
VisitedTable vt(ntotal);
|
|
572
|
+
VisitedTable vt(ntotal, hnsw.use_visited_hashset);
|
|
520
573
|
|
|
521
574
|
std::unique_ptr<DistanceComputer> dis(
|
|
522
575
|
storage_distance_computer(storage));
|
|
@@ -530,7 +583,7 @@ void IndexHNSW::init_level_0_from_entry_points(
|
|
|
530
583
|
dis->set_query(vec.data());
|
|
531
584
|
|
|
532
585
|
hnsw.add_links_starting_from(
|
|
533
|
-
*dis, pt_id, nearest, (*dis)(nearest), 0, locks
|
|
586
|
+
*dis, pt_id, nearest, (*dis)(nearest), 0, locks, vt);
|
|
534
587
|
|
|
535
588
|
if (verbose && i % 10000 == 0) {
|
|
536
589
|
printf(" %d / %d\r", i, n);
|
|
@@ -542,8 +595,8 @@ void IndexHNSW::init_level_0_from_entry_points(
|
|
|
542
595
|
printf("\n");
|
|
543
596
|
}
|
|
544
597
|
|
|
545
|
-
|
|
546
|
-
|
|
598
|
+
if (!retain_locks) {
|
|
599
|
+
locks.clear();
|
|
547
600
|
}
|
|
548
601
|
}
|
|
549
602
|
|
|
@@ -586,7 +639,7 @@ void IndexHNSW::link_singletons() {
|
|
|
586
639
|
|
|
587
640
|
std::vector<bool> seen(ntotal);
|
|
588
641
|
|
|
589
|
-
for (
|
|
642
|
+
for (idx_t i = 0; i < ntotal; i++) {
|
|
590
643
|
size_t begin, end;
|
|
591
644
|
hnsw.neighbor_range(i, 0, &begin, &end);
|
|
592
645
|
for (size_t j = begin; j < end; j++) {
|
|
@@ -615,8 +668,8 @@ void IndexHNSW::link_singletons() {
|
|
|
615
668
|
n_sing_l1);
|
|
616
669
|
|
|
617
670
|
std::vector<float> recons(singletons.size() * d);
|
|
618
|
-
for (
|
|
619
|
-
FAISS_ASSERT(
|
|
671
|
+
for (size_t i = 0; i < singletons.size(); i++) {
|
|
672
|
+
FAISS_ASSERT(false); // not implemented
|
|
620
673
|
}
|
|
621
674
|
}
|
|
622
675
|
|
|
@@ -640,10 +693,10 @@ IndexHNSWFlat::IndexHNSWFlat() {
|
|
|
640
693
|
is_trained = true;
|
|
641
694
|
}
|
|
642
695
|
|
|
643
|
-
IndexHNSWFlat::IndexHNSWFlat(int
|
|
696
|
+
IndexHNSWFlat::IndexHNSWFlat(int d_in, int M, MetricType metric)
|
|
644
697
|
: IndexHNSW(
|
|
645
|
-
(metric == METRIC_L2) ? new IndexFlatL2(
|
|
646
|
-
: new IndexFlat(
|
|
698
|
+
(metric == METRIC_L2) ? new IndexFlatL2(d_in)
|
|
699
|
+
: new IndexFlat(d_in, metric),
|
|
647
700
|
M) {
|
|
648
701
|
own_fields = true;
|
|
649
702
|
is_trained = true;
|
|
@@ -653,44 +706,21 @@ IndexHNSWFlat::IndexHNSWFlat(int d, int M, MetricType metric)
|
|
|
653
706
|
* IndexHNSWFlatPanorama implementation
|
|
654
707
|
**************************************************************/
|
|
655
708
|
|
|
656
|
-
void IndexHNSWFlatPanorama::compute_cum_sums(
|
|
657
|
-
const float* x,
|
|
658
|
-
float* dst_cum_sums,
|
|
659
|
-
int d,
|
|
660
|
-
int num_panorama_levels,
|
|
661
|
-
int panorama_level_width) {
|
|
662
|
-
// Iterate backwards through levels, accumulating sum as we go.
|
|
663
|
-
// This avoids computing the suffix sum for each vector, which takes
|
|
664
|
-
// extra memory.
|
|
665
|
-
|
|
666
|
-
float sum = 0.0f;
|
|
667
|
-
dst_cum_sums[num_panorama_levels] = 0.0f;
|
|
668
|
-
for (int level = num_panorama_levels - 1; level >= 0; level--) {
|
|
669
|
-
int start_idx = level * panorama_level_width;
|
|
670
|
-
int end_idx = std::min(start_idx + panorama_level_width, d);
|
|
671
|
-
for (int j = start_idx; j < end_idx; j++) {
|
|
672
|
-
sum += x[j] * x[j];
|
|
673
|
-
}
|
|
674
|
-
dst_cum_sums[level] = std::sqrt(sum);
|
|
675
|
-
}
|
|
676
|
-
}
|
|
677
|
-
|
|
678
709
|
IndexHNSWFlatPanorama::IndexHNSWFlatPanorama()
|
|
679
710
|
: IndexHNSWFlat(),
|
|
680
711
|
cum_sums(),
|
|
681
|
-
|
|
712
|
+
pano(sizeof(float), 1, 1),
|
|
682
713
|
num_panorama_levels(0) {}
|
|
683
714
|
|
|
684
715
|
IndexHNSWFlatPanorama::IndexHNSWFlatPanorama(
|
|
685
|
-
int
|
|
716
|
+
int d_in,
|
|
686
717
|
int M,
|
|
687
|
-
int
|
|
718
|
+
int num_panorama_levels_in,
|
|
688
719
|
MetricType metric)
|
|
689
|
-
: IndexHNSWFlat(
|
|
720
|
+
: IndexHNSWFlat(d_in, M, metric),
|
|
690
721
|
cum_sums(),
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
num_panorama_levels(num_panorama_levels) {
|
|
722
|
+
pano(d_in * sizeof(float), num_panorama_levels_in, 1),
|
|
723
|
+
num_panorama_levels(num_panorama_levels_in) {
|
|
694
724
|
// For now, we only support L2 distance.
|
|
695
725
|
// Supporting dot product and cosine distance is a trivial addition
|
|
696
726
|
// left for future work.
|
|
@@ -704,18 +734,8 @@ IndexHNSWFlatPanorama::IndexHNSWFlatPanorama(
|
|
|
704
734
|
|
|
705
735
|
void IndexHNSWFlatPanorama::add(idx_t n, const float* x) {
|
|
706
736
|
idx_t n0 = ntotal;
|
|
707
|
-
cum_sums.resize((ntotal + n) * (
|
|
708
|
-
|
|
709
|
-
for (size_t idx = 0; idx < n; idx++) {
|
|
710
|
-
const float* vector = x + idx * d;
|
|
711
|
-
compute_cum_sums(
|
|
712
|
-
vector,
|
|
713
|
-
&cum_sums[(n0 + idx) * (num_panorama_levels + 1)],
|
|
714
|
-
d,
|
|
715
|
-
num_panorama_levels,
|
|
716
|
-
panorama_level_width);
|
|
717
|
-
}
|
|
718
|
-
|
|
737
|
+
cum_sums.resize((ntotal + n) * (pano.n_levels + 1));
|
|
738
|
+
pano.compute_cumulative_sums(cum_sums.data(), n0, n, x);
|
|
719
739
|
IndexHNSWFlat::add(n, x);
|
|
720
740
|
}
|
|
721
741
|
|
|
@@ -725,13 +745,13 @@ void IndexHNSWFlatPanorama::reset() {
|
|
|
725
745
|
}
|
|
726
746
|
|
|
727
747
|
void IndexHNSWFlatPanorama::permute_entries(const idx_t* perm) {
|
|
728
|
-
std::vector<float> new_cum_sums(ntotal * (
|
|
748
|
+
std::vector<float> new_cum_sums(ntotal * (pano.n_levels + 1));
|
|
729
749
|
|
|
730
750
|
for (idx_t i = 0; i < ntotal; i++) {
|
|
731
751
|
idx_t src = perm[i];
|
|
732
|
-
memcpy(&new_cum_sums[i * (
|
|
733
|
-
&cum_sums[src * (
|
|
734
|
-
(
|
|
752
|
+
memcpy(&new_cum_sums[i * (pano.n_levels + 1)],
|
|
753
|
+
&cum_sums[src * (pano.n_levels + 1)],
|
|
754
|
+
(pano.n_levels + 1) * sizeof(float));
|
|
735
755
|
}
|
|
736
756
|
|
|
737
757
|
std::swap(cum_sums, new_cum_sums);
|
|
@@ -745,12 +765,12 @@ void IndexHNSWFlatPanorama::permute_entries(const idx_t* perm) {
|
|
|
745
765
|
IndexHNSWPQ::IndexHNSWPQ() = default;
|
|
746
766
|
|
|
747
767
|
IndexHNSWPQ::IndexHNSWPQ(
|
|
748
|
-
int
|
|
768
|
+
int d_in,
|
|
749
769
|
int pq_m,
|
|
750
770
|
int M,
|
|
751
771
|
int pq_nbits,
|
|
752
772
|
MetricType metric)
|
|
753
|
-
: IndexHNSW(new IndexPQ(
|
|
773
|
+
: IndexHNSW(new IndexPQ(d_in, pq_m, pq_nbits, metric), M) {
|
|
754
774
|
own_fields = true;
|
|
755
775
|
is_trained = false;
|
|
756
776
|
}
|
|
@@ -765,11 +785,11 @@ void IndexHNSWPQ::train(idx_t n, const float* x) {
|
|
|
765
785
|
**************************************************************/
|
|
766
786
|
|
|
767
787
|
IndexHNSWSQ::IndexHNSWSQ(
|
|
768
|
-
int
|
|
788
|
+
int d_in,
|
|
769
789
|
ScalarQuantizer::QuantizerType qtype,
|
|
770
790
|
int M,
|
|
771
791
|
MetricType metric)
|
|
772
|
-
: IndexHNSW(new IndexScalarQuantizer(
|
|
792
|
+
: IndexHNSW(new IndexScalarQuantizer(d_in, qtype, metric), M) {
|
|
773
793
|
is_trained = this->storage->is_trained;
|
|
774
794
|
own_fields = true;
|
|
775
795
|
}
|
|
@@ -882,8 +902,11 @@ void IndexHNSW2Level::search(
|
|
|
882
902
|
|
|
883
903
|
const IndexIVFPQ* index_ivfpq =
|
|
884
904
|
dynamic_cast<const IndexIVFPQ*>(storage);
|
|
905
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
906
|
+
index_ivfpq,
|
|
907
|
+
"IndexHNSW2Level mixed search requires IndexIVFPQ storage");
|
|
885
908
|
|
|
886
|
-
|
|
909
|
+
size_t nprobe = index_ivfpq->nprobe;
|
|
887
910
|
|
|
888
911
|
std::unique_ptr<idx_t[]> coarse_assign(new idx_t[n * nprobe]);
|
|
889
912
|
std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
|
|
@@ -901,71 +924,88 @@ void IndexHNSW2Level::search(
|
|
|
901
924
|
labels,
|
|
902
925
|
false);
|
|
903
926
|
|
|
927
|
+
std::exception_ptr ex;
|
|
928
|
+
std::atomic<bool> interrupt{false};
|
|
904
929
|
#pragma omp parallel
|
|
905
930
|
{
|
|
906
|
-
|
|
907
|
-
std::unique_ptr<
|
|
908
|
-
|
|
909
|
-
|
|
931
|
+
// visited table (not hash set) for tri-state flags.
|
|
932
|
+
std::unique_ptr<VisitedTable> vt;
|
|
933
|
+
std::unique_ptr<DistanceComputer> dis;
|
|
910
934
|
constexpr int candidates_size = 1;
|
|
911
|
-
MinimaxHeap candidates
|
|
935
|
+
std::unique_ptr<MinimaxHeap> candidates;
|
|
936
|
+
try {
|
|
937
|
+
vt = std::make_unique<VisitedTable>(
|
|
938
|
+
ntotal, /*use_hashset=*/false);
|
|
939
|
+
dis.reset(storage_distance_computer(storage));
|
|
940
|
+
candidates = std::make_unique<MinimaxHeap>(candidates_size);
|
|
941
|
+
} catch (...) {
|
|
942
|
+
omp_capture_exception(ex, [&] { interrupt = true; });
|
|
943
|
+
}
|
|
912
944
|
|
|
913
945
|
#pragma omp for reduction(+ : n1, n2, ndis, nhops)
|
|
914
946
|
for (idx_t i = 0; i < n; i++) {
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
947
|
+
if (interrupt.load(std::memory_order_relaxed)) {
|
|
948
|
+
continue;
|
|
949
|
+
}
|
|
950
|
+
try {
|
|
951
|
+
idx_t* idxi = labels + i * k;
|
|
952
|
+
float* simi = distances + i * k;
|
|
953
|
+
dis->set_query(x + i * d);
|
|
954
|
+
|
|
955
|
+
// mark all inverted list elements as visited
|
|
956
|
+
for (size_t j = 0; j < nprobe; j++) {
|
|
957
|
+
idx_t key = coarse_assign[j + i * nprobe];
|
|
958
|
+
if (key < 0) {
|
|
959
|
+
break;
|
|
960
|
+
}
|
|
961
|
+
size_t list_length = index_ivfpq->get_list_size(key);
|
|
962
|
+
const idx_t* ids = index_ivfpq->invlists->get_ids(key);
|
|
928
963
|
|
|
929
|
-
|
|
930
|
-
|
|
964
|
+
for (size_t jj = 0; jj < list_length; jj++) {
|
|
965
|
+
vt->set(ids[jj]);
|
|
966
|
+
}
|
|
931
967
|
}
|
|
932
|
-
}
|
|
933
968
|
|
|
934
|
-
|
|
969
|
+
candidates->clear();
|
|
935
970
|
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
971
|
+
for (int j = 0; j < k; j++) {
|
|
972
|
+
if (idxi[j] < 0) {
|
|
973
|
+
break;
|
|
974
|
+
}
|
|
975
|
+
candidates->push(
|
|
976
|
+
static_cast<storage_idx_t>(idxi[j]), simi[j]);
|
|
939
977
|
}
|
|
940
|
-
candidates.push(idxi[j], simi[j]);
|
|
941
|
-
}
|
|
942
|
-
|
|
943
|
-
// reorder from sorted to heap
|
|
944
|
-
maxheap_heapify(k, simi, idxi, simi, idxi, k);
|
|
945
|
-
|
|
946
|
-
HNSWStats search_stats;
|
|
947
|
-
search_from_candidates_2(
|
|
948
|
-
hnsw,
|
|
949
|
-
*dis,
|
|
950
|
-
k,
|
|
951
|
-
idxi,
|
|
952
|
-
simi,
|
|
953
|
-
candidates,
|
|
954
|
-
vt,
|
|
955
|
-
search_stats,
|
|
956
|
-
0,
|
|
957
|
-
k);
|
|
958
|
-
n1 += search_stats.n1;
|
|
959
|
-
n2 += search_stats.n2;
|
|
960
|
-
ndis += search_stats.ndis;
|
|
961
|
-
nhops += search_stats.nhops;
|
|
962
978
|
|
|
963
|
-
|
|
964
|
-
|
|
979
|
+
// reorder from sorted to heap
|
|
980
|
+
maxheap_heapify(k, simi, idxi, simi, idxi, k);
|
|
965
981
|
|
|
966
|
-
|
|
982
|
+
HNSWStats search_stats;
|
|
983
|
+
search_from_candidates_2(
|
|
984
|
+
hnsw,
|
|
985
|
+
*dis,
|
|
986
|
+
k,
|
|
987
|
+
idxi,
|
|
988
|
+
simi,
|
|
989
|
+
*candidates,
|
|
990
|
+
*vt,
|
|
991
|
+
search_stats,
|
|
992
|
+
0,
|
|
993
|
+
k);
|
|
994
|
+
n1 += search_stats.n1;
|
|
995
|
+
n2 += search_stats.n2;
|
|
996
|
+
ndis += search_stats.ndis;
|
|
997
|
+
nhops += search_stats.nhops;
|
|
998
|
+
|
|
999
|
+
vt->advance();
|
|
1000
|
+
vt->advance();
|
|
1001
|
+
|
|
1002
|
+
maxheap_reorder(k, simi, idxi);
|
|
1003
|
+
} catch (...) {
|
|
1004
|
+
omp_capture_exception(ex, [&] { interrupt = true; });
|
|
1005
|
+
}
|
|
967
1006
|
}
|
|
968
1007
|
}
|
|
1008
|
+
omp_rethrow_if_exception(ex);
|
|
969
1009
|
|
|
970
1010
|
hnsw_stats.combine({n1, n2, ndis, nhops});
|
|
971
1011
|
}
|
|
@@ -1002,11 +1042,11 @@ IndexHNSWCagra::IndexHNSWCagra() {
|
|
|
1002
1042
|
}
|
|
1003
1043
|
|
|
1004
1044
|
IndexHNSWCagra::IndexHNSWCagra(
|
|
1005
|
-
int
|
|
1045
|
+
int d_in,
|
|
1006
1046
|
int M,
|
|
1007
1047
|
MetricType metric,
|
|
1008
1048
|
NumericType numeric_type)
|
|
1009
|
-
: IndexHNSW(
|
|
1049
|
+
: IndexHNSW(d_in, M, metric) {
|
|
1010
1050
|
FAISS_THROW_IF_NOT_MSG(
|
|
1011
1051
|
((metric == METRIC_L2) || (metric == METRIC_INNER_PRODUCT)),
|
|
1012
1052
|
"unsupported metric type for IndexHNSWCagra");
|
|
@@ -1050,10 +1090,18 @@ void IndexHNSWCagra::search(
|
|
|
1050
1090
|
if (!base_level_only) {
|
|
1051
1091
|
IndexHNSW::search(n, x, k, distances, labels, params);
|
|
1052
1092
|
} else {
|
|
1093
|
+
if (ntotal == 0) {
|
|
1094
|
+
std::fill(
|
|
1095
|
+
distances,
|
|
1096
|
+
distances + n * k,
|
|
1097
|
+
std::numeric_limits<float>::max());
|
|
1098
|
+
std::fill(labels, labels + n * k, -1);
|
|
1099
|
+
return;
|
|
1100
|
+
}
|
|
1053
1101
|
std::vector<storage_idx_t> nearest(n);
|
|
1054
1102
|
std::vector<float> nearest_d(n);
|
|
1055
1103
|
|
|
1056
|
-
#pragma omp for
|
|
1104
|
+
#pragma omp parallel for
|
|
1057
1105
|
for (idx_t i = 0; i < n; i++) {
|
|
1058
1106
|
std::unique_ptr<DistanceComputer> dis(
|
|
1059
1107
|
storage_distance_computer(this->storage));
|
|
@@ -1069,7 +1117,7 @@ void IndexHNSWCagra::search(
|
|
|
1069
1117
|
auto idx = distrib(gen);
|
|
1070
1118
|
auto distance = (*dis)(idx);
|
|
1071
1119
|
if (distance < nearest_d[i]) {
|
|
1072
|
-
nearest[i] = idx;
|
|
1120
|
+
nearest[i] = static_cast<storage_idx_t>(idx);
|
|
1073
1121
|
nearest_d[i] = distance;
|
|
1074
1122
|
}
|
|
1075
1123
|
}
|
|
@@ -1091,6 +1139,70 @@ void IndexHNSWCagra::search(
|
|
|
1091
1139
|
}
|
|
1092
1140
|
}
|
|
1093
1141
|
|
|
1142
|
+
void IndexHNSWCagra::range_search(
|
|
1143
|
+
idx_t n,
|
|
1144
|
+
const float* x,
|
|
1145
|
+
float radius,
|
|
1146
|
+
RangeSearchResult* result,
|
|
1147
|
+
const SearchParameters* params) const {
|
|
1148
|
+
if (!base_level_only) {
|
|
1149
|
+
IndexHNSW::range_search(n, x, radius, result, params);
|
|
1150
|
+
return;
|
|
1151
|
+
}
|
|
1152
|
+
|
|
1153
|
+
const HNSW& hnsw = this->hnsw;
|
|
1154
|
+
size_t n1 = 0, n2 = 0, ndis = 0, nhops = 0;
|
|
1155
|
+
float threshold = is_similarity_metric(metric_type) ? -radius : radius;
|
|
1156
|
+
RangeSearchPartialResult pres(result);
|
|
1157
|
+
|
|
1158
|
+
for (idx_t i = 0; i < n; i++) {
|
|
1159
|
+
std::unique_ptr<DistanceComputer> dis(
|
|
1160
|
+
storage_distance_computer(storage));
|
|
1161
|
+
dis->set_query(x + i * d);
|
|
1162
|
+
|
|
1163
|
+
storage_idx_t nearest = -1;
|
|
1164
|
+
float nearest_d = std::numeric_limits<float>::max();
|
|
1165
|
+
|
|
1166
|
+
std::random_device rd;
|
|
1167
|
+
std::mt19937 gen(rd());
|
|
1168
|
+
std::uniform_int_distribution<idx_t> distrib(0, ntotal - 1);
|
|
1169
|
+
|
|
1170
|
+
for (idx_t j = 0; j < num_base_level_search_entrypoints; j++) {
|
|
1171
|
+
auto idx = distrib(gen);
|
|
1172
|
+
auto distance = (*dis)(idx);
|
|
1173
|
+
if (distance < nearest_d) {
|
|
1174
|
+
nearest = idx;
|
|
1175
|
+
nearest_d = distance;
|
|
1176
|
+
}
|
|
1177
|
+
}
|
|
1178
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
1179
|
+
nearest >= 0, "Could not find a valid entrypoint.");
|
|
1180
|
+
|
|
1181
|
+
RangeQueryResult& qres = pres.new_result(i);
|
|
1182
|
+
RangeResultHandler<HNSW::C> res(&qres, threshold);
|
|
1183
|
+
VisitedTable vt(ntotal, hnsw.use_visited_hashset);
|
|
1184
|
+
HNSWStats stats;
|
|
1185
|
+
hnsw.search_level_0(
|
|
1186
|
+
*dis, res, 1, &nearest, &nearest_d, 1, stats, vt, params);
|
|
1187
|
+
n1 += stats.n1;
|
|
1188
|
+
n2 += stats.n2;
|
|
1189
|
+
ndis += stats.ndis;
|
|
1190
|
+
nhops += stats.nhops;
|
|
1191
|
+
}
|
|
1192
|
+
|
|
1193
|
+
pres.set_lims();
|
|
1194
|
+
result->do_allocation();
|
|
1195
|
+
pres.copy_result();
|
|
1196
|
+
|
|
1197
|
+
hnsw_stats.combine({n1, n2, ndis, nhops});
|
|
1198
|
+
|
|
1199
|
+
if (is_similarity_metric(metric_type)) {
|
|
1200
|
+
for (size_t i = 0; i < result->lims[result->nq]; i++) {
|
|
1201
|
+
result->distances[i] = -result->distances[i];
|
|
1202
|
+
}
|
|
1203
|
+
}
|
|
1204
|
+
}
|
|
1205
|
+
|
|
1094
1206
|
faiss::NumericType IndexHNSWCagra::get_numeric_type() const {
|
|
1095
1207
|
return numeric_type_;
|
|
1096
1208
|
}
|