faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#include <faiss/impl/hnsw/LockVector.h>
|
|
9
|
+
|
|
10
|
+
#include <cstdlib>
|
|
11
|
+
|
|
12
|
+
#include <utility>
|
|
13
|
+
|
|
14
|
+
namespace faiss {
|
|
15
|
+
|
|
16
|
+
/// Move constructor: takes over the lock buffer of `other` and leaves
/// `other` empty (null buffer, zero size, zero capacity), so that its
/// destructor has nothing to release.
LockVector::LockVector(LockVector&& other) noexcept
        : data_(std::exchange(other.data_, nullptr)),
          size_(std::exchange(other.size_, 0)),
          capacity_(std::exchange(other.capacity_, 0)) {}
|
|
22
|
+
|
|
23
|
+
void LockVector::prepare(size_t new_size) {
|
|
24
|
+
if (new_size <= size_) {
|
|
25
|
+
return;
|
|
26
|
+
}
|
|
27
|
+
if (new_size > capacity_) {
|
|
28
|
+
// Ensure geometric capacity growth.
|
|
29
|
+
size_t new_cap = std::max(new_size, capacity_ * 2);
|
|
30
|
+
// Just destroy old and init fresh; omp_lock_t is not copyable.
|
|
31
|
+
clear();
|
|
32
|
+
data_ = static_cast<omp_lock_t*>(malloc(new_cap * sizeof(omp_lock_t)));
|
|
33
|
+
FAISS_THROW_IF_NOT(data_ != nullptr);
|
|
34
|
+
capacity_ = new_cap;
|
|
35
|
+
}
|
|
36
|
+
for (size_t i = size_; i < new_size; i++) {
|
|
37
|
+
omp_init_lock(&data_[i]);
|
|
38
|
+
}
|
|
39
|
+
size_ = new_size;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
void LockVector::clear() {
|
|
43
|
+
if (data_) {
|
|
44
|
+
for (size_t i = 0; i < size_; i++) {
|
|
45
|
+
omp_destroy_lock(&data_[i]);
|
|
46
|
+
}
|
|
47
|
+
free(data_);
|
|
48
|
+
data_ = nullptr;
|
|
49
|
+
}
|
|
50
|
+
size_ = 0;
|
|
51
|
+
capacity_ = 0;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
} // namespace faiss
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <omp.h>
|
|
11
|
+
|
|
12
|
+
#include <faiss/impl/FaissAssert.h>
|
|
13
|
+
|
|
14
|
+
namespace faiss {
|
|
15
|
+
|
|
16
|
+
/// Contiguous, growable array of locks with geometric growth.
|
|
17
|
+
class LockVector {
|
|
18
|
+
public:
|
|
19
|
+
LockVector() = default;
|
|
20
|
+
explicit LockVector(size_t n) {
|
|
21
|
+
prepare(n);
|
|
22
|
+
}
|
|
23
|
+
// Copy ctor for clone(), initialized as empty.
|
|
24
|
+
LockVector(const LockVector&) : LockVector() {}
|
|
25
|
+
LockVector(LockVector&& other) noexcept;
|
|
26
|
+
|
|
27
|
+
~LockVector() {
|
|
28
|
+
clear();
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
LockVector& operator=(const LockVector&) = delete;
|
|
32
|
+
LockVector& operator=(LockVector&& other) = delete;
|
|
33
|
+
|
|
34
|
+
size_t size() const {
|
|
35
|
+
return size_;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Ensure size is at least 'new_size'. No locks may be held.
|
|
39
|
+
void prepare(size_t new_size);
|
|
40
|
+
// Release all locks and free memory. No locks may be held.
|
|
41
|
+
void clear();
|
|
42
|
+
|
|
43
|
+
void lock(size_t i) {
|
|
44
|
+
FAISS_CHECK_RANGE_DEBUG(i, 0, size_);
|
|
45
|
+
omp_set_lock(&data_[i]);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
void unlock(size_t i) {
|
|
49
|
+
FAISS_CHECK_RANGE_DEBUG(i, 0, size_);
|
|
50
|
+
omp_unset_lock(&data_[i]);
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
bool try_lock(size_t i) {
|
|
54
|
+
FAISS_CHECK_RANGE_DEBUG(i, 0, size_);
|
|
55
|
+
return omp_test_lock(&data_[i]);
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
private:
|
|
59
|
+
omp_lock_t* data_ = nullptr;
|
|
60
|
+
size_t size_ = 0;
|
|
61
|
+
size_t capacity_ = 0;
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
} // namespace faiss
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#include <cmath>
|
|
9
|
+
|
|
10
|
+
#include <faiss/impl/hnsw/MinimaxHeap.h>
|
|
11
|
+
|
|
12
|
+
#include <cassert>
|
|
13
|
+
|
|
14
|
+
#include <faiss/impl/simd_dispatch.h>
|
|
15
|
+
|
|
16
|
+
namespace faiss {
|
|
17
|
+
|
|
18
|
+
/// Insert entry (`i`, `v`) into the heap.
///
/// A NaN distance is replaced by HC::neutral() so that it is treated as
/// infinitely far away and heap ordering is preserved. When the heap is
/// full (k == n) the entry is dropped if it is not strictly smaller than
/// the root dis[0]; otherwise the root is evicted to make room.
void MinimaxHeap::push(storage_idx_t i, float v) {
    const float key = std::isnan(v) ? HC::neutral() : v;

    const bool is_full = (k == n);
    if (is_full) {
        if (key >= dis[0]) {
            return;
        }
        // The evicted root only counts as valid if it was not popped before.
        if (ids[0] != -1) {
            nvalid--;
        }
        faiss::heap_pop<HC>(k, dis.data(), ids.data());
        k--;
    }

    k++;
    faiss::heap_push<HC>(k, dis.data(), ids.data(), key, i);
    nvalid++;
}
|
|
35
|
+
|
|
36
|
+
// Scalar (NONE) specialization of pop_min_tpl
|
|
37
|
+
template <>
|
|
38
|
+
int MinimaxHeap::pop_min_tpl<SIMDLevel::NONE>(float* vmin_out) {
|
|
39
|
+
assert(k > 0);
|
|
40
|
+
// returns min. This is an O(n) operation
|
|
41
|
+
int i = k - 1;
|
|
42
|
+
while (i >= 0) {
|
|
43
|
+
if (ids[i] != -1) {
|
|
44
|
+
break;
|
|
45
|
+
}
|
|
46
|
+
i--;
|
|
47
|
+
}
|
|
48
|
+
if (i == -1) {
|
|
49
|
+
return -1;
|
|
50
|
+
}
|
|
51
|
+
int imin = i;
|
|
52
|
+
float vmin = dis[i];
|
|
53
|
+
i--;
|
|
54
|
+
while (i >= 0) {
|
|
55
|
+
if (ids[i] != -1 && dis[i] < vmin) {
|
|
56
|
+
vmin = dis[i];
|
|
57
|
+
imin = i;
|
|
58
|
+
}
|
|
59
|
+
i--;
|
|
60
|
+
}
|
|
61
|
+
if (vmin_out) {
|
|
62
|
+
*vmin_out = vmin;
|
|
63
|
+
}
|
|
64
|
+
int ret = ids[imin];
|
|
65
|
+
ids[imin] = -1;
|
|
66
|
+
--nvalid;
|
|
67
|
+
|
|
68
|
+
return ret;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// Runtime-dispatched pop_min (NONE + AVX2 + AVX512 only)
|
|
72
|
+
constexpr int MINIMAX_HEAP_SIMD_LEVELS = (1 << int(SIMDLevel::NONE)) |
|
|
73
|
+
(1 << int(SIMDLevel::AVX2)) | (1 << int(SIMDLevel::AVX512));
|
|
74
|
+
|
|
75
|
+
int MinimaxHeap::pop_min(float* vmin_out) {
|
|
76
|
+
return with_selected_simd_levels<MINIMAX_HEAP_SIMD_LEVELS>(
|
|
77
|
+
[&]<SIMDLevel SL>() { return pop_min_tpl<SL>(vmin_out); });
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
int MinimaxHeap::count_below(float thresh) {
|
|
81
|
+
int n_below = 0;
|
|
82
|
+
for (int i = 0; i < k; i++) {
|
|
83
|
+
if (dis[i] < thresh) {
|
|
84
|
+
n_below++;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return n_below;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
} // namespace faiss
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <cstdint>
|
|
11
|
+
#include <vector>
|
|
12
|
+
|
|
13
|
+
#include <faiss/utils/Heap.h>
|
|
14
|
+
#include <faiss/utils/simd_levels.h>
|
|
15
|
+
|
|
16
|
+
namespace faiss {
|
|
17
|
+
|
|
18
|
+
/** Heap structure that allows fast access and updates.
|
|
19
|
+
*
|
|
20
|
+
* Supports both max-heap operations (via the underlying CMax heap)
|
|
21
|
+
* and efficient min extraction via linear scan (with optional SIMD
|
|
22
|
+
* acceleration).
|
|
23
|
+
*/
|
|
24
|
+
struct MinimaxHeap {
|
|
25
|
+
using storage_idx_t = int32_t;
|
|
26
|
+
|
|
27
|
+
int n;
|
|
28
|
+
int k;
|
|
29
|
+
int nvalid;
|
|
30
|
+
|
|
31
|
+
std::vector<storage_idx_t> ids;
|
|
32
|
+
std::vector<float> dis;
|
|
33
|
+
using HC = faiss::CMax<float, storage_idx_t>;
|
|
34
|
+
|
|
35
|
+
explicit MinimaxHeap(int n_in)
|
|
36
|
+
: n(n_in), k(0), nvalid(0), ids(n_in), dis(n_in) {}
|
|
37
|
+
|
|
38
|
+
void push(storage_idx_t i, float v);
|
|
39
|
+
|
|
40
|
+
float max() const {
|
|
41
|
+
return dis[0];
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
int size() const {
|
|
45
|
+
return nvalid;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
void clear() {
|
|
49
|
+
nvalid = k = 0;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/// SIMD-templated pop_min implementation.
|
|
53
|
+
/// Specializations exist for NONE, AVX2, and AVX512.
|
|
54
|
+
template <SIMDLevel SL>
|
|
55
|
+
int pop_min_tpl(float* vmin_out = nullptr);
|
|
56
|
+
|
|
57
|
+
/// Runtime-dispatched pop_min (calls pop_min_tpl with best available
|
|
58
|
+
/// SIMD level).
|
|
59
|
+
int pop_min(float* vmin_out = nullptr);
|
|
60
|
+
|
|
61
|
+
int count_below(float thresh);
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
} // namespace faiss
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifdef COMPILE_SIMD_AVX2
|
|
9
|
+
|
|
10
|
+
#include <faiss/impl/hnsw/MinimaxHeap.h>
|
|
11
|
+
|
|
12
|
+
#include <immintrin.h>
|
|
13
|
+
#include <cassert>
|
|
14
|
+
#include <limits>
|
|
15
|
+
#include <type_traits>
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
template <>
|
|
20
|
+
int MinimaxHeap::pop_min_tpl<SIMDLevel::AVX2>(float* vmin_out) {
|
|
21
|
+
assert(k > 0);
|
|
22
|
+
static_assert(
|
|
23
|
+
std::is_same<storage_idx_t, int32_t>::value,
|
|
24
|
+
"This code expects storage_idx_t to be int32_t");
|
|
25
|
+
|
|
26
|
+
int32_t min_idx = -1;
|
|
27
|
+
float min_dis = std::numeric_limits<float>::infinity();
|
|
28
|
+
|
|
29
|
+
size_t iii = 0;
|
|
30
|
+
|
|
31
|
+
__m256i min_indices = _mm256_setr_epi32(-1, -1, -1, -1, -1, -1, -1, -1);
|
|
32
|
+
__m256 min_distances =
|
|
33
|
+
_mm256_set1_ps(std::numeric_limits<float>::infinity());
|
|
34
|
+
__m256i current_indices = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
|
35
|
+
__m256i offset = _mm256_set1_epi32(8);
|
|
36
|
+
|
|
37
|
+
// The baseline version is available in the NONE specialization.
|
|
38
|
+
|
|
39
|
+
// The following loop tracks the rightmost index with the min distance.
|
|
40
|
+
// -1 index values are ignored.
|
|
41
|
+
const size_t k8 = (k / 8) * 8;
|
|
42
|
+
for (; iii < k8; iii += 8) {
|
|
43
|
+
__m256i indices =
|
|
44
|
+
_mm256_loadu_si256((const __m256i*)(ids.data() + iii));
|
|
45
|
+
__m256 distances = _mm256_loadu_ps(dis.data() + iii);
|
|
46
|
+
|
|
47
|
+
// This mask filters out -1 values among indices.
|
|
48
|
+
__m256i m1mask = _mm256_cmpgt_epi32(_mm256_setzero_si256(), indices);
|
|
49
|
+
|
|
50
|
+
__m256i dmask = _mm256_castps_si256(
|
|
51
|
+
_mm256_cmp_ps(min_distances, distances, _CMP_LT_OS));
|
|
52
|
+
__m256 finalmask = _mm256_castsi256_ps(_mm256_or_si256(m1mask, dmask));
|
|
53
|
+
|
|
54
|
+
const __m256i min_indices_new = _mm256_castps_si256(_mm256_blendv_ps(
|
|
55
|
+
_mm256_castsi256_ps(current_indices),
|
|
56
|
+
_mm256_castsi256_ps(min_indices),
|
|
57
|
+
finalmask));
|
|
58
|
+
|
|
59
|
+
const __m256 min_distances_new =
|
|
60
|
+
_mm256_blendv_ps(distances, min_distances, finalmask);
|
|
61
|
+
|
|
62
|
+
min_indices = min_indices_new;
|
|
63
|
+
min_distances = min_distances_new;
|
|
64
|
+
|
|
65
|
+
current_indices = _mm256_add_epi32(current_indices, offset);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// Vectorizing is doable, but is not practical
|
|
69
|
+
int32_t vidx8[8];
|
|
70
|
+
float vdis8[8];
|
|
71
|
+
_mm256_storeu_ps(vdis8, min_distances);
|
|
72
|
+
_mm256_storeu_si256((__m256i*)vidx8, min_indices);
|
|
73
|
+
|
|
74
|
+
for (size_t j = 0; j < 8; j++) {
|
|
75
|
+
if (min_dis > vdis8[j] || (min_dis == vdis8[j] && min_idx < vidx8[j])) {
|
|
76
|
+
min_idx = vidx8[j];
|
|
77
|
+
min_dis = vdis8[j];
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// process last values. Vectorizing is doable, but is not practical
|
|
82
|
+
for (; iii < static_cast<size_t>(k); iii++) {
|
|
83
|
+
if (ids[iii] != -1 && dis[iii] <= min_dis) {
|
|
84
|
+
min_dis = dis[iii];
|
|
85
|
+
min_idx = iii;
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
if (min_idx == -1) {
|
|
90
|
+
return -1;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
if (vmin_out) {
|
|
94
|
+
*vmin_out = min_dis;
|
|
95
|
+
}
|
|
96
|
+
int ret = ids[min_idx];
|
|
97
|
+
ids[min_idx] = -1;
|
|
98
|
+
--nvalid;
|
|
99
|
+
return ret;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
} // namespace faiss
|
|
103
|
+
|
|
104
|
+
#endif // COMPILE_SIMD_AVX2
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifdef COMPILE_SIMD_AVX512
|
|
9
|
+
|
|
10
|
+
#include <faiss/impl/hnsw/MinimaxHeap.h>
|
|
11
|
+
|
|
12
|
+
#include <immintrin.h>
|
|
13
|
+
#include <cassert>
|
|
14
|
+
#include <limits>
|
|
15
|
+
#include <type_traits>
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
template <>
|
|
20
|
+
int MinimaxHeap::pop_min_tpl<SIMDLevel::AVX512>(float* vmin_out) {
|
|
21
|
+
assert(k > 0);
|
|
22
|
+
static_assert(
|
|
23
|
+
std::is_same<storage_idx_t, int32_t>::value,
|
|
24
|
+
"This code expects storage_idx_t to be int32_t");
|
|
25
|
+
|
|
26
|
+
int32_t min_idx = -1;
|
|
27
|
+
float min_dis = std::numeric_limits<float>::infinity();
|
|
28
|
+
|
|
29
|
+
__m512i min_indices = _mm512_set1_epi32(-1);
|
|
30
|
+
__m512 min_distances =
|
|
31
|
+
_mm512_set1_ps(std::numeric_limits<float>::infinity());
|
|
32
|
+
__m512i current_indices = _mm512_setr_epi32(
|
|
33
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
|
34
|
+
__m512i offset = _mm512_set1_epi32(16);
|
|
35
|
+
|
|
36
|
+
// The following loop tracks the rightmost index with the min distance.
|
|
37
|
+
// -1 index values are ignored.
|
|
38
|
+
const size_t k16 = (k / 16) * 16;
|
|
39
|
+
for (size_t iii = 0; iii < k16; iii += 16) {
|
|
40
|
+
__m512i indices =
|
|
41
|
+
_mm512_loadu_si512((const __m512i*)(ids.data() + iii));
|
|
42
|
+
__m512 distances = _mm512_loadu_ps(dis.data() + iii);
|
|
43
|
+
|
|
44
|
+
// This mask filters out -1 values among indices.
|
|
45
|
+
__mmask16 m1mask =
|
|
46
|
+
_mm512_cmpgt_epi32_mask(_mm512_setzero_si512(), indices);
|
|
47
|
+
|
|
48
|
+
__mmask16 dmask =
|
|
49
|
+
_mm512_cmp_ps_mask(min_distances, distances, _CMP_LT_OS);
|
|
50
|
+
__mmask16 finalmask = m1mask | dmask;
|
|
51
|
+
|
|
52
|
+
const __m512i min_indices_new = _mm512_mask_blend_epi32(
|
|
53
|
+
finalmask, current_indices, min_indices);
|
|
54
|
+
const __m512 min_distances_new =
|
|
55
|
+
_mm512_mask_blend_ps(finalmask, distances, min_distances);
|
|
56
|
+
|
|
57
|
+
min_indices = min_indices_new;
|
|
58
|
+
min_distances = min_distances_new;
|
|
59
|
+
|
|
60
|
+
current_indices = _mm512_add_epi32(current_indices, offset);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// leftovers
|
|
64
|
+
if (k16 != static_cast<size_t>(k)) {
|
|
65
|
+
const __mmask16 kmask = (1 << (k - k16)) - 1;
|
|
66
|
+
|
|
67
|
+
__m512i indices = _mm512_mask_loadu_epi32(
|
|
68
|
+
_mm512_set1_epi32(-1), kmask, ids.data() + k16);
|
|
69
|
+
__m512 distances = _mm512_maskz_loadu_ps(kmask, dis.data() + k16);
|
|
70
|
+
|
|
71
|
+
// This mask filters out -1 values among indices.
|
|
72
|
+
__mmask16 m1mask =
|
|
73
|
+
_mm512_cmpgt_epi32_mask(_mm512_setzero_si512(), indices);
|
|
74
|
+
|
|
75
|
+
__mmask16 dmask =
|
|
76
|
+
_mm512_cmp_ps_mask(min_distances, distances, _CMP_LT_OS);
|
|
77
|
+
__mmask16 finalmask = m1mask | dmask;
|
|
78
|
+
|
|
79
|
+
const __m512i min_indices_new = _mm512_mask_blend_epi32(
|
|
80
|
+
finalmask, current_indices, min_indices);
|
|
81
|
+
const __m512 min_distances_new =
|
|
82
|
+
_mm512_mask_blend_ps(finalmask, distances, min_distances);
|
|
83
|
+
|
|
84
|
+
min_indices = min_indices_new;
|
|
85
|
+
min_distances = min_distances_new;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// grab min distance
|
|
89
|
+
min_dis = _mm512_reduce_min_ps(min_distances);
|
|
90
|
+
// blend
|
|
91
|
+
__mmask16 mindmask =
|
|
92
|
+
_mm512_cmpeq_ps_mask(min_distances, _mm512_set1_ps(min_dis));
|
|
93
|
+
// pick the max one
|
|
94
|
+
min_idx = _mm512_mask_reduce_max_epi32(mindmask, min_indices);
|
|
95
|
+
|
|
96
|
+
if (min_idx == -1) {
|
|
97
|
+
return -1;
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
if (vmin_out) {
|
|
101
|
+
*vmin_out = min_dis;
|
|
102
|
+
}
|
|
103
|
+
int ret = ids[min_idx];
|
|
104
|
+
ids[min_idx] = -1;
|
|
105
|
+
--nvalid;
|
|
106
|
+
return ret;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
} // namespace faiss
|
|
110
|
+
|
|
111
|
+
#endif // COMPILE_SIMD_AVX512
|