faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -9,24 +9,56 @@
|
|
|
9
9
|
|
|
10
10
|
#include <immintrin.h>
|
|
11
11
|
|
|
12
|
-
#
|
|
12
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
|
13
|
+
#include <faiss/impl/ResultHandler.h>
|
|
14
|
+
#include <faiss/utils/distances_fused/distances_fused.h>
|
|
15
|
+
#include <faiss/utils/simd_impl/exhaustive_L2sqr_blas_cmax.h>
|
|
16
|
+
|
|
17
|
+
#ifndef FINTEGER
|
|
18
|
+
#define FINTEGER long
|
|
19
|
+
#endif
|
|
20
|
+
|
|
21
|
+
extern "C" {
|
|
22
|
+
|
|
23
|
+
int sgemm_(
|
|
24
|
+
const char* transa,
|
|
25
|
+
const char* transb,
|
|
26
|
+
FINTEGER* m,
|
|
27
|
+
FINTEGER* n,
|
|
28
|
+
FINTEGER* k,
|
|
29
|
+
const float* alpha,
|
|
30
|
+
const float* a,
|
|
31
|
+
FINTEGER* lda,
|
|
32
|
+
const float* b,
|
|
33
|
+
FINTEGER* ldb,
|
|
34
|
+
float* beta,
|
|
35
|
+
float* c,
|
|
36
|
+
FINTEGER* ldc);
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
#define THE_SIMD_LEVEL SIMDLevel::AVX2
|
|
13
40
|
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
14
41
|
#include <faiss/utils/simd_impl/distances_autovec-inl.h>
|
|
15
42
|
|
|
43
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
44
|
+
#include <faiss/utils/simd_impl/distances_simdlib256.h>
|
|
45
|
+
|
|
16
46
|
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
17
47
|
#include <faiss/utils/simd_impl/distances_sse-inl.h>
|
|
18
48
|
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
19
49
|
#include <faiss/utils/transpose/transpose-avx2-inl.h>
|
|
50
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
51
|
+
#include <faiss/utils/simd_impl/IVFFlatScanner-inl.h>
|
|
20
52
|
|
|
21
53
|
namespace faiss {
|
|
22
54
|
|
|
23
55
|
template <>
|
|
24
56
|
void fvec_madd<SIMDLevel::AVX2>(
|
|
25
57
|
const size_t n,
|
|
26
|
-
const float*
|
|
58
|
+
const float* a,
|
|
27
59
|
const float bf,
|
|
28
|
-
const float*
|
|
29
|
-
float*
|
|
60
|
+
const float* b,
|
|
61
|
+
float* c) {
|
|
30
62
|
//
|
|
31
63
|
const size_t n8 = n / 8;
|
|
32
64
|
const size_t n_for_masking = n % 8;
|
|
@@ -1182,4 +1214,218 @@ int fvec_madd_and_argmin<SIMDLevel::AVX2>(
|
|
|
1182
1214
|
return fvec_madd_and_argmin_sse(n, a, bf, b, c);
|
|
1183
1215
|
}
|
|
1184
1216
|
|
|
1217
|
+
template <>
|
|
1218
|
+
void exhaustive_L2sqr_blas_cmax<SIMDLevel::AVX2>(
|
|
1219
|
+
const float* x,
|
|
1220
|
+
const float* y,
|
|
1221
|
+
size_t d,
|
|
1222
|
+
size_t nx,
|
|
1223
|
+
size_t ny,
|
|
1224
|
+
Top1BlockResultHandler<CMax<float, int64_t>>& res,
|
|
1225
|
+
const float* y_norms) {
|
|
1226
|
+
// BLAS does not like empty matrices
|
|
1227
|
+
if (nx == 0 || ny == 0) {
|
|
1228
|
+
return;
|
|
1229
|
+
}
|
|
1230
|
+
|
|
1231
|
+
/* block sizes */
|
|
1232
|
+
const size_t bs_x = distance_compute_blas_query_bs;
|
|
1233
|
+
const size_t bs_y = distance_compute_blas_database_bs;
|
|
1234
|
+
// const size_t bs_x = 16, bs_y = 16;
|
|
1235
|
+
std::unique_ptr<float[]> ip_block(new float[bs_x * bs_y]);
|
|
1236
|
+
std::unique_ptr<float[]> x_norms(new float[nx]);
|
|
1237
|
+
std::unique_ptr<float[]> del2;
|
|
1238
|
+
|
|
1239
|
+
fvec_norms_L2sqr(x_norms.get(), x, d, nx);
|
|
1240
|
+
|
|
1241
|
+
if (!y_norms) {
|
|
1242
|
+
float* y_norms2 = new float[ny];
|
|
1243
|
+
del2.reset(y_norms2);
|
|
1244
|
+
fvec_norms_L2sqr(y_norms2, y, d, ny);
|
|
1245
|
+
y_norms = y_norms2;
|
|
1246
|
+
}
|
|
1247
|
+
|
|
1248
|
+
for (size_t i0 = 0; i0 < nx; i0 += bs_x) {
|
|
1249
|
+
size_t i1 = i0 + bs_x;
|
|
1250
|
+
if (i1 > nx) {
|
|
1251
|
+
i1 = nx;
|
|
1252
|
+
}
|
|
1253
|
+
|
|
1254
|
+
res.begin_multiple(i0, i1);
|
|
1255
|
+
|
|
1256
|
+
for (size_t j0 = 0; j0 < ny; j0 += bs_y) {
|
|
1257
|
+
size_t j1 = j0 + bs_y;
|
|
1258
|
+
if (j1 > ny) {
|
|
1259
|
+
j1 = ny;
|
|
1260
|
+
}
|
|
1261
|
+
/* compute the actual dot products */
|
|
1262
|
+
{
|
|
1263
|
+
float one = 1, zero = 0;
|
|
1264
|
+
FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d;
|
|
1265
|
+
sgemm_("Transpose",
|
|
1266
|
+
"Not transpose",
|
|
1267
|
+
&nyi,
|
|
1268
|
+
&nxi,
|
|
1269
|
+
&di,
|
|
1270
|
+
&one,
|
|
1271
|
+
y + j0 * d,
|
|
1272
|
+
&di,
|
|
1273
|
+
x + i0 * d,
|
|
1274
|
+
&di,
|
|
1275
|
+
&zero,
|
|
1276
|
+
ip_block.get(),
|
|
1277
|
+
&nyi);
|
|
1278
|
+
}
|
|
1279
|
+
#pragma omp parallel for schedule(static) if ((i1 - i0) >= 16)
|
|
1280
|
+
for (int64_t i = static_cast<int64_t>(i0);
|
|
1281
|
+
i < static_cast<int64_t>(i1);
|
|
1282
|
+
i++) {
|
|
1283
|
+
float* ip_line = ip_block.get() + (i - i0) * (j1 - j0);
|
|
1284
|
+
|
|
1285
|
+
_mm_prefetch((const char*)ip_line, _MM_HINT_NTA);
|
|
1286
|
+
_mm_prefetch((const char*)(ip_line + 16), _MM_HINT_NTA);
|
|
1287
|
+
|
|
1288
|
+
// constant
|
|
1289
|
+
const __m256 mul_minus2 = _mm256_set1_ps(-2);
|
|
1290
|
+
|
|
1291
|
+
// Track 8 min distances + 8 min indices.
|
|
1292
|
+
// All the distances tracked do not take x_norms[i]
|
|
1293
|
+
// into account in order to get rid of extra
|
|
1294
|
+
// _mm256_add_ps(x_norms[i], ...) instructions
|
|
1295
|
+
// in distance computations.
|
|
1296
|
+
__m256 min_distances =
|
|
1297
|
+
_mm256_set1_ps(res.dis_tab[i] - x_norms[i]);
|
|
1298
|
+
|
|
1299
|
+
// these indices are local and are relative to j0.
|
|
1300
|
+
// so, value 0 means j0.
|
|
1301
|
+
__m256i min_indices = _mm256_set1_epi32(0);
|
|
1302
|
+
|
|
1303
|
+
__m256i current_indices =
|
|
1304
|
+
_mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
|
1305
|
+
const __m256i indices_delta = _mm256_set1_epi32(8);
|
|
1306
|
+
|
|
1307
|
+
// current j index
|
|
1308
|
+
size_t idx_j = 0;
|
|
1309
|
+
size_t count = j1 - j0;
|
|
1310
|
+
|
|
1311
|
+
// process 16 elements per loop
|
|
1312
|
+
for (; idx_j < (count / 16) * 16; idx_j += 16, ip_line += 16) {
|
|
1313
|
+
_mm_prefetch((const char*)(ip_line + 32), _MM_HINT_NTA);
|
|
1314
|
+
_mm_prefetch((const char*)(ip_line + 48), _MM_HINT_NTA);
|
|
1315
|
+
|
|
1316
|
+
// load values for norms
|
|
1317
|
+
const __m256 y_norm_0 =
|
|
1318
|
+
_mm256_loadu_ps(y_norms + idx_j + j0 + 0);
|
|
1319
|
+
const __m256 y_norm_1 =
|
|
1320
|
+
_mm256_loadu_ps(y_norms + idx_j + j0 + 8);
|
|
1321
|
+
|
|
1322
|
+
// load values for dot products
|
|
1323
|
+
const __m256 ip_0 = _mm256_loadu_ps(ip_line + 0);
|
|
1324
|
+
const __m256 ip_1 = _mm256_loadu_ps(ip_line + 8);
|
|
1325
|
+
|
|
1326
|
+
// compute dis = y_norm[j] - 2 * dot(x[i], y[j]).
|
|
1327
|
+
// x_norm[i] was dropped off because it is a constant for a
|
|
1328
|
+
// given i. We'll deal with it later.
|
|
1329
|
+
__m256 distances_0 =
|
|
1330
|
+
_mm256_fmadd_ps(ip_0, mul_minus2, y_norm_0);
|
|
1331
|
+
__m256 distances_1 =
|
|
1332
|
+
_mm256_fmadd_ps(ip_1, mul_minus2, y_norm_1);
|
|
1333
|
+
|
|
1334
|
+
// compare the new distances to the min distances
|
|
1335
|
+
// for each of the first group of 8 AVX2 components.
|
|
1336
|
+
const __m256 comparison_0 = _mm256_cmp_ps(
|
|
1337
|
+
min_distances, distances_0, _CMP_LE_OS);
|
|
1338
|
+
|
|
1339
|
+
// update min distances and indices with closest vectors if
|
|
1340
|
+
// needed.
|
|
1341
|
+
min_distances = _mm256_blendv_ps(
|
|
1342
|
+
distances_0, min_distances, comparison_0);
|
|
1343
|
+
min_indices = _mm256_castps_si256(_mm256_blendv_ps(
|
|
1344
|
+
_mm256_castsi256_ps(current_indices),
|
|
1345
|
+
_mm256_castsi256_ps(min_indices),
|
|
1346
|
+
comparison_0));
|
|
1347
|
+
current_indices =
|
|
1348
|
+
_mm256_add_epi32(current_indices, indices_delta);
|
|
1349
|
+
|
|
1350
|
+
// compare the new distances to the min distances
|
|
1351
|
+
// for each of the second group of 8 AVX2 components.
|
|
1352
|
+
const __m256 comparison_1 = _mm256_cmp_ps(
|
|
1353
|
+
min_distances, distances_1, _CMP_LE_OS);
|
|
1354
|
+
|
|
1355
|
+
// update min distances and indices with closest vectors if
|
|
1356
|
+
// needed.
|
|
1357
|
+
min_distances = _mm256_blendv_ps(
|
|
1358
|
+
distances_1, min_distances, comparison_1);
|
|
1359
|
+
min_indices = _mm256_castps_si256(_mm256_blendv_ps(
|
|
1360
|
+
_mm256_castsi256_ps(current_indices),
|
|
1361
|
+
_mm256_castsi256_ps(min_indices),
|
|
1362
|
+
comparison_1));
|
|
1363
|
+
current_indices =
|
|
1364
|
+
_mm256_add_epi32(current_indices, indices_delta);
|
|
1365
|
+
}
|
|
1366
|
+
|
|
1367
|
+
// dump values and find the minimum distance / minimum index
|
|
1368
|
+
float min_distances_scalar[8];
|
|
1369
|
+
uint32_t min_indices_scalar[8];
|
|
1370
|
+
_mm256_storeu_ps(min_distances_scalar, min_distances);
|
|
1371
|
+
_mm256_storeu_si256(
|
|
1372
|
+
(__m256i*)(min_indices_scalar), min_indices);
|
|
1373
|
+
|
|
1374
|
+
float current_min_distance = res.dis_tab[i];
|
|
1375
|
+
uint32_t current_min_index = res.ids_tab[i];
|
|
1376
|
+
|
|
1377
|
+
// This unusual comparison is needed to maintain the behavior
|
|
1378
|
+
// of the original implementation: if two indices are
|
|
1379
|
+
// represented with equal distance values, then
|
|
1380
|
+
// the index with the min value is returned.
|
|
1381
|
+
for (size_t jv = 0; jv < 8; jv++) {
|
|
1382
|
+
// add missing x_norms[i]
|
|
1383
|
+
float distance_candidate =
|
|
1384
|
+
min_distances_scalar[jv] + x_norms[i];
|
|
1385
|
+
|
|
1386
|
+
// negative values can occur for identical vectors
|
|
1387
|
+
// due to roundoff errors.
|
|
1388
|
+
if (distance_candidate < 0) {
|
|
1389
|
+
distance_candidate = 0;
|
|
1390
|
+
}
|
|
1391
|
+
|
|
1392
|
+
int64_t index_candidate = min_indices_scalar[jv] + j0;
|
|
1393
|
+
|
|
1394
|
+
if (current_min_distance > distance_candidate) {
|
|
1395
|
+
current_min_distance = distance_candidate;
|
|
1396
|
+
current_min_index = index_candidate;
|
|
1397
|
+
} else if (
|
|
1398
|
+
current_min_distance == distance_candidate &&
|
|
1399
|
+
current_min_index > index_candidate) {
|
|
1400
|
+
current_min_index = index_candidate;
|
|
1401
|
+
}
|
|
1402
|
+
}
|
|
1403
|
+
|
|
1404
|
+
// process leftovers
|
|
1405
|
+
for (; idx_j < count; idx_j++, ip_line++) {
|
|
1406
|
+
float ip = *ip_line;
|
|
1407
|
+
float dis = x_norms[i] + y_norms[idx_j + j0] - 2 * ip;
|
|
1408
|
+
// negative values can occur for identical vectors
|
|
1409
|
+
// due to roundoff errors.
|
|
1410
|
+
if (dis < 0) {
|
|
1411
|
+
dis = 0;
|
|
1412
|
+
}
|
|
1413
|
+
|
|
1414
|
+
if (current_min_distance > dis) {
|
|
1415
|
+
current_min_distance = dis;
|
|
1416
|
+
current_min_index = idx_j + j0;
|
|
1417
|
+
}
|
|
1418
|
+
}
|
|
1419
|
+
|
|
1420
|
+
//
|
|
1421
|
+
res.add_result(i, current_min_distance, current_min_index);
|
|
1422
|
+
}
|
|
1423
|
+
}
|
|
1424
|
+
// Does nothing for SingleBestResultHandler, but
|
|
1425
|
+
// keeping the call for the consistency.
|
|
1426
|
+
res.end_multiple();
|
|
1427
|
+
InterruptCallback::check();
|
|
1428
|
+
}
|
|
1429
|
+
}
|
|
1430
|
+
|
|
1185
1431
|
} // namespace faiss
|
|
@@ -9,8 +9,11 @@
|
|
|
9
9
|
|
|
10
10
|
#include <immintrin.h>
|
|
11
11
|
|
|
12
|
-
#define
|
|
12
|
+
#define THE_SIMD_LEVEL SIMDLevel::AVX512
|
|
13
13
|
#include <faiss/utils/simd_impl/distances_autovec-inl.h>
|
|
14
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
15
|
+
#include <faiss/utils/simd_impl/IVFFlatScanner-inl.h>
|
|
16
|
+
|
|
14
17
|
#include <faiss/utils/simd_impl/distances_sse-inl.h>
|
|
15
18
|
#include <faiss/utils/transpose/transpose-avx512-inl.h>
|
|
16
19
|
|
|
@@ -19,10 +22,10 @@ namespace faiss {
|
|
|
19
22
|
template <>
|
|
20
23
|
void fvec_madd<SIMDLevel::AVX512>(
|
|
21
24
|
const size_t n,
|
|
22
|
-
const float*
|
|
25
|
+
const float* a,
|
|
23
26
|
const float bf,
|
|
24
|
-
const float*
|
|
25
|
-
float*
|
|
27
|
+
const float* b,
|
|
28
|
+
float* c) {
|
|
26
29
|
const size_t n16 = n / 16;
|
|
27
30
|
const size_t n_for_masking = n % 16;
|
|
28
31
|
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#include <faiss/utils/distances.h>
|
|
11
|
+
|
|
12
|
+
#ifdef COMPILE_SIMD_RISCV_RVV
|
|
13
|
+
|
|
14
|
+
#include <faiss/utils/extra_distances.h>
|
|
15
|
+
|
|
16
|
+
namespace faiss {
|
|
17
|
+
|
|
18
|
+
template <>
|
|
19
|
+
float fvec_norm_L2sqr<SIMDLevel::RISCV_RVV>(const float* x, size_t d) {
|
|
20
|
+
return fvec_norm_L2sqr<SIMDLevel::NONE>(x, d);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
template <>
|
|
24
|
+
float fvec_L2sqr<SIMDLevel::RISCV_RVV>(
|
|
25
|
+
const float* x,
|
|
26
|
+
const float* y,
|
|
27
|
+
size_t d) {
|
|
28
|
+
return fvec_L2sqr<SIMDLevel::NONE>(x, y, d);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
template <>
|
|
32
|
+
float fvec_inner_product<SIMDLevel::RISCV_RVV>(
|
|
33
|
+
const float* x,
|
|
34
|
+
const float* y,
|
|
35
|
+
size_t d) {
|
|
36
|
+
return fvec_inner_product<SIMDLevel::NONE>(x, y, d);
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
template <>
|
|
40
|
+
float fvec_L1<SIMDLevel::RISCV_RVV>(const float* x, const float* y, size_t d) {
|
|
41
|
+
return fvec_L1<SIMDLevel::NONE>(x, y, d);
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
template <>
|
|
45
|
+
float fvec_Linf<SIMDLevel::RISCV_RVV>(
|
|
46
|
+
const float* x,
|
|
47
|
+
const float* y,
|
|
48
|
+
size_t d) {
|
|
49
|
+
return fvec_Linf<SIMDLevel::NONE>(x, y, d);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
template <>
|
|
53
|
+
void fvec_inner_product_batch_4<SIMDLevel::RISCV_RVV>(
|
|
54
|
+
const float* x,
|
|
55
|
+
const float* y0,
|
|
56
|
+
const float* y1,
|
|
57
|
+
const float* y2,
|
|
58
|
+
const float* y3,
|
|
59
|
+
const size_t d,
|
|
60
|
+
float& dis0,
|
|
61
|
+
float& dis1,
|
|
62
|
+
float& dis2,
|
|
63
|
+
float& dis3) {
|
|
64
|
+
fvec_inner_product_batch_4<SIMDLevel::NONE>(
|
|
65
|
+
x, y0, y1, y2, y3, d, dis0, dis1, dis2, dis3);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
template <>
|
|
69
|
+
void fvec_L2sqr_batch_4<SIMDLevel::RISCV_RVV>(
|
|
70
|
+
const float* x,
|
|
71
|
+
const float* y0,
|
|
72
|
+
const float* y1,
|
|
73
|
+
const float* y2,
|
|
74
|
+
const float* y3,
|
|
75
|
+
const size_t d,
|
|
76
|
+
float& dis0,
|
|
77
|
+
float& dis1,
|
|
78
|
+
float& dis2,
|
|
79
|
+
float& dis3) {
|
|
80
|
+
fvec_L2sqr_batch_4<SIMDLevel::NONE>(
|
|
81
|
+
x, y0, y1, y2, y3, d, dis0, dis1, dis2, dis3);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
template <>
|
|
85
|
+
void fvec_L2sqr_ny_transposed<SIMDLevel::RISCV_RVV>(
|
|
86
|
+
float* dis,
|
|
87
|
+
const float* x,
|
|
88
|
+
const float* y,
|
|
89
|
+
const float* y_sqlen,
|
|
90
|
+
size_t d,
|
|
91
|
+
size_t d_offset,
|
|
92
|
+
size_t ny) {
|
|
93
|
+
fvec_L2sqr_ny_transposed<SIMDLevel::NONE>(
|
|
94
|
+
dis, x, y, y_sqlen, d, d_offset, ny);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
template <>
|
|
98
|
+
void fvec_inner_products_ny<SIMDLevel::RISCV_RVV>(
|
|
99
|
+
float* ip,
|
|
100
|
+
const float* x,
|
|
101
|
+
const float* y,
|
|
102
|
+
size_t d,
|
|
103
|
+
size_t ny) {
|
|
104
|
+
fvec_inner_products_ny<SIMDLevel::NONE>(ip, x, y, d, ny);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
template <>
|
|
108
|
+
void fvec_L2sqr_ny<SIMDLevel::RISCV_RVV>(
|
|
109
|
+
float* dis,
|
|
110
|
+
const float* x,
|
|
111
|
+
const float* y,
|
|
112
|
+
size_t d,
|
|
113
|
+
size_t ny) {
|
|
114
|
+
fvec_L2sqr_ny<SIMDLevel::NONE>(dis, x, y, d, ny);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
template <>
|
|
118
|
+
size_t fvec_L2sqr_ny_nearest<SIMDLevel::RISCV_RVV>(
|
|
119
|
+
float* distances_tmp_buffer,
|
|
120
|
+
const float* x,
|
|
121
|
+
const float* y,
|
|
122
|
+
size_t d,
|
|
123
|
+
size_t ny) {
|
|
124
|
+
return fvec_L2sqr_ny_nearest<SIMDLevel::NONE>(
|
|
125
|
+
distances_tmp_buffer, x, y, d, ny);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
template <>
|
|
129
|
+
size_t fvec_L2sqr_ny_nearest_y_transposed<SIMDLevel::RISCV_RVV>(
|
|
130
|
+
float* distances_tmp_buffer,
|
|
131
|
+
const float* x,
|
|
132
|
+
const float* y,
|
|
133
|
+
const float* y_sqlen,
|
|
134
|
+
size_t d,
|
|
135
|
+
size_t d_offset,
|
|
136
|
+
size_t ny) {
|
|
137
|
+
return fvec_L2sqr_ny_nearest_y_transposed<SIMDLevel::NONE>(
|
|
138
|
+
distances_tmp_buffer, x, y, y_sqlen, d, d_offset, ny);
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
template <>
|
|
142
|
+
void fvec_madd<SIMDLevel::RISCV_RVV>(
|
|
143
|
+
size_t n,
|
|
144
|
+
const float* a,
|
|
145
|
+
float bf,
|
|
146
|
+
const float* b,
|
|
147
|
+
float* c) {
|
|
148
|
+
fvec_madd<SIMDLevel::NONE>(n, a, bf, b, c);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
template <>
|
|
152
|
+
int fvec_madd_and_argmin<SIMDLevel::RISCV_RVV>(
|
|
153
|
+
size_t n,
|
|
154
|
+
const float* a,
|
|
155
|
+
float bf,
|
|
156
|
+
const float* b,
|
|
157
|
+
float* c) {
|
|
158
|
+
return fvec_madd_and_argmin<SIMDLevel::NONE>(n, a, bf, b, c);
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
#define DEFINE_VECTOR_DISTANCE_RVV_FALLBACK(metric) \
|
|
162
|
+
template <> \
|
|
163
|
+
float VectorDistance<metric, SIMDLevel::RISCV_RVV>::operator()( \
|
|
164
|
+
const float* x, const float* y) const { \
|
|
165
|
+
return VectorDistance<metric, SIMDLevel::NONE>( \
|
|
166
|
+
this->d, this->metric_arg)(x, y); \
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
DEFINE_VECTOR_DISTANCE_RVV_FALLBACK(METRIC_L2)
|
|
170
|
+
DEFINE_VECTOR_DISTANCE_RVV_FALLBACK(METRIC_INNER_PRODUCT)
|
|
171
|
+
DEFINE_VECTOR_DISTANCE_RVV_FALLBACK(METRIC_L1)
|
|
172
|
+
DEFINE_VECTOR_DISTANCE_RVV_FALLBACK(METRIC_Linf)
|
|
173
|
+
DEFINE_VECTOR_DISTANCE_RVV_FALLBACK(METRIC_Lp)
|
|
174
|
+
DEFINE_VECTOR_DISTANCE_RVV_FALLBACK(METRIC_Canberra)
|
|
175
|
+
DEFINE_VECTOR_DISTANCE_RVV_FALLBACK(METRIC_BrayCurtis)
|
|
176
|
+
DEFINE_VECTOR_DISTANCE_RVV_FALLBACK(METRIC_JensenShannon)
|
|
177
|
+
DEFINE_VECTOR_DISTANCE_RVV_FALLBACK(METRIC_Jaccard)
|
|
178
|
+
DEFINE_VECTOR_DISTANCE_RVV_FALLBACK(METRIC_NaNEuclidean)
|
|
179
|
+
DEFINE_VECTOR_DISTANCE_RVV_FALLBACK(METRIC_GOWER)
|
|
180
|
+
|
|
181
|
+
#undef DEFINE_VECTOR_DISTANCE_RVV_FALLBACK
|
|
182
|
+
|
|
183
|
+
} // namespace faiss
|
|
184
|
+
|
|
185
|
+
#define THE_SIMD_LEVEL SIMDLevel::RISCV_RVV
|
|
186
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
187
|
+
#include <faiss/utils/simd_impl/IVFFlatScanner-inl.h>
|
|
188
|
+
|
|
189
|
+
#endif // COMPILE_SIMD_RISCV_RVV
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <algorithm>
|
|
11
|
+
|
|
12
|
+
#include <faiss/impl/FaissAssert.h>
|
|
13
|
+
#include <faiss/impl/platform_macros.h>
|
|
14
|
+
#include <faiss/impl/simdlib/simdlib_dispatch.h>
|
|
15
|
+
#include <faiss/utils/distances.h>
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
template <>
|
|
20
|
+
void fvec_sub<THE_SIMD_LEVEL>(
|
|
21
|
+
size_t d,
|
|
22
|
+
const float* a,
|
|
23
|
+
const float* b,
|
|
24
|
+
float* c) {
|
|
25
|
+
size_t i;
|
|
26
|
+
for (i = 0; i + 7 < d; i += 8) {
|
|
27
|
+
simd8float32_tpl<THE_SIMD_LEVEL> ci, ai, bi;
|
|
28
|
+
ai.loadu(a + i);
|
|
29
|
+
bi.loadu(b + i);
|
|
30
|
+
ci = ai - bi;
|
|
31
|
+
ci.storeu(c + i);
|
|
32
|
+
}
|
|
33
|
+
for (; i < d; i++) {
|
|
34
|
+
c[i] = a[i] - b[i];
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
template <>
|
|
39
|
+
void fvec_add<THE_SIMD_LEVEL>(
|
|
40
|
+
size_t d,
|
|
41
|
+
const float* a,
|
|
42
|
+
const float* b,
|
|
43
|
+
float* c) {
|
|
44
|
+
size_t i;
|
|
45
|
+
for (i = 0; i + 7 < d; i += 8) {
|
|
46
|
+
simd8float32_tpl<THE_SIMD_LEVEL> ci, ai, bi;
|
|
47
|
+
ai.loadu(a + i);
|
|
48
|
+
bi.loadu(b + i);
|
|
49
|
+
ci = ai + bi;
|
|
50
|
+
ci.storeu(c + i);
|
|
51
|
+
}
|
|
52
|
+
for (; i < d; i++) {
|
|
53
|
+
c[i] = a[i] + b[i];
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
template <>
|
|
58
|
+
void fvec_add<THE_SIMD_LEVEL>(size_t d, const float* a, float b, float* c) {
|
|
59
|
+
size_t i;
|
|
60
|
+
simd8float32_tpl<THE_SIMD_LEVEL> bv(b);
|
|
61
|
+
for (i = 0; i + 7 < d; i += 8) {
|
|
62
|
+
simd8float32_tpl<THE_SIMD_LEVEL> ci, ai;
|
|
63
|
+
ai.loadu(a + i);
|
|
64
|
+
ci = ai + bv;
|
|
65
|
+
ci.storeu(c + i);
|
|
66
|
+
}
|
|
67
|
+
for (; i < d; i++) {
|
|
68
|
+
c[i] = a[i] + b;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/***************************************************************************
|
|
73
|
+
* PQ tables computations
|
|
74
|
+
***************************************************************************/
|
|
75
|
+
|
|
76
|
+
namespace {
|
|
77
|
+
|
|
78
|
+
/// compute the IP for dsub = 2 for 8 centroids and 4 sub-vectors at a time
|
|
79
|
+
template <SIMDLevel SL, bool is_inner_product>
|
|
80
|
+
void pq2_8cents_table(
|
|
81
|
+
const simd8float32_tpl<SL> centroids[8],
|
|
82
|
+
const simd8float32_tpl<SL> x,
|
|
83
|
+
float* out,
|
|
84
|
+
size_t ldo,
|
|
85
|
+
size_t nout = 4) {
|
|
86
|
+
simd8float32_tpl<SL> ips[4];
|
|
87
|
+
|
|
88
|
+
for (int i = 0; i < 4; i++) {
|
|
89
|
+
simd8float32_tpl<SL> p1, p2;
|
|
90
|
+
if (is_inner_product) {
|
|
91
|
+
p1 = x * centroids[2 * i];
|
|
92
|
+
p2 = x * centroids[2 * i + 1];
|
|
93
|
+
} else {
|
|
94
|
+
p1 = (x - centroids[2 * i]);
|
|
95
|
+
p1 = p1 * p1;
|
|
96
|
+
p2 = (x - centroids[2 * i + 1]);
|
|
97
|
+
p2 = p2 * p2;
|
|
98
|
+
}
|
|
99
|
+
ips[i] = hadd(p1, p2);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
simd8float32_tpl<SL> ip02a = geteven(ips[0], ips[1]);
|
|
103
|
+
simd8float32_tpl<SL> ip02b = geteven(ips[2], ips[3]);
|
|
104
|
+
simd8float32_tpl<SL> ip0 = getlow128(ip02a, ip02b);
|
|
105
|
+
simd8float32_tpl<SL> ip2 = gethigh128(ip02a, ip02b);
|
|
106
|
+
|
|
107
|
+
simd8float32_tpl<SL> ip13a = getodd(ips[0], ips[1]);
|
|
108
|
+
simd8float32_tpl<SL> ip13b = getodd(ips[2], ips[3]);
|
|
109
|
+
simd8float32_tpl<SL> ip1 = getlow128(ip13a, ip13b);
|
|
110
|
+
simd8float32_tpl<SL> ip3 = gethigh128(ip13a, ip13b);
|
|
111
|
+
|
|
112
|
+
switch (nout) {
|
|
113
|
+
case 4:
|
|
114
|
+
ip3.storeu(out + 3 * ldo);
|
|
115
|
+
[[fallthrough]];
|
|
116
|
+
case 3:
|
|
117
|
+
ip2.storeu(out + 2 * ldo);
|
|
118
|
+
[[fallthrough]];
|
|
119
|
+
case 2:
|
|
120
|
+
ip1.storeu(out + 1 * ldo);
|
|
121
|
+
[[fallthrough]];
|
|
122
|
+
case 1:
|
|
123
|
+
ip0.storeu(out);
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
template <SIMDLevel SL>
|
|
128
|
+
simd8float32_tpl<SL> load_simd8float32_partial(const float* x, int n) {
|
|
129
|
+
ALIGNED(32) float tmp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
130
|
+
float* wp = tmp;
|
|
131
|
+
for (int i = 0; i < n; i++) {
|
|
132
|
+
*wp++ = *x++;
|
|
133
|
+
}
|
|
134
|
+
return simd8float32_tpl<SL>(tmp);
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
} // anonymous namespace
|
|
138
|
+
|
|
139
|
+
template <>
|
|
140
|
+
void compute_PQ_dis_tables_dsub2<THE_SIMD_LEVEL>(
|
|
141
|
+
size_t d,
|
|
142
|
+
size_t ksub,
|
|
143
|
+
const float* all_centroids,
|
|
144
|
+
size_t nx,
|
|
145
|
+
const float* x,
|
|
146
|
+
bool is_inner_product,
|
|
147
|
+
float* dis_tables) {
|
|
148
|
+
size_t M = d / 2;
|
|
149
|
+
FAISS_THROW_IF_NOT(ksub % 8 == 0);
|
|
150
|
+
|
|
151
|
+
for (size_t m0 = 0; m0 < M; m0 += 4) {
|
|
152
|
+
int m1 = std::min(M, m0 + 4);
|
|
153
|
+
for (int k0 = 0; k0 < ksub; k0 += 8) {
|
|
154
|
+
simd8float32_tpl<THE_SIMD_LEVEL> centroids[8];
|
|
155
|
+
for (int k = 0; k < 8; k++) {
|
|
156
|
+
ALIGNED(32) float centroid[8];
|
|
157
|
+
size_t wp = 0;
|
|
158
|
+
size_t rp = (m0 * ksub + k + k0) * 2;
|
|
159
|
+
for (int m = m0; m < m1; m++) {
|
|
160
|
+
centroid[wp++] = all_centroids[rp];
|
|
161
|
+
centroid[wp++] = all_centroids[rp + 1];
|
|
162
|
+
rp += 2 * ksub;
|
|
163
|
+
}
|
|
164
|
+
centroids[k] = simd8float32_tpl<THE_SIMD_LEVEL>(centroid);
|
|
165
|
+
}
|
|
166
|
+
for (size_t i = 0; i < nx; i++) {
|
|
167
|
+
simd8float32_tpl<THE_SIMD_LEVEL> xi;
|
|
168
|
+
if (m1 == m0 + 4) {
|
|
169
|
+
xi.loadu(x + i * d + m0 * 2);
|
|
170
|
+
} else {
|
|
171
|
+
xi = load_simd8float32_partial<THE_SIMD_LEVEL>(
|
|
172
|
+
x + i * d + m0 * 2, 2 * (m1 - m0));
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
if (is_inner_product) {
|
|
176
|
+
pq2_8cents_table<THE_SIMD_LEVEL, true>(
|
|
177
|
+
centroids,
|
|
178
|
+
xi,
|
|
179
|
+
dis_tables + (i * M + m0) * ksub + k0,
|
|
180
|
+
ksub,
|
|
181
|
+
m1 - m0);
|
|
182
|
+
} else {
|
|
183
|
+
pq2_8cents_table<THE_SIMD_LEVEL, false>(
|
|
184
|
+
centroids,
|
|
185
|
+
xi,
|
|
186
|
+
dis_tables + (i * M + m0) * ksub + k0,
|
|
187
|
+
ksub,
|
|
188
|
+
m1 - m0);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
} // namespace faiss
|
|
@@ -360,7 +360,8 @@ inline int fvec_madd_and_argmin_sse(
|
|
|
360
360
|
float bf,
|
|
361
361
|
const float* b,
|
|
362
362
|
float* c) {
|
|
363
|
-
if ((n & 3) == 0 &&
|
|
363
|
+
if ((n & 3) == 0 &&
|
|
364
|
+
((((uintptr_t)a) | ((uintptr_t)b) | ((uintptr_t)c)) & 15) == 0) {
|
|
364
365
|
return fvec_madd_and_argmin_sse_ref(n, a, bf, b, c);
|
|
365
366
|
} else {
|
|
366
367
|
return fvec_madd_and_argmin<SIMDLevel::NONE>(n, a, bf, b, c);
|