faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -100,7 +100,7 @@ void float_rand(float* x, size_t n, int64_t seed) {
|
|
|
100
100
|
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
101
101
|
|
|
102
102
|
#pragma omp parallel for
|
|
103
|
-
for (int64_t j = 0; j < nblock; j++) {
|
|
103
|
+
for (int64_t j = 0; j < static_cast<int64_t>(nblock); j++) {
|
|
104
104
|
RandomGenerator rng(a0 + j * b0);
|
|
105
105
|
|
|
106
106
|
const size_t istart = j * n / nblock;
|
|
@@ -120,7 +120,7 @@ void float_randn(float* x, size_t n, int64_t seed) {
|
|
|
120
120
|
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
121
121
|
|
|
122
122
|
#pragma omp parallel for
|
|
123
|
-
for (int64_t j = 0; j < nblock; j++) {
|
|
123
|
+
for (int64_t j = 0; j < static_cast<int64_t>(nblock); j++) {
|
|
124
124
|
RandomGenerator rng(a0 + j * b0);
|
|
125
125
|
|
|
126
126
|
double a = 0, b = 0, s = 0;
|
|
@@ -155,7 +155,7 @@ void int64_rand(int64_t* x, size_t n, int64_t seed) {
|
|
|
155
155
|
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
156
156
|
|
|
157
157
|
#pragma omp parallel for
|
|
158
|
-
for (int64_t j = 0; j < nblock; j++) {
|
|
158
|
+
for (int64_t j = 0; j < static_cast<int64_t>(nblock); j++) {
|
|
159
159
|
RandomGenerator rng(a0 + j * b0);
|
|
160
160
|
|
|
161
161
|
const size_t istart = j * n / nblock;
|
|
@@ -174,7 +174,7 @@ void int64_rand_max(int64_t* x, size_t n, uint64_t max, int64_t seed) {
|
|
|
174
174
|
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
175
175
|
|
|
176
176
|
#pragma omp parallel for
|
|
177
|
-
for (int64_t j = 0; j < nblock; j++) {
|
|
177
|
+
for (int64_t j = 0; j < static_cast<int64_t>(nblock); j++) {
|
|
178
178
|
RandomGenerator rng(a0 + j * b0);
|
|
179
179
|
|
|
180
180
|
const size_t istart = j * n / nblock;
|
|
@@ -219,7 +219,7 @@ void byte_rand(uint8_t* x, size_t n, int64_t seed) {
|
|
|
219
219
|
int a0 = rng0.rand_int(), b0 = rng0.rand_int();
|
|
220
220
|
|
|
221
221
|
#pragma omp parallel for
|
|
222
|
-
for (int64_t j = 0; j < nblock; j++) {
|
|
222
|
+
for (int64_t j = 0; j < static_cast<int64_t>(nblock); j++) {
|
|
223
223
|
RandomGenerator rng(a0 + j * b0);
|
|
224
224
|
|
|
225
225
|
const size_t istart = j * n / nblock;
|
|
@@ -261,7 +261,7 @@ void rand_smooth_vectors(size_t n, size_t d, float* x, int64_t seed) {
|
|
|
261
261
|
float_rand(scales.data(), d, seed + 2);
|
|
262
262
|
|
|
263
263
|
#pragma omp parallel for if (n * d > 10000)
|
|
264
|
-
for (int64_t i = 0; i < n; i++) {
|
|
264
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
265
265
|
for (size_t j = 0; j < d; j++) {
|
|
266
266
|
x[i * d + j] = sinf(x[i * d + j] * (scales[j] * 4 + 0.1));
|
|
267
267
|
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <faiss/IndexIVFFlat.h>
|
|
11
|
+
#include <faiss/impl/expanded_scanners.h>
|
|
12
|
+
|
|
13
|
+
#ifndef THE_SIMD_LEVEL
|
|
14
|
+
#error "THE_SIMD_LEVEL not defined"
|
|
15
|
+
#endif
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
constexpr faiss::SIMDLevel THE_SL = THE_SIMD_LEVEL;
|
|
20
|
+
|
|
21
|
+
#define DEFINE_IVFFLAT_SCANNER_METHODS(mt) \
|
|
22
|
+
template <> \
|
|
23
|
+
float IVFFlatScanner<VectorDistance<mt, THE_SL>>::distance_to_code( \
|
|
24
|
+
const uint8_t* code) const { \
|
|
25
|
+
const float* yj = (float*)code; \
|
|
26
|
+
return vd(xi, yj); \
|
|
27
|
+
} \
|
|
28
|
+
template <> \
|
|
29
|
+
size_t IVFFlatScanner<VectorDistance<mt, THE_SL>>::scan_codes( \
|
|
30
|
+
size_t list_size, \
|
|
31
|
+
const uint8_t* codes, \
|
|
32
|
+
const idx_t* ids, \
|
|
33
|
+
ResultHandler& handler) const { \
|
|
34
|
+
return run_scan_codes_fix_C<C>(*this, list_size, codes, ids, handler); \
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_L2)
|
|
38
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_INNER_PRODUCT)
|
|
39
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_L1)
|
|
40
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_Linf)
|
|
41
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_Lp)
|
|
42
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_Canberra)
|
|
43
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_BrayCurtis)
|
|
44
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_JensenShannon)
|
|
45
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_Jaccard)
|
|
46
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_NaNEuclidean)
|
|
47
|
+
DEFINE_IVFFLAT_SCANNER_METHODS(METRIC_GOWER)
|
|
48
|
+
|
|
49
|
+
#undef DEFINE_IVFFLAT_SCANNER_METHODS
|
|
50
|
+
|
|
51
|
+
} // namespace faiss
|
|
@@ -12,8 +12,12 @@
|
|
|
12
12
|
#include <arm_neon.h>
|
|
13
13
|
#include <limits>
|
|
14
14
|
|
|
15
|
-
#define
|
|
15
|
+
#define THE_SIMD_LEVEL SIMDLevel::ARM_NEON
|
|
16
16
|
#include <faiss/utils/simd_impl/distances_autovec-inl.h>
|
|
17
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
18
|
+
#include <faiss/utils/simd_impl/IVFFlatScanner-inl.h>
|
|
19
|
+
|
|
20
|
+
#include <faiss/utils/simd_impl/distances_simdlib256.h>
|
|
17
21
|
|
|
18
22
|
namespace faiss {
|
|
19
23
|
|
|
@@ -9,18 +9,47 @@
|
|
|
9
9
|
|
|
10
10
|
#include <faiss/utils/distances.h>
|
|
11
11
|
|
|
12
|
-
#
|
|
12
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
|
13
|
+
#include <faiss/impl/ResultHandler.h>
|
|
14
|
+
#include <faiss/utils/distances_fused/distances_fused.h>
|
|
15
|
+
#include <faiss/utils/simd_impl/exhaustive_L2sqr_blas_cmax.h>
|
|
16
|
+
|
|
17
|
+
#ifndef FINTEGER
|
|
18
|
+
#define FINTEGER long
|
|
19
|
+
#endif
|
|
20
|
+
|
|
21
|
+
extern "C" {
|
|
22
|
+
|
|
23
|
+
int sgemm_(
|
|
24
|
+
const char* transa,
|
|
25
|
+
const char* transb,
|
|
26
|
+
FINTEGER* m,
|
|
27
|
+
FINTEGER* n,
|
|
28
|
+
FINTEGER* k,
|
|
29
|
+
const float* alpha,
|
|
30
|
+
const float* a,
|
|
31
|
+
FINTEGER* lda,
|
|
32
|
+
const float* b,
|
|
33
|
+
FINTEGER* ldb,
|
|
34
|
+
float* beta,
|
|
35
|
+
float* c,
|
|
36
|
+
FINTEGER* ldc);
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
#define THE_SIMD_LEVEL SIMDLevel::ARM_SVE
|
|
13
40
|
#include <faiss/utils/simd_impl/distances_autovec-inl.h>
|
|
41
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
42
|
+
#include <faiss/utils/simd_impl/IVFFlatScanner-inl.h>
|
|
14
43
|
|
|
15
44
|
namespace faiss {
|
|
16
45
|
|
|
17
46
|
template <>
|
|
18
47
|
void fvec_madd<SIMDLevel::ARM_SVE>(
|
|
19
48
|
const size_t n,
|
|
20
|
-
const float*
|
|
49
|
+
const float* a,
|
|
21
50
|
const float bf,
|
|
22
|
-
const float*
|
|
23
|
-
float*
|
|
51
|
+
const float* b,
|
|
52
|
+
float* c) {
|
|
24
53
|
const size_t lanes = static_cast<size_t>(svcntw());
|
|
25
54
|
const size_t lanes2 = lanes * 2;
|
|
26
55
|
const size_t lanes3 = lanes * 3;
|
|
@@ -565,4 +594,184 @@ size_t fvec_L2sqr_ny_nearest_y_transposed<SIMDLevel::ARM_SVE>(
|
|
|
565
594
|
return nearest_idx;
|
|
566
595
|
}
|
|
567
596
|
|
|
597
|
+
template <>
|
|
598
|
+
void exhaustive_L2sqr_blas_cmax<SIMDLevel::ARM_SVE>(
|
|
599
|
+
const float* x,
|
|
600
|
+
const float* y,
|
|
601
|
+
size_t d,
|
|
602
|
+
size_t nx,
|
|
603
|
+
size_t ny,
|
|
604
|
+
Top1BlockResultHandler<CMax<float, int64_t>>& res,
|
|
605
|
+
const float* y_norms) {
|
|
606
|
+
// BLAS does not like empty matrices
|
|
607
|
+
if (nx == 0 || ny == 0)
|
|
608
|
+
return;
|
|
609
|
+
|
|
610
|
+
/* block sizes */
|
|
611
|
+
const size_t bs_x = distance_compute_blas_query_bs;
|
|
612
|
+
const size_t bs_y = distance_compute_blas_database_bs;
|
|
613
|
+
// const size_t bs_x = 16, bs_y = 16;
|
|
614
|
+
std::unique_ptr<float[]> ip_block(new float[bs_x * bs_y]);
|
|
615
|
+
std::unique_ptr<float[]> x_norms(new float[nx]);
|
|
616
|
+
std::unique_ptr<float[]> del2;
|
|
617
|
+
|
|
618
|
+
fvec_norms_L2sqr(x_norms.get(), x, d, nx);
|
|
619
|
+
|
|
620
|
+
const size_t lanes = svcntw();
|
|
621
|
+
|
|
622
|
+
if (!y_norms) {
|
|
623
|
+
float* y_norms2 = new float[ny];
|
|
624
|
+
del2.reset(y_norms2);
|
|
625
|
+
fvec_norms_L2sqr(y_norms2, y, d, ny);
|
|
626
|
+
y_norms = y_norms2;
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
for (size_t i0 = 0; i0 < nx; i0 += bs_x) {
|
|
630
|
+
size_t i1 = i0 + bs_x;
|
|
631
|
+
if (i1 > nx)
|
|
632
|
+
i1 = nx;
|
|
633
|
+
|
|
634
|
+
res.begin_multiple(i0, i1);
|
|
635
|
+
|
|
636
|
+
for (size_t j0 = 0; j0 < ny; j0 += bs_y) {
|
|
637
|
+
size_t j1 = j0 + bs_y;
|
|
638
|
+
if (j1 > ny)
|
|
639
|
+
j1 = ny;
|
|
640
|
+
/* compute the actual dot products */
|
|
641
|
+
{
|
|
642
|
+
float one = 1, zero = 0;
|
|
643
|
+
FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d;
|
|
644
|
+
sgemm_("Transpose",
|
|
645
|
+
"Not transpose",
|
|
646
|
+
&nyi,
|
|
647
|
+
&nxi,
|
|
648
|
+
&di,
|
|
649
|
+
&one,
|
|
650
|
+
y + j0 * d,
|
|
651
|
+
&di,
|
|
652
|
+
x + i0 * d,
|
|
653
|
+
&di,
|
|
654
|
+
&zero,
|
|
655
|
+
ip_block.get(),
|
|
656
|
+
&nyi);
|
|
657
|
+
}
|
|
658
|
+
#pragma omp parallel for schedule(static) if ((i1 - i0) >= 16)
|
|
659
|
+
for (int64_t i = static_cast<int64_t>(i0);
|
|
660
|
+
i < static_cast<int64_t>(i1);
|
|
661
|
+
i++) {
|
|
662
|
+
const size_t count = j1 - j0;
|
|
663
|
+
float* ip_line = ip_block.get() + (i - i0) * count;
|
|
664
|
+
|
|
665
|
+
svprfw(svwhilelt_b32_u64(0, count), ip_line, SV_PLDL1KEEP);
|
|
666
|
+
svprfw(svwhilelt_b32_u64(lanes, count),
|
|
667
|
+
ip_line + lanes,
|
|
668
|
+
SV_PLDL1KEEP);
|
|
669
|
+
|
|
670
|
+
// Track lanes min distances + lanes min indices.
|
|
671
|
+
// All the distances tracked do not take x_norms[i]
|
|
672
|
+
// into account in order to get rid of extra
|
|
673
|
+
// vaddq_f32(x_norms[i], ...) instructions
|
|
674
|
+
// in distance computations.
|
|
675
|
+
auto min_distances = svdup_n_f32(res.dis_tab[i] - x_norms[i]);
|
|
676
|
+
|
|
677
|
+
// these indices are local and are relative to j0.
|
|
678
|
+
// so, value 0 means j0.
|
|
679
|
+
auto min_indices = svdup_n_u32(0u);
|
|
680
|
+
|
|
681
|
+
auto current_indices = svindex_u32(0u, 1u);
|
|
682
|
+
|
|
683
|
+
// process lanes * 2 elements per loop
|
|
684
|
+
for (size_t idx_j = 0; idx_j < count;
|
|
685
|
+
idx_j += lanes * 2, ip_line += lanes * 2) {
|
|
686
|
+
svprfw(svwhilelt_b32_u64(idx_j + lanes * 2, count),
|
|
687
|
+
ip_line + lanes * 2,
|
|
688
|
+
SV_PLDL1KEEP);
|
|
689
|
+
svprfw(svwhilelt_b32_u64(idx_j + lanes * 3, count),
|
|
690
|
+
ip_line + lanes * 3,
|
|
691
|
+
SV_PLDL1KEEP);
|
|
692
|
+
|
|
693
|
+
// mask
|
|
694
|
+
const auto mask_0 = svwhilelt_b32_u64(idx_j, count);
|
|
695
|
+
const auto mask_1 = svwhilelt_b32_u64(idx_j + lanes, count);
|
|
696
|
+
|
|
697
|
+
// load values for norms
|
|
698
|
+
const auto y_norm_0 =
|
|
699
|
+
svld1_f32(mask_0, y_norms + idx_j + j0 + 0);
|
|
700
|
+
const auto y_norm_1 =
|
|
701
|
+
svld1_f32(mask_1, y_norms + idx_j + j0 + lanes);
|
|
702
|
+
|
|
703
|
+
// load values for dot products
|
|
704
|
+
const auto ip_0 = svld1_f32(mask_0, ip_line + 0);
|
|
705
|
+
const auto ip_1 = svld1_f32(mask_1, ip_line + lanes);
|
|
706
|
+
|
|
707
|
+
// compute dis = y_norm[j] - 2 * dot(x[i], y[j]).
|
|
708
|
+
// x_norm[i] was dropped off because it is a constant for a
|
|
709
|
+
// given i. We'll deal with it later.
|
|
710
|
+
const auto distances_0 =
|
|
711
|
+
svmla_n_f32_z(mask_0, y_norm_0, ip_0, -2.f);
|
|
712
|
+
const auto distances_1 =
|
|
713
|
+
svmla_n_f32_z(mask_1, y_norm_1, ip_1, -2.f);
|
|
714
|
+
|
|
715
|
+
// compare the new distances to the min distances
|
|
716
|
+
// for each of the first group of `lanes` SVE components.
|
|
717
|
+
auto comparison =
|
|
718
|
+
svcmpgt_f32(mask_0, min_distances, distances_0);
|
|
719
|
+
|
|
720
|
+
// update min distances and indices with closest vectors if
|
|
721
|
+
// needed.
|
|
722
|
+
min_distances =
|
|
723
|
+
svsel_f32(comparison, distances_0, min_distances);
|
|
724
|
+
min_indices =
|
|
725
|
+
svsel_u32(comparison, current_indices, min_indices);
|
|
726
|
+
current_indices = svadd_n_u32_x(
|
|
727
|
+
mask_0,
|
|
728
|
+
current_indices,
|
|
729
|
+
static_cast<uint32_t>(lanes));
|
|
730
|
+
|
|
731
|
+
// compare the new distances to the min distances
|
|
732
|
+
// for each of the second group of `lanes` SVE components.
|
|
733
|
+
comparison =
|
|
734
|
+
svcmpgt_f32(mask_1, min_distances, distances_1);
|
|
735
|
+
|
|
736
|
+
// update min distances and indices with closest vectors if
|
|
737
|
+
// needed.
|
|
738
|
+
min_distances =
|
|
739
|
+
svsel_f32(comparison, distances_1, min_distances);
|
|
740
|
+
min_indices =
|
|
741
|
+
svsel_u32(comparison, current_indices, min_indices);
|
|
742
|
+
current_indices = svadd_n_u32_x(
|
|
743
|
+
mask_1,
|
|
744
|
+
current_indices,
|
|
745
|
+
static_cast<uint32_t>(lanes));
|
|
746
|
+
}
|
|
747
|
+
|
|
748
|
+
// add missing x_norms[i]
|
|
749
|
+
// negative values can occur for identical vectors
|
|
750
|
+
// due to roundoff errors.
|
|
751
|
+
auto mask = svwhilelt_b32_u64(0, count);
|
|
752
|
+
min_distances = svadd_n_f32_z(
|
|
753
|
+
svcmpge_n_f32(mask, min_distances, -x_norms[i]),
|
|
754
|
+
min_distances,
|
|
755
|
+
x_norms[i]);
|
|
756
|
+
min_indices = svadd_n_u32_x(
|
|
757
|
+
mask, min_indices, static_cast<uint32_t>(j0));
|
|
758
|
+
mask = svcmple_n_f32(mask, min_distances, res.dis_tab[i]);
|
|
759
|
+
if (svcntp_b32(svptrue_b32(), mask) == 0)
|
|
760
|
+
res.add_result(i, res.dis_tab[i], res.ids_tab[i]);
|
|
761
|
+
else {
|
|
762
|
+
const auto min_distance = svminv_f32(mask, min_distances);
|
|
763
|
+
const auto min_index = svminv_u32(
|
|
764
|
+
svcmpeq_n_f32(mask, min_distances, min_distance),
|
|
765
|
+
min_indices);
|
|
766
|
+
res.add_result(i, min_distance, min_index);
|
|
767
|
+
}
|
|
768
|
+
}
|
|
769
|
+
}
|
|
770
|
+
// Does nothing for SingleBestResultHandler, but
|
|
771
|
+
// keeping the call for the consistency.
|
|
772
|
+
res.end_multiple();
|
|
773
|
+
InterruptCallback::check();
|
|
774
|
+
}
|
|
775
|
+
}
|
|
776
|
+
|
|
568
777
|
} // namespace faiss
|
|
@@ -7,14 +7,26 @@
|
|
|
7
7
|
|
|
8
8
|
#pragma once
|
|
9
9
|
|
|
10
|
+
#include <faiss/impl/FaissAssert.h>
|
|
10
11
|
#include <faiss/impl/platform_macros.h>
|
|
11
12
|
#include <faiss/utils/distances.h>
|
|
13
|
+
#include <faiss/utils/extra_distances.h>
|
|
14
|
+
|
|
15
|
+
#ifndef THE_SIMD_LEVEL
|
|
16
|
+
#error "THE_SIMD_LEVEL not defined"
|
|
17
|
+
#endif
|
|
12
18
|
|
|
13
19
|
namespace faiss {
|
|
14
20
|
|
|
21
|
+
// SIMD level that every specialization below is instantiated for. It is taken
// from the THE_SIMD_LEVEL macro, which must be defined before this header is
// included (the #error guard above enforces that).
constexpr faiss::SIMDLevel SL = THE_SIMD_LEVEL;
|
|
22
|
+
/******************************************************************
|
|
23
|
+
* These functions are simple enough that the compiler will do a good job
|
|
24
|
+
* vectorizing them given the appropriate flags.
|
|
25
|
+
******************************************************************/
|
|
26
|
+
|
|
15
27
|
FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
|
|
16
28
|
template <>
|
|
17
|
-
float fvec_norm_L2sqr<
|
|
29
|
+
float fvec_norm_L2sqr<SL>(const float* x, size_t d) {
|
|
18
30
|
// the double in the _ref is suspected to be a typo. Some of the manual
|
|
19
31
|
// implementations this replaces used float.
|
|
20
32
|
float res = 0;
|
|
@@ -29,7 +41,7 @@ FAISS_PRAGMA_IMPRECISE_FUNCTION_END
|
|
|
29
41
|
|
|
30
42
|
FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
|
|
31
43
|
template <>
|
|
32
|
-
float fvec_L2sqr<
|
|
44
|
+
float fvec_L2sqr<SL>(const float* x, const float* y, size_t d) {
|
|
33
45
|
size_t i;
|
|
34
46
|
float res = 0;
|
|
35
47
|
FAISS_PRAGMA_IMPRECISE_LOOP
|
|
@@ -43,10 +55,7 @@ FAISS_PRAGMA_IMPRECISE_FUNCTION_END
|
|
|
43
55
|
|
|
44
56
|
FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
|
|
45
57
|
template <>
|
|
46
|
-
float fvec_inner_product<
|
|
47
|
-
const float* x,
|
|
48
|
-
const float* y,
|
|
49
|
-
size_t d) {
|
|
58
|
+
float fvec_inner_product<SL>(const float* x, const float* y, size_t d) {
|
|
50
59
|
float res = 0.F;
|
|
51
60
|
FAISS_PRAGMA_IMPRECISE_LOOP
|
|
52
61
|
for (size_t i = 0; i != d; ++i) {
|
|
@@ -58,7 +67,7 @@ FAISS_PRAGMA_IMPRECISE_FUNCTION_END
|
|
|
58
67
|
|
|
59
68
|
FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
|
|
60
69
|
template <>
|
|
61
|
-
float fvec_L1<
|
|
70
|
+
float fvec_L1<SL>(const float* x, const float* y, size_t d) {
|
|
62
71
|
size_t i;
|
|
63
72
|
float res = 0;
|
|
64
73
|
FAISS_PRAGMA_IMPRECISE_LOOP
|
|
@@ -72,7 +81,7 @@ FAISS_PRAGMA_IMPRECISE_FUNCTION_END
|
|
|
72
81
|
|
|
73
82
|
FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
|
|
74
83
|
template <>
|
|
75
|
-
float fvec_Linf<
|
|
84
|
+
float fvec_Linf<SL>(const float* x, const float* y, size_t d) {
|
|
76
85
|
float res = 0;
|
|
77
86
|
FAISS_PRAGMA_IMPRECISE_LOOP
|
|
78
87
|
for (size_t i = 0; i < d; i++) {
|
|
@@ -84,7 +93,7 @@ FAISS_PRAGMA_IMPRECISE_FUNCTION_END
|
|
|
84
93
|
|
|
85
94
|
FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
|
|
86
95
|
template <>
|
|
87
|
-
void fvec_inner_product_batch_4<
|
|
96
|
+
void fvec_inner_product_batch_4<SL>(
|
|
88
97
|
const float* x,
|
|
89
98
|
const float* y0,
|
|
90
99
|
const float* y1,
|
|
@@ -116,7 +125,7 @@ FAISS_PRAGMA_IMPRECISE_FUNCTION_END
|
|
|
116
125
|
|
|
117
126
|
FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
|
|
118
127
|
template <>
|
|
119
|
-
void fvec_L2sqr_batch_4<
|
|
128
|
+
void fvec_L2sqr_batch_4<SL>(
|
|
120
129
|
const float* x,
|
|
121
130
|
const float* y0,
|
|
122
131
|
const float* y1,
|
|
@@ -150,4 +159,148 @@ void fvec_L2sqr_batch_4<AUTOVEC_LEVEL>(
|
|
|
150
159
|
}
|
|
151
160
|
FAISS_PRAGMA_IMPRECISE_FUNCTION_END
|
|
152
161
|
|
|
162
|
+
/******************************************************************
|
|
163
|
+
* VectorDistance::operator() specializations — defined out-of-class
|
|
164
|
+
* so that SIMD compilation units produce externally-linkable symbols.
|
|
165
|
+
******************************************************************/
|
|
166
|
+
|
|
167
|
+
template <>
|
|
168
|
+
float VectorDistance<METRIC_L2, SL>::operator()(const float* x, const float* y)
|
|
169
|
+
const {
|
|
170
|
+
return fvec_L2sqr<SL>(x, y, this->d);
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
template <>
|
|
174
|
+
float VectorDistance<METRIC_INNER_PRODUCT, SL>::operator()(
|
|
175
|
+
const float* x,
|
|
176
|
+
const float* y) const {
|
|
177
|
+
return fvec_inner_product<SL>(x, y, this->d);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
template <>
|
|
181
|
+
float VectorDistance<METRIC_L1, SL>::operator()(const float* x, const float* y)
|
|
182
|
+
const {
|
|
183
|
+
return fvec_L1<SL>(x, y, this->d);
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
template <>
|
|
187
|
+
float VectorDistance<METRIC_Linf, SL>::operator()(
|
|
188
|
+
const float* x,
|
|
189
|
+
const float* y) const {
|
|
190
|
+
return fvec_Linf<SL>(x, y, this->d);
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
template <>
|
|
194
|
+
float VectorDistance<METRIC_Lp, SL>::operator()(const float* x, const float* y)
|
|
195
|
+
const {
|
|
196
|
+
float accu = 0;
|
|
197
|
+
for (size_t i = 0; i < this->d; i++) {
|
|
198
|
+
float diff = fabs(x[i] - y[i]);
|
|
199
|
+
accu += powf(diff, this->metric_arg);
|
|
200
|
+
}
|
|
201
|
+
return accu;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
template <>
|
|
205
|
+
float VectorDistance<METRIC_Canberra, SL>::operator()(
|
|
206
|
+
const float* x,
|
|
207
|
+
const float* y) const {
|
|
208
|
+
float accu = 0;
|
|
209
|
+
for (size_t i = 0; i < this->d; i++) {
|
|
210
|
+
float xi = x[i], yi = y[i];
|
|
211
|
+
accu += fabs(xi - yi) / (fabs(xi) + fabs(yi));
|
|
212
|
+
}
|
|
213
|
+
return accu;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
template <>
|
|
217
|
+
float VectorDistance<METRIC_BrayCurtis, SL>::operator()(
|
|
218
|
+
const float* x,
|
|
219
|
+
const float* y) const {
|
|
220
|
+
float accu_num = 0, accu_den = 0;
|
|
221
|
+
for (size_t i = 0; i < this->d; i++) {
|
|
222
|
+
float xi = x[i], yi = y[i];
|
|
223
|
+
accu_num += fabs(xi - yi);
|
|
224
|
+
accu_den += fabs(xi + yi);
|
|
225
|
+
}
|
|
226
|
+
return accu_num / accu_den;
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
template <>
|
|
230
|
+
float VectorDistance<METRIC_JensenShannon, SL>::operator()(
|
|
231
|
+
const float* x,
|
|
232
|
+
const float* y) const {
|
|
233
|
+
float accu = 0;
|
|
234
|
+
for (size_t i = 0; i < this->d; i++) {
|
|
235
|
+
float xi = x[i], yi = y[i];
|
|
236
|
+
float mi = 0.5 * (xi + yi);
|
|
237
|
+
float kl1 = -xi * log(mi / xi);
|
|
238
|
+
float kl2 = -yi * log(mi / yi);
|
|
239
|
+
accu += kl1 + kl2;
|
|
240
|
+
}
|
|
241
|
+
return 0.5 * accu;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
template <>
|
|
245
|
+
float VectorDistance<METRIC_Jaccard, SL>::operator()(
|
|
246
|
+
const float* x,
|
|
247
|
+
const float* y) const {
|
|
248
|
+
float accu_num = 0, accu_den = 0;
|
|
249
|
+
for (size_t i = 0; i < this->d; i++) {
|
|
250
|
+
accu_num += fmin(x[i], y[i]);
|
|
251
|
+
accu_den += fmax(x[i], y[i]);
|
|
252
|
+
}
|
|
253
|
+
return accu_num / accu_den;
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
template <>
|
|
257
|
+
float VectorDistance<METRIC_NaNEuclidean, SL>::operator()(
|
|
258
|
+
const float* x,
|
|
259
|
+
const float* y) const {
|
|
260
|
+
float accu = 0;
|
|
261
|
+
size_t present = 0;
|
|
262
|
+
for (size_t i = 0; i < this->d; i++) {
|
|
263
|
+
if (!std::isnan(x[i]) && !std::isnan(y[i])) {
|
|
264
|
+
float diff = x[i] - y[i];
|
|
265
|
+
accu += diff * diff;
|
|
266
|
+
present++;
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
if (present == 0) {
|
|
270
|
+
return NAN;
|
|
271
|
+
}
|
|
272
|
+
return float(this->d) / float(present) * accu;
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
template <>
|
|
276
|
+
float VectorDistance<METRIC_GOWER, SL>::operator()(
|
|
277
|
+
const float* x,
|
|
278
|
+
const float* y) const {
|
|
279
|
+
float accu = 0;
|
|
280
|
+
size_t valid_dims = 0;
|
|
281
|
+
|
|
282
|
+
for (size_t i = 0; i < this->d; i++) {
|
|
283
|
+
if (std::isnan(x[i]) || std::isnan(y[i])) {
|
|
284
|
+
continue;
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
if (x[i] >= 0 && y[i] >= 0) {
|
|
288
|
+
if (x[i] > 1 || y[i] > 1) {
|
|
289
|
+
return std::numeric_limits<float>::quiet_NaN();
|
|
290
|
+
}
|
|
291
|
+
accu += fabs(x[i] - y[i]);
|
|
292
|
+
} else if (x[i] < 0 && y[i] < 0) {
|
|
293
|
+
accu += float(int(x[i] != y[i]));
|
|
294
|
+
} else {
|
|
295
|
+
return std::numeric_limits<float>::quiet_NaN();
|
|
296
|
+
}
|
|
297
|
+
valid_dims++;
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
if (valid_dims == 0) {
|
|
301
|
+
return std::numeric_limits<float>::quiet_NaN();
|
|
302
|
+
}
|
|
303
|
+
return accu / valid_dims;
|
|
304
|
+
}
|
|
305
|
+
|
|
153
306
|
} // namespace faiss
|