faiss 0.5.3 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +4 -4
- data/ext/faiss/index.cpp +63 -45
- data/ext/faiss/index_binary.cpp +37 -27
- data/ext/faiss/kmeans.cpp +9 -8
- data/ext/faiss/pca_matrix.cpp +9 -7
- data/ext/faiss/product_quantizer.cpp +13 -11
- data/ext/faiss/utils.cpp +4 -2
- data/ext/faiss/utils.h +4 -0
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +214 -82
- data/vendor/faiss/faiss/AutoTune.h +14 -1
- data/vendor/faiss/faiss/Clustering.cpp +97 -249
- data/vendor/faiss/faiss/Clustering.h +18 -0
- data/vendor/faiss/faiss/IVFlib.cpp +67 -44
- data/vendor/faiss/faiss/Index.cpp +25 -12
- data/vendor/faiss/faiss/Index.h +26 -4
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +68 -61
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +6 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +92 -95
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +120 -414
- data/vendor/faiss/faiss/IndexFastScan.cpp +105 -129
- data/vendor/faiss/faiss/IndexFastScan.h +35 -24
- data/vendor/faiss/faiss/IndexFlat.cpp +216 -152
- data/vendor/faiss/faiss/IndexFlat.h +32 -14
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +88 -41
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +299 -187
- data/vendor/faiss/faiss/IndexHNSW.h +30 -14
- data/vendor/faiss/faiss/IndexIDMap.cpp +26 -22
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +535 -405
- data/vendor/faiss/faiss/IndexIVF.h +47 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +105 -99
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +6 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +379 -249
- data/vendor/faiss/faiss/IndexIVFFastScan.h +65 -60
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +41 -124
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +89 -138
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +77 -907
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +184 -122
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -18
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +59 -60
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +564 -416
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +269 -111
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +44 -25
- data/vendor/faiss/faiss/IndexLattice.cpp +41 -36
- data/vendor/faiss/faiss/IndexNNDescent.cpp +37 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +40 -23
- data/vendor/faiss/faiss/IndexNSG.h +0 -2
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +32 -12
- data/vendor/faiss/faiss/IndexPQ.cpp +129 -213
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +31 -43
- data/vendor/faiss/faiss/IndexRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +135 -317
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +192 -34
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -55
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +13 -13
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +29 -6
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +349 -141
- data/vendor/faiss/faiss/VectorTransform.h +39 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +55 -51
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +6 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +64 -34
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -28
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
- data/vendor/faiss/faiss/impl/CodePacker.cpp +7 -3
- data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +64 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +117 -351
- data/vendor/faiss/faiss/impl/HNSW.h +21 -40
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +114 -102
- data/vendor/faiss/faiss/impl/NNDescent.cpp +63 -26
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +44 -26
- data/vendor/faiss/faiss/impl/NSG.h +20 -10
- data/vendor/faiss/faiss/impl/Panorama.cpp +76 -52
- data/vendor/faiss/faiss/impl/Panorama.h +265 -78
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +62 -37
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +99 -80
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +135 -37
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +148 -21
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +298 -301
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +40 -32
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +218 -113
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +119 -2362
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
- data/vendor/faiss/faiss/impl/VisitedTable.h +76 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +163 -0
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +176 -4
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -348
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +290 -142
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1950 -505
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -2
- data/vendor/faiss/faiss/impl/index_write.cpp +112 -21
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +81 -40
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +15 -8
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.h} +43 -220
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.h} +25 -112
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +59 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +256 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -146
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +320 -483
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +137 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +371 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +190 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +603 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +597 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +388 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +630 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +387 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +54 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +173 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +274 -171
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +275 -217
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +115 -28
- data/vendor/faiss/faiss/index_io.h +53 -3
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +73 -20
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +14 -14
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +14 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +56 -10
- data/vendor/faiss/faiss/utils/Heap.h +21 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +54 -40
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +507 -559
- data/vendor/faiss/faiss/utils/distances.h +118 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +250 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +72 -3681
- data/vendor/faiss/faiss/utils/extra_distances.cpp +60 -102
- data/vendor/faiss/faiss/utils/extra_distances.h +79 -7
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +124 -343
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +154 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +777 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +306 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1431 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1095 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +392 -0
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +334 -0
- data/vendor/faiss/faiss/utils/simd_levels.h +183 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +21 -14
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +156 -42
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -216
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -224
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -228
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -450
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -296
- /data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
#include <faiss/impl/platform_macros.h>
|
|
17
17
|
#include <faiss/utils/Heap.h>
|
|
18
|
+
#include <faiss/utils/simd_levels.h>
|
|
18
19
|
|
|
19
20
|
namespace faiss {
|
|
20
21
|
|
|
@@ -27,15 +28,27 @@ struct IDSelector;
|
|
|
27
28
|
/// Squared L2 distance between two vectors
|
|
28
29
|
float fvec_L2sqr(const float* x, const float* y, size_t d);
|
|
29
30
|
|
|
31
|
+
template <SIMDLevel>
|
|
32
|
+
float fvec_L2sqr(const float* x, const float* y, size_t d);
|
|
33
|
+
|
|
30
34
|
/// inner product
|
|
31
35
|
float fvec_inner_product(const float* x, const float* y, size_t d);
|
|
32
36
|
|
|
37
|
+
template <SIMDLevel>
|
|
38
|
+
float fvec_inner_product(const float* x, const float* y, size_t d);
|
|
39
|
+
|
|
33
40
|
/// L1 distance
|
|
34
41
|
float fvec_L1(const float* x, const float* y, size_t d);
|
|
35
42
|
|
|
43
|
+
template <SIMDLevel>
|
|
44
|
+
float fvec_L1(const float* x, const float* y, size_t d);
|
|
45
|
+
|
|
36
46
|
/// infinity distance
|
|
37
47
|
float fvec_Linf(const float* x, const float* y, size_t d);
|
|
38
48
|
|
|
49
|
+
template <SIMDLevel>
|
|
50
|
+
float fvec_Linf(const float* x, const float* y, size_t d);
|
|
51
|
+
|
|
39
52
|
/// Special version of inner product that computes 4 distances
|
|
40
53
|
/// between x and yi, which is performance oriented.
|
|
41
54
|
void fvec_inner_product_batch_4(
|
|
@@ -50,6 +63,19 @@ void fvec_inner_product_batch_4(
|
|
|
50
63
|
float& dis2,
|
|
51
64
|
float& dis3);
|
|
52
65
|
|
|
66
|
+
template <SIMDLevel>
|
|
67
|
+
void fvec_inner_product_batch_4(
|
|
68
|
+
const float* x,
|
|
69
|
+
const float* y0,
|
|
70
|
+
const float* y1,
|
|
71
|
+
const float* y2,
|
|
72
|
+
const float* y3,
|
|
73
|
+
const size_t d,
|
|
74
|
+
float& dis0,
|
|
75
|
+
float& dis1,
|
|
76
|
+
float& dis2,
|
|
77
|
+
float& dis3);
|
|
78
|
+
|
|
53
79
|
/// Special version of L2sqr that computes 4 distances
|
|
54
80
|
/// between x and yi, which is performance oriented.
|
|
55
81
|
void fvec_L2sqr_batch_4(
|
|
@@ -64,6 +90,19 @@ void fvec_L2sqr_batch_4(
|
|
|
64
90
|
float& dis2,
|
|
65
91
|
float& dis3);
|
|
66
92
|
|
|
93
|
+
template <SIMDLevel>
|
|
94
|
+
void fvec_L2sqr_batch_4(
|
|
95
|
+
const float* x,
|
|
96
|
+
const float* y0,
|
|
97
|
+
const float* y1,
|
|
98
|
+
const float* y2,
|
|
99
|
+
const float* y3,
|
|
100
|
+
const size_t d,
|
|
101
|
+
float& dis0,
|
|
102
|
+
float& dis1,
|
|
103
|
+
float& dis2,
|
|
104
|
+
float& dis3);
|
|
105
|
+
|
|
67
106
|
/** Compute pairwise distances between sets of vectors
|
|
68
107
|
*
|
|
69
108
|
* @param d dimension of the vectors
|
|
@@ -93,6 +132,14 @@ void fvec_inner_products_ny(
|
|
|
93
132
|
size_t d,
|
|
94
133
|
size_t ny);
|
|
95
134
|
|
|
135
|
+
template <SIMDLevel>
|
|
136
|
+
void fvec_inner_products_ny(
|
|
137
|
+
float* ip, /* output inner product */
|
|
138
|
+
const float* x,
|
|
139
|
+
const float* y,
|
|
140
|
+
size_t d,
|
|
141
|
+
size_t ny);
|
|
142
|
+
|
|
96
143
|
/* compute ny square L2 distance between x and a set of contiguous y vectors */
|
|
97
144
|
void fvec_L2sqr_ny(
|
|
98
145
|
float* dis,
|
|
@@ -101,6 +148,14 @@ void fvec_L2sqr_ny(
|
|
|
101
148
|
size_t d,
|
|
102
149
|
size_t ny);
|
|
103
150
|
|
|
151
|
+
template <SIMDLevel>
|
|
152
|
+
void fvec_L2sqr_ny(
|
|
153
|
+
float* dis,
|
|
154
|
+
const float* x,
|
|
155
|
+
const float* y,
|
|
156
|
+
size_t d,
|
|
157
|
+
size_t ny);
|
|
158
|
+
|
|
104
159
|
/* compute ny square L2 distance between x and a set of transposed contiguous
|
|
105
160
|
y vectors. squared lengths of y should be provided as well */
|
|
106
161
|
void fvec_L2sqr_ny_transposed(
|
|
@@ -112,6 +167,16 @@ void fvec_L2sqr_ny_transposed(
|
|
|
112
167
|
size_t d_offset,
|
|
113
168
|
size_t ny);
|
|
114
169
|
|
|
170
|
+
template <SIMDLevel>
|
|
171
|
+
void fvec_L2sqr_ny_transposed(
|
|
172
|
+
float* dis,
|
|
173
|
+
const float* x,
|
|
174
|
+
const float* y,
|
|
175
|
+
const float* y_sqlen,
|
|
176
|
+
size_t d,
|
|
177
|
+
size_t d_offset,
|
|
178
|
+
size_t ny);
|
|
179
|
+
|
|
115
180
|
/* compute ny square L2 distance between x and a set of contiguous y vectors
|
|
116
181
|
and return the index of the nearest vector.
|
|
117
182
|
return 0 if ny == 0. */
|
|
@@ -122,6 +187,14 @@ size_t fvec_L2sqr_ny_nearest(
|
|
|
122
187
|
size_t d,
|
|
123
188
|
size_t ny);
|
|
124
189
|
|
|
190
|
+
template <SIMDLevel>
|
|
191
|
+
size_t fvec_L2sqr_ny_nearest(
|
|
192
|
+
float* distances_tmp_buffer,
|
|
193
|
+
const float* x,
|
|
194
|
+
const float* y,
|
|
195
|
+
size_t d,
|
|
196
|
+
size_t ny);
|
|
197
|
+
|
|
125
198
|
/* compute ny square L2 distance between x and a set of transposed contiguous
|
|
126
199
|
y vectors and return the index of the nearest vector.
|
|
127
200
|
squared lengths of y should be provided as well
|
|
@@ -135,9 +208,22 @@ size_t fvec_L2sqr_ny_nearest_y_transposed(
|
|
|
135
208
|
size_t d_offset,
|
|
136
209
|
size_t ny);
|
|
137
210
|
|
|
211
|
+
template <SIMDLevel>
|
|
212
|
+
size_t fvec_L2sqr_ny_nearest_y_transposed(
|
|
213
|
+
float* distances_tmp_buffer,
|
|
214
|
+
const float* x,
|
|
215
|
+
const float* y,
|
|
216
|
+
const float* y_sqlen,
|
|
217
|
+
size_t d,
|
|
218
|
+
size_t d_offset,
|
|
219
|
+
size_t ny);
|
|
220
|
+
|
|
138
221
|
/** squared norm of a vector */
|
|
139
222
|
float fvec_norm_L2sqr(const float* x, size_t d);
|
|
140
223
|
|
|
224
|
+
template <SIMDLevel>
|
|
225
|
+
float fvec_norm_L2sqr(const float* x, size_t d);
|
|
226
|
+
|
|
141
227
|
/** compute the L2 norms for a set of vectors
|
|
142
228
|
*
|
|
143
229
|
* @param norms output norms, size nx
|
|
@@ -175,6 +261,9 @@ void inner_product_to_L2sqr(
|
|
|
175
261
|
*/
|
|
176
262
|
void fvec_add(size_t d, const float* a, const float* b, float* c);
|
|
177
263
|
|
|
264
|
+
template <SIMDLevel>
|
|
265
|
+
void fvec_add(size_t d, const float* a, const float* b, float* c);
|
|
266
|
+
|
|
178
267
|
/** compute c := a + b for a, c vectors and b a scalar
|
|
179
268
|
*
|
|
180
269
|
* c and a can overlap
|
|
@@ -184,6 +273,9 @@ void fvec_add(size_t d, const float* a, const float* b, float* c);
|
|
|
184
273
|
*/
|
|
185
274
|
void fvec_add(size_t d, const float* a, float b, float* c);
|
|
186
275
|
|
|
276
|
+
template <SIMDLevel>
|
|
277
|
+
void fvec_add(size_t d, const float* a, float b, float* c);
|
|
278
|
+
|
|
187
279
|
/** compute c := a - b for vectors
|
|
188
280
|
*
|
|
189
281
|
* c and a can overlap, c and b can overlap
|
|
@@ -194,6 +286,9 @@ void fvec_add(size_t d, const float* a, float b, float* c);
|
|
|
194
286
|
*/
|
|
195
287
|
void fvec_sub(size_t d, const float* a, const float* b, float* c);
|
|
196
288
|
|
|
289
|
+
template <SIMDLevel>
|
|
290
|
+
void fvec_sub(size_t d, const float* a, const float* b, float* c);
|
|
291
|
+
|
|
197
292
|
/***************************************************************************
|
|
198
293
|
* Compute a subset of distances
|
|
199
294
|
***************************************************************************/
|
|
@@ -274,7 +369,7 @@ void pairwise_indexed_inner_product(
|
|
|
274
369
|
* KNN functions
|
|
275
370
|
***************************************************************************/
|
|
276
371
|
|
|
277
|
-
// threshold on nx above which we switch to BLAS to compute distances
|
|
372
|
+
// threshold on nx * d above which we switch to BLAS to compute distances
|
|
278
373
|
FAISS_API extern int distance_compute_blas_threshold;
|
|
279
374
|
|
|
280
375
|
// block sizes for BLAS distance computations
|
|
@@ -456,6 +551,16 @@ void compute_PQ_dis_tables_dsub2(
|
|
|
456
551
|
bool is_inner_product,
|
|
457
552
|
float* dis_tables);
|
|
458
553
|
|
|
554
|
+
template <SIMDLevel>
|
|
555
|
+
void compute_PQ_dis_tables_dsub2(
|
|
556
|
+
size_t d,
|
|
557
|
+
size_t ksub,
|
|
558
|
+
const float* centroids,
|
|
559
|
+
size_t nx,
|
|
560
|
+
const float* x,
|
|
561
|
+
bool is_inner_product,
|
|
562
|
+
float* dis_tables);
|
|
563
|
+
|
|
459
564
|
/***************************************************************************
|
|
460
565
|
* Templatized versions of distance functions
|
|
461
566
|
***************************************************************************/
|
|
@@ -473,6 +578,10 @@ void compute_PQ_dis_tables_dsub2(
|
|
|
473
578
|
*/
|
|
474
579
|
void fvec_madd(size_t n, const float* a, float bf, const float* b, float* c);
|
|
475
580
|
|
|
581
|
+
/* same statically */
|
|
582
|
+
template <SIMDLevel>
|
|
583
|
+
void fvec_madd(size_t n, const float* a, float bf, const float* b, float* c);
|
|
584
|
+
|
|
476
585
|
/** same as fvec_madd, also return index of the min of the result table
|
|
477
586
|
* @return index of the min of table c
|
|
478
587
|
*/
|
|
@@ -483,4 +592,12 @@ int fvec_madd_and_argmin(
|
|
|
483
592
|
const float* b,
|
|
484
593
|
float* c);
|
|
485
594
|
|
|
595
|
+
template <SIMDLevel>
|
|
596
|
+
int fvec_madd_and_argmin(
|
|
597
|
+
size_t n,
|
|
598
|
+
const float* a,
|
|
599
|
+
float bf,
|
|
600
|
+
const float* b,
|
|
601
|
+
float* c);
|
|
602
|
+
|
|
486
603
|
} // namespace faiss
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* @file distances_dispatch.h
|
|
12
|
+
* @brief Inlineable dispatch wrappers for distance functions.
|
|
13
|
+
*
|
|
14
|
+
* This is a PRIVATE header. Do not include in public APIs or user code.
|
|
15
|
+
*
|
|
16
|
+
* These wrappers call with_simd_level to route to the correct SIMD
|
|
17
|
+
* implementation. They are plain inline functions with a _dispatch suffix
|
|
18
|
+
* (e.g. fvec_L2sqr_dispatch). Internal callers that want inlining include
|
|
19
|
+
* this header and call the _dispatch variants directly.
|
|
20
|
+
*
|
|
21
|
+
* The public API functions (fvec_L2sqr, etc.) are defined as regular extern
|
|
22
|
+
* functions in distances.cpp and simply delegate to these _dispatch variants.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
#include <faiss/impl/simd_dispatch.h>
|
|
26
|
+
#include <faiss/utils/distances.h>
|
|
27
|
+
#include <faiss/utils/extra_distances.h>
|
|
28
|
+
|
|
29
|
+
namespace faiss {
|
|
30
|
+
|
|
31
|
+
inline float fvec_L1_dispatch(const float* x, const float* y, size_t d) {
|
|
32
|
+
return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A1>(
|
|
33
|
+
[&]<SIMDLevel SL>() { return fvec_L1<SL>(x, y, d); });
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
inline float fvec_Linf_dispatch(const float* x, const float* y, size_t d) {
|
|
37
|
+
return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A1>(
|
|
38
|
+
[&]<SIMDLevel SL>() { return fvec_Linf<SL>(x, y, d); });
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
inline float fvec_norm_L2sqr_dispatch(const float* x, size_t d) {
|
|
42
|
+
return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A1>(
|
|
43
|
+
[&]<SIMDLevel SL>() { return fvec_norm_L2sqr<SL>(x, d); });
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
inline float fvec_L2sqr_dispatch(const float* x, const float* y, size_t d) {
|
|
47
|
+
return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A1>(
|
|
48
|
+
[&]<SIMDLevel SL>() { return fvec_L2sqr<SL>(x, y, d); });
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
inline float fvec_inner_product_dispatch(
|
|
52
|
+
const float* x,
|
|
53
|
+
const float* y,
|
|
54
|
+
size_t d) {
|
|
55
|
+
return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A1>(
|
|
56
|
+
[&]<SIMDLevel SL>() { return fvec_inner_product<SL>(x, y, d); });
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
inline void fvec_inner_product_batch_4_dispatch(
|
|
60
|
+
const float* x,
|
|
61
|
+
const float* y0,
|
|
62
|
+
const float* y1,
|
|
63
|
+
const float* y2,
|
|
64
|
+
const float* y3,
|
|
65
|
+
const size_t d,
|
|
66
|
+
float& dis0,
|
|
67
|
+
float& dis1,
|
|
68
|
+
float& dis2,
|
|
69
|
+
float& dis3) {
|
|
70
|
+
with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A1>([&]<SIMDLevel SL>() {
|
|
71
|
+
fvec_inner_product_batch_4<SL>(
|
|
72
|
+
x, y0, y1, y2, y3, d, dis0, dis1, dis2, dis3);
|
|
73
|
+
});
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
inline void fvec_L2sqr_batch_4_dispatch(
|
|
77
|
+
const float* x,
|
|
78
|
+
const float* y0,
|
|
79
|
+
const float* y1,
|
|
80
|
+
const float* y2,
|
|
81
|
+
const float* y3,
|
|
82
|
+
const size_t d,
|
|
83
|
+
float& dis0,
|
|
84
|
+
float& dis1,
|
|
85
|
+
float& dis2,
|
|
86
|
+
float& dis3) {
|
|
87
|
+
with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A1>([&]<SIMDLevel SL>() {
|
|
88
|
+
fvec_L2sqr_batch_4<SL>(x, y0, y1, y2, y3, d, dis0, dis1, dis2, dis3);
|
|
89
|
+
});
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
inline void fvec_L2sqr_ny_transposed_dispatch(
|
|
93
|
+
float* dis,
|
|
94
|
+
const float* x,
|
|
95
|
+
const float* y,
|
|
96
|
+
const float* y_sqlen,
|
|
97
|
+
size_t d,
|
|
98
|
+
size_t d_offset,
|
|
99
|
+
size_t ny) {
|
|
100
|
+
with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A1>([&]<SIMDLevel SL>() {
|
|
101
|
+
fvec_L2sqr_ny_transposed<SL>(dis, x, y, y_sqlen, d, d_offset, ny);
|
|
102
|
+
});
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
inline void fvec_inner_products_ny_dispatch(
|
|
106
|
+
float* ip,
|
|
107
|
+
const float* x,
|
|
108
|
+
const float* y,
|
|
109
|
+
size_t d,
|
|
110
|
+
size_t ny) {
|
|
111
|
+
with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A1>([&]<SIMDLevel SL>() {
|
|
112
|
+
fvec_inner_products_ny<SL>(ip, x, y, d, ny);
|
|
113
|
+
});
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
inline void fvec_L2sqr_ny_dispatch(
|
|
117
|
+
float* dis,
|
|
118
|
+
const float* x,
|
|
119
|
+
const float* y,
|
|
120
|
+
size_t d,
|
|
121
|
+
size_t ny) {
|
|
122
|
+
with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A1>(
|
|
123
|
+
[&]<SIMDLevel SL>() { fvec_L2sqr_ny<SL>(dis, x, y, d, ny); });
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
inline size_t fvec_L2sqr_ny_nearest_dispatch(
|
|
127
|
+
float* distances_tmp_buffer,
|
|
128
|
+
const float* x,
|
|
129
|
+
const float* y,
|
|
130
|
+
size_t d,
|
|
131
|
+
size_t ny) {
|
|
132
|
+
return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A1>(
|
|
133
|
+
[&]<SIMDLevel SL>() {
|
|
134
|
+
return fvec_L2sqr_ny_nearest<SL>(
|
|
135
|
+
distances_tmp_buffer, x, y, d, ny);
|
|
136
|
+
});
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
inline size_t fvec_L2sqr_ny_nearest_y_transposed_dispatch(
|
|
140
|
+
float* distances_tmp_buffer,
|
|
141
|
+
const float* x,
|
|
142
|
+
const float* y,
|
|
143
|
+
const float* y_sqlen,
|
|
144
|
+
size_t d,
|
|
145
|
+
size_t d_offset,
|
|
146
|
+
size_t ny) {
|
|
147
|
+
return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A1>(
|
|
148
|
+
[&]<SIMDLevel SL>() {
|
|
149
|
+
return fvec_L2sqr_ny_nearest_y_transposed<SL>(
|
|
150
|
+
distances_tmp_buffer, x, y, y_sqlen, d, d_offset, ny);
|
|
151
|
+
});
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
inline void fvec_madd_dispatch(
|
|
155
|
+
size_t n,
|
|
156
|
+
const float* a,
|
|
157
|
+
float bf,
|
|
158
|
+
const float* b,
|
|
159
|
+
float* c) {
|
|
160
|
+
with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A1>(
|
|
161
|
+
[&]<SIMDLevel SL>() { fvec_madd<SL>(n, a, bf, b, c); });
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
inline int fvec_madd_and_argmin_dispatch(
|
|
165
|
+
size_t n,
|
|
166
|
+
const float* a,
|
|
167
|
+
float bf,
|
|
168
|
+
const float* b,
|
|
169
|
+
float* c) {
|
|
170
|
+
return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A1>(
|
|
171
|
+
[&]<SIMDLevel SL>() {
|
|
172
|
+
return fvec_madd_and_argmin<SL>(n, a, bf, b, c);
|
|
173
|
+
});
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
inline void fvec_sub_dispatch(
|
|
177
|
+
size_t d,
|
|
178
|
+
const float* a,
|
|
179
|
+
const float* b,
|
|
180
|
+
float* c) {
|
|
181
|
+
with_simd_level_256bit(
|
|
182
|
+
[&]<SIMDLevel level>() { fvec_sub<level>(d, a, b, c); });
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
inline void fvec_add_dispatch(
|
|
186
|
+
size_t d,
|
|
187
|
+
const float* a,
|
|
188
|
+
const float* b,
|
|
189
|
+
float* c) {
|
|
190
|
+
with_simd_level_256bit(
|
|
191
|
+
[&]<SIMDLevel level>() { fvec_add<level>(d, a, b, c); });
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
inline void fvec_add_scalar_dispatch(
|
|
195
|
+
size_t d,
|
|
196
|
+
const float* a,
|
|
197
|
+
float b,
|
|
198
|
+
float* c) {
|
|
199
|
+
with_simd_level_256bit(
|
|
200
|
+
[&]<SIMDLevel level>() { fvec_add<level>(d, a, b, c); });
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
inline void compute_PQ_dis_tables_dsub2_dispatch(
|
|
204
|
+
size_t d,
|
|
205
|
+
size_t ksub,
|
|
206
|
+
const float* centroids,
|
|
207
|
+
size_t nx,
|
|
208
|
+
const float* x,
|
|
209
|
+
bool is_inner_product,
|
|
210
|
+
float* dis_tables) {
|
|
211
|
+
with_simd_level_256bit([&]<SIMDLevel level>() {
|
|
212
|
+
compute_PQ_dis_tables_dsub2<level>(
|
|
213
|
+
d, ksub, centroids, nx, x, is_inner_product, dis_tables);
|
|
214
|
+
});
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
/***************************************************************************
|
|
218
|
+
* Dispatching function that takes a lambda directly.
|
|
219
|
+
* The lambda should be templated on the VectorDistance type, e.g.:
|
|
220
|
+
*
|
|
221
|
+
* auto result = with_VectorDistance(
|
|
222
|
+
* d, metric, metric_arg, [&]<class VD>(VD vd) {
|
|
223
|
+
* return vd(x, y);
|
|
224
|
+
* });
|
|
225
|
+
**************************************************************************/
|
|
226
|
+
|
|
227
|
+
template <typename LambdaType>
|
|
228
|
+
auto with_VectorDistance(
|
|
229
|
+
size_t d,
|
|
230
|
+
MetricType metric,
|
|
231
|
+
float metric_arg,
|
|
232
|
+
LambdaType&& action) {
|
|
233
|
+
auto dispatch_metric = [&]<MetricType mt>() {
|
|
234
|
+
auto call = [&]<SIMDLevel level>() {
|
|
235
|
+
VectorDistance<mt, level> vd = {d, metric_arg};
|
|
236
|
+
return action(vd);
|
|
237
|
+
};
|
|
238
|
+
|
|
239
|
+
constexpr bool has_simd = mt == METRIC_INNER_PRODUCT ||
|
|
240
|
+
mt == METRIC_L2 || mt == METRIC_L1 || mt == METRIC_Linf;
|
|
241
|
+
if constexpr (!has_simd) {
|
|
242
|
+
return call.template operator()<SIMDLevel::NONE>();
|
|
243
|
+
} else {
|
|
244
|
+
return with_simd_level(call);
|
|
245
|
+
}
|
|
246
|
+
};
|
|
247
|
+
return with_metric_type(metric, dispatch_metric);
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
} // namespace faiss
|
|
@@ -7,9 +7,9 @@
|
|
|
7
7
|
|
|
8
8
|
// -*- c++ -*-
|
|
9
9
|
|
|
10
|
-
#include <faiss/utils/distances_fused/
|
|
10
|
+
#include <faiss/utils/distances_fused/distances_fused.h>
|
|
11
11
|
|
|
12
|
-
#ifdef
|
|
12
|
+
#ifdef COMPILE_SIMD_AVX512
|
|
13
13
|
|
|
14
14
|
#include <immintrin.h>
|
|
15
15
|
|
|
@@ -78,7 +78,7 @@ void kernel(
|
|
|
78
78
|
const float* const __restrict xd_0 = x + i * DIM;
|
|
79
79
|
|
|
80
80
|
// prefetch the next point
|
|
81
|
-
_mm_prefetch(xd_0 + DIM * sizeof(float), _MM_HINT_NTA);
|
|
81
|
+
_mm_prefetch((char*)(xd_0 + DIM * sizeof(float)), _MM_HINT_NTA);
|
|
82
82
|
|
|
83
83
|
// load a single point from x
|
|
84
84
|
// load -2 * value
|
|
@@ -262,10 +262,10 @@ void exhaustive_L2sqr_fused_cmax(
|
|
|
262
262
|
}
|
|
263
263
|
}
|
|
264
264
|
|
|
265
|
-
const
|
|
265
|
+
const idx_t nx_p = (nx / NX_POINTS_PER_LOOP) * NX_POINTS_PER_LOOP;
|
|
266
266
|
// the main loop.
|
|
267
267
|
#pragma omp parallel for schedule(dynamic)
|
|
268
|
-
for (
|
|
268
|
+
for (idx_t i = 0; i < nx_p; i += NX_POINTS_PER_LOOP) {
|
|
269
269
|
kernel<DIM, NX_POINTS_PER_LOOP, NY_POINTS_PER_LOOP>(
|
|
270
270
|
x, y, y_transposed.data(), ny, res, y_norms, i);
|
|
271
271
|
}
|
|
@@ -283,7 +283,8 @@ void exhaustive_L2sqr_fused_cmax(
|
|
|
283
283
|
|
|
284
284
|
} // namespace
|
|
285
285
|
|
|
286
|
-
|
|
286
|
+
template <>
|
|
287
|
+
bool exhaustive_L2sqr_fused_cmax<SIMDLevel::AVX512>(
|
|
287
288
|
const float* x,
|
|
288
289
|
const float* y,
|
|
289
290
|
size_t d,
|
|
@@ -343,4 +344,4 @@ bool exhaustive_L2sqr_fused_cmax_AVX512(
|
|
|
343
344
|
|
|
344
345
|
} // namespace faiss
|
|
345
346
|
|
|
346
|
-
#endif
|
|
347
|
+
#endif // COMPILE_SIMD_AVX512
|
|
@@ -7,13 +7,37 @@
|
|
|
7
7
|
|
|
8
8
|
#include <faiss/utils/distances_fused/distances_fused.h>
|
|
9
9
|
|
|
10
|
-
#include <faiss/impl/
|
|
11
|
-
|
|
12
|
-
#include <faiss/utils/distances_fused/avx512.h> // NOLINT
|
|
13
|
-
#include <faiss/utils/distances_fused/simdlib_based.h>
|
|
10
|
+
#include <faiss/impl/simd_dispatch.h>
|
|
14
11
|
|
|
15
12
|
namespace faiss {
|
|
16
13
|
|
|
14
|
+
// Scalar fallback: no fused kernel available.
|
|
15
|
+
template <>
|
|
16
|
+
bool exhaustive_L2sqr_fused_cmax<SIMDLevel::NONE>(
|
|
17
|
+
const float*,
|
|
18
|
+
const float*,
|
|
19
|
+
size_t,
|
|
20
|
+
size_t,
|
|
21
|
+
size_t,
|
|
22
|
+
Top1BlockResultHandler<CMax<float, int64_t>>&,
|
|
23
|
+
const float*) {
|
|
24
|
+
return false;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
#ifdef COMPILE_SIMD_RISCV_RVV
|
|
28
|
+
template <>
|
|
29
|
+
bool exhaustive_L2sqr_fused_cmax<SIMDLevel::RISCV_RVV>(
|
|
30
|
+
const float*,
|
|
31
|
+
const float*,
|
|
32
|
+
size_t,
|
|
33
|
+
size_t,
|
|
34
|
+
size_t,
|
|
35
|
+
Top1BlockResultHandler<CMax<float, int64_t>>&,
|
|
36
|
+
const float*) {
|
|
37
|
+
return false;
|
|
38
|
+
}
|
|
39
|
+
#endif // COMPILE_SIMD_RISCV_RVV
|
|
40
|
+
|
|
17
41
|
bool exhaustive_L2sqr_fused_cmax(
|
|
18
42
|
const float* x,
|
|
19
43
|
const float* y,
|
|
@@ -27,16 +51,11 @@ bool exhaustive_L2sqr_fused_cmax(
|
|
|
27
51
|
return true;
|
|
28
52
|
}
|
|
29
53
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
return exhaustive_L2sqr_fused_cmax_simdlib(x, y, d, nx, ny, res, y_norms);
|
|
36
|
-
#else
|
|
37
|
-
// not supported, please use a general-purpose kernel
|
|
38
|
-
return false;
|
|
39
|
-
#endif
|
|
54
|
+
return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A0>(
|
|
55
|
+
[&]<SIMDLevel SL>() {
|
|
56
|
+
return exhaustive_L2sqr_fused_cmax<SL>(
|
|
57
|
+
x, y, d, nx, ny, res, y_norms);
|
|
58
|
+
});
|
|
40
59
|
}
|
|
41
60
|
|
|
42
61
|
} // namespace faiss
|
|
@@ -21,8 +21,8 @@
|
|
|
21
21
|
#pragma once
|
|
22
22
|
|
|
23
23
|
#include <faiss/impl/ResultHandler.h>
|
|
24
|
-
|
|
25
24
|
#include <faiss/utils/Heap.h>
|
|
25
|
+
#include <faiss/utils/simd_levels.h>
|
|
26
26
|
|
|
27
27
|
namespace faiss {
|
|
28
28
|
|
|
@@ -37,4 +37,15 @@ bool exhaustive_L2sqr_fused_cmax(
|
|
|
37
37
|
Top1BlockResultHandler<CMax<float, int64_t>>& res,
|
|
38
38
|
const float* y_norms);
|
|
39
39
|
|
|
40
|
+
// Per-SIMD-level implementation (defined in per-SIMD TUs).
|
|
41
|
+
template <SIMDLevel>
|
|
42
|
+
bool exhaustive_L2sqr_fused_cmax(
|
|
43
|
+
const float* x,
|
|
44
|
+
const float* y,
|
|
45
|
+
size_t d,
|
|
46
|
+
size_t nx,
|
|
47
|
+
size_t ny,
|
|
48
|
+
Top1BlockResultHandler<CMax<float, int64_t>>& res,
|
|
49
|
+
const float* y_norms);
|
|
50
|
+
|
|
40
51
|
} // namespace faiss
|