faiss 0.5.3 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +4 -4
- data/ext/faiss/index.cpp +63 -45
- data/ext/faiss/index_binary.cpp +37 -27
- data/ext/faiss/kmeans.cpp +9 -8
- data/ext/faiss/pca_matrix.cpp +9 -7
- data/ext/faiss/product_quantizer.cpp +13 -11
- data/ext/faiss/utils.cpp +4 -2
- data/ext/faiss/utils.h +4 -0
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +214 -82
- data/vendor/faiss/faiss/AutoTune.h +14 -1
- data/vendor/faiss/faiss/Clustering.cpp +97 -249
- data/vendor/faiss/faiss/Clustering.h +18 -0
- data/vendor/faiss/faiss/IVFlib.cpp +67 -44
- data/vendor/faiss/faiss/Index.cpp +25 -12
- data/vendor/faiss/faiss/Index.h +26 -4
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +68 -61
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +6 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +92 -95
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +120 -414
- data/vendor/faiss/faiss/IndexFastScan.cpp +105 -129
- data/vendor/faiss/faiss/IndexFastScan.h +35 -24
- data/vendor/faiss/faiss/IndexFlat.cpp +216 -152
- data/vendor/faiss/faiss/IndexFlat.h +32 -14
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +88 -41
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +299 -187
- data/vendor/faiss/faiss/IndexHNSW.h +30 -14
- data/vendor/faiss/faiss/IndexIDMap.cpp +26 -22
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +535 -405
- data/vendor/faiss/faiss/IndexIVF.h +47 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +105 -99
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +6 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +379 -249
- data/vendor/faiss/faiss/IndexIVFFastScan.h +65 -60
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +41 -124
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +89 -138
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +77 -907
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +184 -122
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -18
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +59 -60
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +564 -416
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +269 -111
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +44 -25
- data/vendor/faiss/faiss/IndexLattice.cpp +41 -36
- data/vendor/faiss/faiss/IndexNNDescent.cpp +37 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +40 -23
- data/vendor/faiss/faiss/IndexNSG.h +0 -2
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +32 -12
- data/vendor/faiss/faiss/IndexPQ.cpp +129 -213
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +31 -43
- data/vendor/faiss/faiss/IndexRaBitQ.h +4 -3
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +135 -317
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +192 -34
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -55
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +13 -13
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +29 -6
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +349 -141
- data/vendor/faiss/faiss/VectorTransform.h +39 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +55 -51
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +6 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +64 -34
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -28
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
- data/vendor/faiss/faiss/impl/CodePacker.cpp +7 -3
- data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +64 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +117 -351
- data/vendor/faiss/faiss/impl/HNSW.h +21 -40
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +114 -102
- data/vendor/faiss/faiss/impl/NNDescent.cpp +63 -26
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +44 -26
- data/vendor/faiss/faiss/impl/NSG.h +20 -10
- data/vendor/faiss/faiss/impl/Panorama.cpp +76 -52
- data/vendor/faiss/faiss/impl/Panorama.h +265 -78
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +62 -37
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +99 -80
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +135 -37
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +148 -21
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +298 -301
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +40 -32
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +218 -113
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +119 -2362
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
- data/vendor/faiss/faiss/impl/VisitedTable.h +76 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +163 -0
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +176 -4
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -348
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +290 -142
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1950 -505
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -2
- data/vendor/faiss/faiss/impl/index_write.cpp +112 -21
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +81 -40
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +15 -8
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.h} +43 -220
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.h} +25 -112
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +59 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +256 -0
- data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -146
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +320 -483
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +137 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +371 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +190 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +603 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +597 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +388 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +630 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +387 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +54 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +173 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +274 -171
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +275 -217
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +115 -28
- data/vendor/faiss/faiss/index_io.h +53 -3
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +73 -20
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +14 -14
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +19 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +14 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +56 -10
- data/vendor/faiss/faiss/utils/Heap.h +21 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +54 -40
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +507 -559
- data/vendor/faiss/faiss/utils/distances.h +118 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +250 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +72 -3681
- data/vendor/faiss/faiss/utils/extra_distances.cpp +60 -102
- data/vendor/faiss/faiss/utils/extra_distances.h +79 -7
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +124 -343
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +154 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +777 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +306 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1431 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1095 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +392 -0
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +334 -0
- data/vendor/faiss/faiss/utils/simd_levels.h +183 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +21 -14
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +156 -42
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -216
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -224
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -228
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -450
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -296
- /data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifdef COMPILE_SIMD_AVX2
|
|
9
|
+
|
|
10
|
+
#include <faiss/utils/simd_impl/super_kmeans_kernels.h>
|
|
11
|
+
|
|
12
|
+
#include <immintrin.h>
|
|
13
|
+
|
|
14
|
+
namespace faiss {
|
|
15
|
+
namespace detail {
|
|
16
|
+
|
|
17
|
+
namespace {
|
|
18
|
+
|
|
19
|
+
// Horizontal add: collapses the 8 float lanes of a 256-bit register into a
// single scalar. The reduction is a shuffle+add tree rather than a pair of
// _mm_hadd_ps calls; per the original author's note, on Skylake-class cores
// hadd is 3-cycle latency / 2-uop, while movehdup, movehl and add_ss are
// single-uop, single-cycle ops.
inline float horizontal_sum_avx2(__m256 v) {
    // Fold the upper 128-bit half onto the lower half: 8 lanes -> 4 lanes.
    const __m128 quad = _mm_add_ps(
            _mm256_castps256_ps128(v), _mm256_extractf128_ps(v, 1));

    // Duplicate the odd lanes so that lanes 0 and 2 each receive their
    // right-hand neighbour.
    const __m128 odd = _mm_movehdup_ps(quad);   // [q1, q1, q3, q3]
    const __m128 pairs = _mm_add_ps(quad, odd); // [q0+q1, _, q2+q3, _]

    // Move the upper pair sum down to lane 0 and add the two pair sums.
    const __m128 upper = _mm_movehl_ps(odd, pairs); // [q2+q3, _, _, _]
    return _mm_cvtss_f32(_mm_add_ss(pairs, upper));
}
|
|
33
|
+
|
|
34
|
+
} // namespace
|
|
35
|
+
|
|
36
|
+
template <>
|
|
37
|
+
float block_l2<SIMDLevel::AVX2>(const float* x, const float* y, int n) {
|
|
38
|
+
__m256 acc = _mm256_setzero_ps();
|
|
39
|
+
int m = 0;
|
|
40
|
+
for (; m + 8 <= n; m += 8) {
|
|
41
|
+
__m256 xv = _mm256_loadu_ps(x + m);
|
|
42
|
+
__m256 yv = _mm256_loadu_ps(y + m);
|
|
43
|
+
__m256 diff = _mm256_sub_ps(xv, yv);
|
|
44
|
+
acc = _mm256_fmadd_ps(diff, diff, acc);
|
|
45
|
+
}
|
|
46
|
+
float result = horizontal_sum_avx2(acc);
|
|
47
|
+
for (; m < n; ++m) {
|
|
48
|
+
const float d = x[m] - y[m];
|
|
49
|
+
result += d * d;
|
|
50
|
+
}
|
|
51
|
+
return result;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
} // namespace detail
|
|
55
|
+
} // namespace faiss
|
|
56
|
+
|
|
57
|
+
#endif // COMPILE_SIMD_AVX2
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifdef COMPILE_SIMD_AVX512
|
|
9
|
+
|
|
10
|
+
#include <faiss/utils/simd_impl/super_kmeans_kernels.h>
|
|
11
|
+
|
|
12
|
+
#include <immintrin.h>
|
|
13
|
+
|
|
14
|
+
namespace faiss {
|
|
15
|
+
namespace detail {
|
|
16
|
+
|
|
17
|
+
template <>
|
|
18
|
+
float block_l2<SIMDLevel::AVX512>(const float* x, const float* y, int n) {
|
|
19
|
+
__m512 acc = _mm512_setzero_ps();
|
|
20
|
+
int m = 0;
|
|
21
|
+
for (; m + 16 <= n; m += 16) {
|
|
22
|
+
__m512 xv = _mm512_loadu_ps(x + m);
|
|
23
|
+
__m512 yv = _mm512_loadu_ps(y + m);
|
|
24
|
+
__m512 diff = _mm512_sub_ps(xv, yv);
|
|
25
|
+
acc = _mm512_fmadd_ps(diff, diff, acc);
|
|
26
|
+
}
|
|
27
|
+
// _mm512_reduce_add_ps: on modern AVX-512 SKUs (Cascade Lake+, Sapphire
|
|
28
|
+
// Rapids) GCC/Clang lower this to a shuffle+add tree, ~5-cycle latency.
|
|
29
|
+
// On older AVX-512 SKUs (Skylake-X, Ice Lake) the cross-lane reduction
|
|
30
|
+
// can be ~20 cycles. Acceptable here because n ~ pdx_block_size = 64
|
|
31
|
+
// (4 iterations of 16-wide accumulation), so per-block work dominates
|
|
32
|
+
// the reduction cost. AVX2 uses a manual shuffle+add tree explicitly
|
|
33
|
+
// to avoid `_mm_hadd_ps` overhead, where the ratio is reversed.
|
|
34
|
+
float result = _mm512_reduce_add_ps(acc);
|
|
35
|
+
for (; m < n; ++m) {
|
|
36
|
+
const float d = x[m] - y[m];
|
|
37
|
+
result += d * d;
|
|
38
|
+
}
|
|
39
|
+
return result;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
} // namespace detail
|
|
43
|
+
} // namespace faiss
|
|
44
|
+
|
|
45
|
+
#endif // COMPILE_SIMD_AVX512
|
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#include <faiss/utils/simd_levels.h>
|
|
9
|
+
|
|
10
|
+
#include <cstdlib>
|
|
11
|
+
|
|
12
|
+
#include <faiss/impl/FaissAssert.h>
|
|
13
|
+
#include <faiss/impl/simd_dispatch.h>
|
|
14
|
+
|
|
15
|
+
namespace faiss {
|
|
16
|
+
|
|
17
|
+
// Static member definitions - used in both DD and static modes
|
|
18
|
+
// Currently selected SIMD level. Starts as NONE; in DD mode the SIMDConfig
// constructor and set_level() overwrite it.
SIMDLevel SIMDConfig::level = SIMDLevel::NONE;
|
|
19
|
+
|
|
20
|
+
// Bitmask of supported SIMD levels (1 << SIMDLevel)
|
|
21
|
+
// One bit per level, indexed by the enum's integer value. Bits are set by the
// SIMDConfig constructor and auto_detect_simd_level(), and tested by
// is_simd_level_available().
uint64_t SIMDConfig::supported_simd_levels = 0;
|
|
22
|
+
|
|
23
|
+
// ARM SVE runtime detection. Exactly one has_sve() definition is compiled,
// selected by target architecture and operating system.
#if (defined(__aarch64__) || defined(_M_ARM64)) && defined(__linux__)

#include <sys/auxv.h>
#ifndef HWCAP_SVE
#define HWCAP_SVE (1 << 22)
#endif

// aarch64 Linux: test the SVE bit of the AT_HWCAP auxiliary-vector entry.
static bool has_sve() {
    const unsigned long hwcap = getauxval(AT_HWCAP);
    return (hwcap & HWCAP_SVE) != 0;
}

#elif defined(__aarch64__) || defined(_M_ARM64)

// aarch64 on any other OS: Apple Silicon does NOT support SVE, and the
// remaining platforms conservatively report no SVE.
static bool has_sve() {
    return false;
}

#else // Not ARM64

// Non-ARM64 targets never have SVE; the function may go unused there.
[[maybe_unused]] static bool has_sve() {
    return false;
}

#endif
|
|
55
|
+
|
|
56
|
+
#ifdef FAISS_ENABLE_DD
|
|
57
|
+
|
|
58
|
+
// =============================================================================
|
|
59
|
+
// Dynamic Dispatch (DD) mode implementation
|
|
60
|
+
// =============================================================================
|
|
61
|
+
|
|
62
|
+
// File-scope instance: its constructor runs during static initialization and
// sets SIMDConfig::level (from FAISS_SIMD_LEVEL or by auto-detection).
|
|
63
|
+
// NOLINTNEXTLINE(facebook-avoid-non-const-global-variables)
|
|
64
|
+
static SIMDConfig simd_config_initializer;
|
|
65
|
+
|
|
66
|
+
// Chooses the SIMD level at construction time and records it in the static
// members `level` and `supported_simd_levels`.
//
// faiss_simd_level_env: optional override used for dependency injection in
//   tests. When non-null, the pointed-to string (which may itself be null)
//   is used instead of the FAISS_SIMD_LEVEL environment variable.
//
// With no level string, the level is auto-detected; auto_detect_simd_level()
// also sets the supported-level bits it finds. With a level string, it is
// parsed by to_simd_level() and the mask is reset to just that level. In both
// cases NONE is then marked as supported.
SIMDConfig::SIMDConfig(const char** faiss_simd_level_env) {
    const char* env_var = faiss_simd_level_env ? *faiss_simd_level_env
                                               : getenv("FAISS_SIMD_LEVEL");

    if (!env_var) {
        level = auto_detect_simd_level();
    } else {
        level = to_simd_level(env_var);
        // Shift a 64-bit one: the mask is uint64_t, and an int-typed
        // `1 << n` would overflow for n >= 31.
        supported_simd_levels = uint64_t{1} << static_cast<int>(level);
    }
    supported_simd_levels |= uint64_t{1}
            << static_cast<int>(SIMDLevel::NONE);
}
|
|
79
|
+
|
|
80
|
+
// Makes `l` the active SIMD level. Throws via FAISS_THROW_FMT when `l` is not
// flagged in supported_simd_levels; `level` is left untouched in that case.
void SIMDConfig::set_level(SIMDLevel l) {
    if (is_simd_level_available(l)) {
        level = l;
        return;
    }
    FAISS_THROW_FMT(
            "SIMDConfig::set_level: level %s is not available",
            to_string(l).c_str());
}
|
|
88
|
+
|
|
89
|
+
// Returns the currently active SIMD level.
SIMDLevel SIMDConfig::get_level() {
    return SIMDConfig::level;
}
|
|
92
|
+
|
|
93
|
+
std::string SIMDConfig::get_level_name() {
|
|
94
|
+
return to_string(level);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// Returns true when the bit for level `l` is set in supported_simd_levels.
bool SIMDConfig::is_simd_level_available(SIMDLevel l) {
    // Build the bit as a 64-bit value to match the mask's type. An int-typed
    // `1 << n` overflows for n >= 31, and at n == 31 its result sign-extends
    // when widened, which would match any upper bit of the mask.
    const uint64_t level_bit = uint64_t{1} << static_cast<int>(l);
    return (supported_simd_levels & level_bit) != 0;
}
|
|
100
|
+
|
|
101
|
+
/// Probe the running CPU (DD mode), flag every usable level in
/// supported_simd_levels and return the highest level that was found.
///
/// Only levels whose COMPILE_SIMD_* macro is defined are considered. Levels
/// are examined in increasing order, so a later match overrides
/// `detected_level`. When nothing matches the result is SIMDLevel::NONE and
/// the mask is left as it was.
///
/// @return the most capable SIMD level detected.
SIMDLevel SIMDConfig::auto_detect_simd_level() {
    SIMDLevel detected_level = SIMDLevel::NONE;

#if defined(__x86_64__) && \
        (defined(COMPILE_SIMD_AVX2) || defined(COMPILE_SIMD_AVX512))
    struct CpuidRegs {
        unsigned int eax, ebx, ecx, edx;
    };
    // Run CPUID for (leaf, subleaf) and hand back all four result registers.
    const auto cpuid = [](unsigned int leaf, unsigned int subleaf) {
        CpuidRegs r{};
        asm volatile("cpuid"
                     : "=a"(r.eax), "=b"(r.ebx), "=c"(r.ecx), "=d"(r.edx)
                     : "a"(leaf), "c"(subleaf));
        return r;
    };
    // True when bit `n` of `reg` is set. Unsigned arithmetic keeps the test
    // of bit 31 free of signed-shift pitfalls.
    const auto has_bit = [](unsigned int reg, unsigned int n) {
        return ((reg >> n) & 1u) != 0;
    };

    // CPUID leaf 1, ECX: bit 26 = XSAVE, bit 27 = OSXSAVE, bit 28 = AVX.
    const CpuidRegs leaf1 = cpuid(1, 0);
    const bool has_avx = has_bit(leaf1.ecx, 28);
    const bool has_xsave_osxsave =
            has_bit(leaf1.ecx, 26) && has_bit(leaf1.ecx, 27);

    // XCR0 is read once, and only after OSXSAVE has been confirmed. The same
    // value serves both the AVX and the AVX512 OS-support checks.
    unsigned int xcr0 = 0;
    bool avx_supported = false;
    if (has_avx && has_xsave_osxsave) {
        [[maybe_unused]] unsigned int xcr0_hi = 0; // upper half, not needed
        asm volatile("xgetbv" : "=a"(xcr0), "=d"(xcr0_hi) : "c"(0));
        // Bits 1 and 2 must both be enabled by the OS for AVX to be usable.
        avx_supported = (xcr0 & 0x6u) == 0x6u;
    }

    if (avx_supported) {
        // CPUID leaf 7, sub-leaf 0: extended feature flags live in EBX.
        const CpuidRegs leaf7 = cpuid(7, 0);

        const bool has_avx2 = has_bit(leaf7.ebx, 5);
        if (has_avx2) {
            supported_simd_levels |= uint64_t{1}
                    << static_cast<int>(SIMDLevel::AVX2);
            detected_level = SIMDLevel::AVX2;
        }

#if defined(COMPILE_SIMD_AVX512)
        const bool cpu_has_avx512f = has_bit(leaf7.ebx, 16);
        // XCR0 bits 5..7 must all be enabled by the OS for AVX512 state.
        const bool os_supports_avx512 = (xcr0 & 0xE0u) == 0xE0u;
        if (cpu_has_avx512f && os_supports_avx512) {
            const bool has_avx512dq = has_bit(leaf7.ebx, 17);
            const bool has_avx512cd = has_bit(leaf7.ebx, 28);
            const bool has_avx512bw = has_bit(leaf7.ebx, 30);
            const bool has_avx512vl = has_bit(leaf7.ebx, 31);
            // The AVX512 level requires F + CD + VL + DQ + BW together.
            if (has_avx512bw && has_avx512cd && has_avx512vl && has_avx512dq) {
                detected_level = SIMDLevel::AVX512;
                supported_simd_levels |= uint64_t{1}
                        << static_cast<int>(SIMDLevel::AVX512);

#if defined(COMPILE_SIMD_AVX512_SPR)
                // Check for Sapphire Rapids features (AVX512_BF16)
                // CPUID EAX=7, ECX=1: EAX bit 5 = AVX512_BF16
                // NOTE(review): BF16 is the only SPR feature probed here;
                // confirm the AVX512_SPR kernels need nothing beyond it.
                const CpuidRegs leaf7_sub1 = cpuid(7, 1);
                const bool has_avx512_bf16 = has_bit(leaf7_sub1.eax, 5);
                if (has_avx512_bf16) {
                    detected_level = SIMDLevel::AVX512_SPR;
                    supported_simd_levels |= uint64_t{1}
                            << static_cast<int>(SIMDLevel::AVX512_SPR);
                }
#endif // defined(COMPILE_SIMD_AVX512_SPR)
            }
        }
#endif // defined(COMPILE_SIMD_AVX512)
    }
#endif // defined(__x86_64__) && ...

#ifdef COMPILE_SIMD_ARM_NEON
    // ARM NEON is standard on aarch64
    supported_simd_levels |= uint64_t{1}
            << static_cast<int>(SIMDLevel::ARM_NEON);
    detected_level = SIMDLevel::ARM_NEON;
#endif

#ifdef COMPILE_SIMD_ARM_SVE
    if (has_sve()) {
        supported_simd_levels |= uint64_t{1}
                << static_cast<int>(SIMDLevel::ARM_SVE);
        detected_level = SIMDLevel::ARM_SVE;
    }
#endif

#if defined(__riscv) && defined(COMPILE_SIMD_RISCV_RVV)
    // RVV is always available on RISC-V builds compiled with rv64gcv.
    supported_simd_levels |= uint64_t{1}
            << static_cast<int>(SIMDLevel::RISCV_RVV);
    detected_level = SIMDLevel::RISCV_RVV;
#endif

    return detected_level;
}
|
|
201
|
+
|
|
202
|
+
namespace {
|
|
203
|
+
|
|
204
|
+
template <SIMDLevel Level>
|
|
205
|
+
SIMDLevel get_dispatched_level_impl() {
|
|
206
|
+
return Level;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
} // namespace
|
|
210
|
+
|
|
211
|
+
/// Return the level that with_selected_simd_levels actually dispatches to,
/// obtained by routing an identity probe through the dispatcher.
SIMDLevel SIMDConfig::get_dispatched_level() {
    auto report_level = [&]<SIMDLevel SL>() {
        return get_dispatched_level_impl<SL>();
    };
    return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_ALL>(report_level);
}
|
|
215
|
+
|
|
216
|
+
#else // Static mode
|
|
217
|
+
|
|
218
|
+
// =============================================================================
|
|
219
|
+
// Static mode implementation
|
|
220
|
+
// =============================================================================
|
|
221
|
+
|
|
222
|
+
// Static initializer to set up the single supported level
|
|
223
|
+
// NOLINTNEXTLINE(facebook-avoid-non-const-global-variables)
|
|
224
|
+
static SIMDConfig simd_config_initializer;
|
|
225
|
+
|
|
226
|
+
/// Static-mode constructor: the environment override argument is ignored.
/// Stores the compiled-in level in `level` and makes it the only bit set in
/// supported_simd_levels.
SIMDConfig::SIMDConfig(const char** /* faiss_simd_level_env */) {
    const SIMDLevel compiled_level = auto_detect_simd_level();
    level = compiled_level;
    supported_simd_levels = (1 << static_cast<int>(compiled_level));
}
|
|
231
|
+
|
|
232
|
+
/// Select the SIMD level (static mode).
///
/// @param l  requested level; it must be flagged in supported_simd_levels.
/// @throws   via FAISS_THROW_FMT when is_simd_level_available(l) is false.
///           The message names both the requested and the current level.
void SIMDConfig::set_level(SIMDLevel l) {
    if (is_simd_level_available(l)) {
        // The constructor leaves a single bit in the mask, so an accepted
        // level is normally the one already stored.
        level = l;
        return;
    }
    FAISS_THROW_FMT(
            "SIMDConfig::set_level: level %s is not available "
            "(static build only supports %s)",
            to_string(l).c_str(),
            to_string(level).c_str());
}
|
|
243
|
+
|
|
244
|
+
/// Return the SIMD level currently stored in SIMDConfig::level.
SIMDLevel SIMDConfig::get_level() {
    return SIMDConfig::level;
}
|
|
247
|
+
|
|
248
|
+
std::string SIMDConfig::get_level_name() {
|
|
249
|
+
return to_string(level);
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
/// Tell whether level `l` is flagged in the supported_simd_levels bitmask.
///
/// @param l  level to query.
/// @return   true when bit number static_cast<int>(l) of the mask is set.
///           Values outside [NONE, COUNT) are reported as unavailable
///           instead of being used as a shift count.
bool SIMDConfig::is_simd_level_available(SIMDLevel l) {
    const int bit = static_cast<int>(l);
    if (bit < 0 || bit >= static_cast<int>(SIMDLevel::COUNT)) {
        return false;
    }
    // Shift in 64 bits so the test matches the width of the uint64_t mask.
    return (supported_simd_levels & (uint64_t{1} << bit)) != 0;
}
|
|
255
|
+
|
|
256
|
+
/// Static mode: no CPU probing takes place. The result is the first
/// COMPILE_SIMD_* macro defined, in the priority order listed below, or
/// SIMDLevel::NONE when none is defined.
SIMDLevel SIMDConfig::auto_detect_simd_level() {
#if defined(COMPILE_SIMD_AVX512_SPR)
    constexpr SIMDLevel compiled_level = SIMDLevel::AVX512_SPR;
#elif defined(COMPILE_SIMD_AVX512)
    constexpr SIMDLevel compiled_level = SIMDLevel::AVX512;
#elif defined(COMPILE_SIMD_AVX2)
    constexpr SIMDLevel compiled_level = SIMDLevel::AVX2;
#elif defined(COMPILE_SIMD_ARM_SVE)
    constexpr SIMDLevel compiled_level = SIMDLevel::ARM_SVE;
#elif defined(COMPILE_SIMD_ARM_NEON)
    constexpr SIMDLevel compiled_level = SIMDLevel::ARM_NEON;
#elif defined(COMPILE_SIMD_RISCV_RVV)
    constexpr SIMDLevel compiled_level = SIMDLevel::RISCV_RVV;
#else
    constexpr SIMDLevel compiled_level = SIMDLevel::NONE;
#endif
    return compiled_level;
}
|
|
274
|
+
|
|
275
|
+
/// Static mode has no dispatch step, so the dispatched level is simply the
/// current level as reported by get_level().
SIMDLevel SIMDConfig::get_dispatched_level() {
    const SIMDLevel current = get_level();
    return current;
}
|
|
279
|
+
|
|
280
|
+
#endif // FAISS_ENABLE_DD
|
|
281
|
+
|
|
282
|
+
// =============================================================================
|
|
283
|
+
// Common functions (both modes)
|
|
284
|
+
// =============================================================================
|
|
285
|
+
|
|
286
|
+
/// Convert a SIMDLevel to its canonical upper-case name.
///
/// @param level  level to name.
/// @return       the enumerator's spelling, e.g. "AVX2".
/// @throws FaissException for SIMDLevel::COUNT or any value that is not a
///         named enumerator.
std::string to_string(SIMDLevel level) {
    const char* name = nullptr;
    switch (level) {
        case SIMDLevel::NONE:
            name = "NONE";
            break;
        case SIMDLevel::AVX2:
            name = "AVX2";
            break;
        case SIMDLevel::AVX512:
            name = "AVX512";
            break;
        case SIMDLevel::AVX512_SPR:
            name = "AVX512_SPR";
            break;
        case SIMDLevel::ARM_NEON:
            name = "ARM_NEON";
            break;
        case SIMDLevel::ARM_SVE:
            name = "ARM_SVE";
            break;
        case SIMDLevel::RISCV_RVV:
            name = "RISCV_RVV";
            break;
        case SIMDLevel::COUNT:
        default:
            break; // no name: rejected below
    }
    if (name == nullptr) {
        throw FaissException("Invalid SIMDLevel");
    }
    return name;
}
|
|
307
|
+
|
|
308
|
+
/// Parse a level name into a SIMDLevel.
///
/// The comparison is exact and case-sensitive.
///
/// @param level_str  one of "NONE", "AVX2", "AVX512", "AVX512_SPR",
///                   "ARM_NEON", "ARM_SVE", "RISCV_RVV".
/// @return           the matching enumerator.
/// @throws FaissException when the string matches none of the names.
SIMDLevel to_simd_level(const std::string& level_str) {
    struct NamedLevel {
        const char* name;
        SIMDLevel level;
    };
    static constexpr NamedLevel known_levels[] = {
            {"NONE", SIMDLevel::NONE},
            {"AVX2", SIMDLevel::AVX2},
            {"AVX512", SIMDLevel::AVX512},
            {"AVX512_SPR", SIMDLevel::AVX512_SPR},
            {"ARM_NEON", SIMDLevel::ARM_NEON},
            {"ARM_SVE", SIMDLevel::ARM_SVE},
            {"RISCV_RVV", SIMDLevel::RISCV_RVV},
    };

    for (const NamedLevel& candidate : known_levels) {
        if (level_str == candidate.name) {
            return candidate.level;
        }
    }

    throw FaissException("Invalid SIMD level string: " + level_str);
}
|
|
333
|
+
|
|
334
|
+
} // namespace faiss
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <cstdint>
|
|
11
|
+
#include <string>
|
|
12
|
+
|
|
13
|
+
#include <faiss/impl/platform_macros.h>
|
|
14
|
+
|
|
15
|
+
namespace faiss {
|
|
16
|
+
|
|
17
|
+
// Defined unconditionally, unlike the other COMPILE_SIMD_* macros tested in
// this header. NOTE(review): presumably marks the scalar (NONE) level as
// always compiled in; confirm against the dispatch code.
#define COMPILE_SIMD_NONE
|
|
18
|
+
|
|
19
|
+
/// SIMD instruction-set levels known to faiss.
///
/// The numeric value of each enumerator is also its bit position in the
/// SIMDConfig::supported_simd_levels mask (1 << SIMDLevel), so the order of
/// the enumerators matters.
enum class SIMDLevel {
    NONE = 0,

    // x86
    AVX2,
    AVX512,
    AVX512_SPR, // Sapphire Rapids: AVX512 + BF16 + FP16 + VNNI

    // arm & aarch64
    ARM_NEON,
    ARM_SVE, // Scalable Vector Extension (ARMv8.2+)

    // riscv
    RISCV_RVV, // RISC-V Vector Extension (rv64gcv)

    // Number of levels above; not a level itself.
    COUNT
};
|
|
33
|
+
|
|
34
|
+
/***************************************************************
|
|
35
|
+
* SINGLE_SIMD_LEVEL: the SIMD level for code without explicit SL context.
|
|
36
|
+
*
|
|
37
|
+
* In static mode: resolves to the compiled-in level (zero overhead).
|
|
38
|
+
* In DD mode: resolves to NONE (emulated scalar). Code using
|
|
39
|
+
* SINGLE_SIMD_LEVEL is meant to be incrementally migrated to use
|
|
40
|
+
* proper SL dispatch — SINGLE_SIMD_LEVEL is migration scaffolding,
|
|
41
|
+
* not permanent API.
|
|
42
|
+
***************************************************************/
|
|
43
|
+
#ifdef FAISS_ENABLE_DD
|
|
44
|
+
// DD dispatches to the highest optional SIMD level at runtime.
|
|
45
|
+
// On ARM64, NEON is mandatory (always available via COMPILE_SIMD_ARM_NEON),
|
|
46
|
+
// so the baseline is ARM_NEON. On x86, the baseline is NONE.
|
|
47
|
+
#if defined(COMPILE_SIMD_ARM_NEON)
|
|
48
|
+
inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::ARM_NEON;
|
|
49
|
+
#else
|
|
50
|
+
inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::NONE;
|
|
51
|
+
#endif
|
|
52
|
+
#else
|
|
53
|
+
#if defined(COMPILE_SIMD_AVX512_SPR)
|
|
54
|
+
inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::AVX512_SPR;
|
|
55
|
+
#elif defined(COMPILE_SIMD_AVX512)
|
|
56
|
+
inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::AVX512;
|
|
57
|
+
#elif defined(COMPILE_SIMD_AVX2)
|
|
58
|
+
inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::AVX2;
|
|
59
|
+
#elif defined(COMPILE_SIMD_ARM_SVE)
|
|
60
|
+
inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::ARM_SVE;
|
|
61
|
+
#elif defined(COMPILE_SIMD_ARM_NEON)
|
|
62
|
+
inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::ARM_NEON;
|
|
63
|
+
#elif defined(COMPILE_SIMD_RISCV_RVV)
|
|
64
|
+
inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::RISCV_RVV;
|
|
65
|
+
#else
|
|
66
|
+
inline constexpr SIMDLevel SINGLE_SIMD_LEVEL = SIMDLevel::NONE;
|
|
67
|
+
#endif
|
|
68
|
+
#endif
|
|
69
|
+
|
|
70
|
+
/***************************************************************
|
|
71
|
+
* Helper to select the appropriate 256-bit SIMD level.
|
|
72
|
+
*
|
|
73
|
+
* For 256-bit SIMD types (simd16uint16, simd32uint8, etc.), maps:
|
|
74
|
+
* AVX512/AVX512_SPR → AVX2 (256-bit ops use AVX2 instructions)
|
|
75
|
+
* AVX2 → AVX2
|
|
76
|
+
* ARM_NEON/ARM_SVE → ARM_NEON
|
|
77
|
+
* NONE → NONE
|
|
78
|
+
***************************************************************/
|
|
79
|
+
template <SIMDLevel SL>
|
|
80
|
+
struct simd256_level_selector {
|
|
81
|
+
static constexpr SIMDLevel value =
|
|
82
|
+
(SL == SIMDLevel::AVX512 || SL == SIMDLevel::AVX512_SPR)
|
|
83
|
+
? SIMDLevel::AVX2
|
|
84
|
+
: (SL == SIMDLevel::ARM_SVE ? SIMDLevel::ARM_NEON
|
|
85
|
+
: SL == SIMDLevel::RISCV_RVV ? SIMDLevel::NONE
|
|
86
|
+
: SL);
|
|
87
|
+
};
|
|
88
|
+
|
|
89
|
+
/// SINGLE_SIMD_LEVEL mapped to 256-bit: use this for 256-bit simd types
|
|
90
|
+
/// (simd16uint16, simd32uint8, etc.) which don't have AVX512/SVE
|
|
91
|
+
/// specializations.
|
|
92
|
+
inline constexpr SIMDLevel SINGLE_SIMD_LEVEL_256 =
|
|
93
|
+
simd256_level_selector<SINGLE_SIMD_LEVEL>::value;
|
|
94
|
+
|
|
95
|
+
/***************************************************************
|
|
96
|
+
* Helper to select the appropriate 512-bit SIMD level.
|
|
97
|
+
*
|
|
98
|
+
* For 512-bit SIMD types (simd32uint16, simd64uint8, etc.), maps:
|
|
99
|
+
* AVX512_SPR → AVX512 (512-bit ops share the same instructions)
|
|
100
|
+
* AVX512 → AVX512
|
|
101
|
+
* NONE → NONE
|
|
102
|
+
***************************************************************/
|
|
103
|
+
template <SIMDLevel SL>
|
|
104
|
+
struct simd512_level_selector {
|
|
105
|
+
static constexpr SIMDLevel value = (SL == SIMDLevel::AVX512_SPR)
|
|
106
|
+
? SIMDLevel::AVX512
|
|
107
|
+
: (SL == SIMDLevel::RISCV_RVV) ? SIMDLevel::NONE
|
|
108
|
+
: SL;
|
|
109
|
+
};
|
|
110
|
+
|
|
111
|
+
/// SINGLE_SIMD_LEVEL mapped to 512-bit: use this for 512-bit simd types
|
|
112
|
+
/// (simd32uint16, simd64uint8, etc.) which don't have AVX512_SPR
|
|
113
|
+
/// specializations (AVX512_SPR uses the same 512-bit integer ops as AVX512).
|
|
114
|
+
inline constexpr SIMDLevel SINGLE_SIMD_LEVEL_512 =
|
|
115
|
+
simd512_level_selector<SINGLE_SIMD_LEVEL>::value;
|
|
116
|
+
|
|
117
|
+
/// Number of float32 lanes for a given SIMD level.
|
|
118
|
+
/// ARM_SVE is variable-width (128–2048 bits); no single constant is correct.
|
|
119
|
+
template <SIMDLevel SL>
|
|
120
|
+
constexpr int simd_width() {
|
|
121
|
+
static_assert(
|
|
122
|
+
SL != SIMDLevel::ARM_SVE,
|
|
123
|
+
"simd_width<ARM_SVE> is not supported: SVE is variable-width");
|
|
124
|
+
static_assert(
|
|
125
|
+
SL != SIMDLevel::RISCV_RVV,
|
|
126
|
+
"simd_width<RISCV_RVV> is not supported: RVV is variable-width");
|
|
127
|
+
if constexpr (SL == SIMDLevel::AVX512 || SL == SIMDLevel::AVX512_SPR)
|
|
128
|
+
return 16;
|
|
129
|
+
else if constexpr (SL == SIMDLevel::AVX2 || SL == SIMDLevel::ARM_NEON)
|
|
130
|
+
return 8;
|
|
131
|
+
else
|
|
132
|
+
return 1;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
/// Convert SIMDLevel to string. Throws FaissException for invalid level.
|
|
136
|
+
std::string to_string(SIMDLevel level);
|
|
137
|
+
|
|
138
|
+
/// Parse string to SIMDLevel. Throws FaissException for invalid strings.
|
|
139
|
+
SIMDLevel to_simd_level(const std::string& level_str);
|
|
140
|
+
|
|
141
|
+
/**
|
|
142
|
+
* Current SIMD configuration.
|
|
143
|
+
*
|
|
144
|
+
* This class provides a uniform API for querying and setting the SIMD level,
|
|
145
|
+
* regardless of whether faiss was built with Dynamic Dispatch (DD) or static
|
|
146
|
+
* SIMD selection.
|
|
147
|
+
*
|
|
148
|
+
* In DD mode:
|
|
149
|
+
* - get_level() returns the runtime-detected or user-set level
|
|
150
|
+
* - set_level() changes the runtime level (if level is supported)
|
|
151
|
+
* - supported_simd_levels() returns bitmask of all compiled-in levels
|
|
152
|
+
*
|
|
153
|
+
* In static mode:
|
|
154
|
+
* - get_level() returns the compiled-in level
|
|
155
|
+
* - set_level() succeeds only if level matches compiled-in level
|
|
156
|
+
* - supported_simd_levels() returns bitmask with single level
|
|
157
|
+
*/
|
|
158
|
+
struct FAISS_API SIMDConfig {
|
|
159
|
+
static SIMDLevel level;
|
|
160
|
+
|
|
161
|
+
/// Returns bitmask of supported SIMD levels (1 << SIMDLevel).
|
|
162
|
+
static uint64_t supported_simd_levels;
|
|
163
|
+
|
|
164
|
+
static SIMDLevel auto_detect_simd_level();
|
|
165
|
+
|
|
166
|
+
SIMDConfig(const char** faiss_simd_level_env = nullptr);
|
|
167
|
+
|
|
168
|
+
/// Set the SIMD level. Throws FaissException if level is not supported.
|
|
169
|
+
static void set_level(SIMDLevel level);
|
|
170
|
+
static SIMDLevel get_level();
|
|
171
|
+
static std::string get_level_name();
|
|
172
|
+
|
|
173
|
+
/// Check if a SIMD level is available (compiled in).
|
|
174
|
+
static bool is_simd_level_available(SIMDLevel level);
|
|
175
|
+
|
|
176
|
+
/// Returns the SIMD level via the dispatch mechanism.
|
|
177
|
+
/// In DD mode, uses with_simd_level internally.
|
|
178
|
+
/// In static mode, returns the compiled-in level.
|
|
179
|
+
/// Useful for verification: get_level() == get_dispatched_level()
|
|
180
|
+
static SIMDLevel get_dispatched_level();
|
|
181
|
+
};
|
|
182
|
+
|
|
183
|
+
} // namespace faiss
|