faiss 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +88 -97
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +89 -417
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +374 -206
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +467 -364
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +79 -76
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +39 -69
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +56 -33
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +73 -846
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -20
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +30 -52
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +38 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +150 -20
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
- data/vendor/faiss/faiss/factory_tools.cpp +9 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +15 -16
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +5 -4
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +58 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +111 -0
- data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +639 -507
- data/vendor/faiss/faiss/impl/HNSW.h +61 -44
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +53 -32
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +269 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +55 -25
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +302 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +100 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +318 -7
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +77 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
- data/vendor/faiss/faiss/impl/VisitedTable.h +70 -28
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +270 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +83 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +113 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +150 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +142 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1227 -79
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +96 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +58 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +15 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +45 -107
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +274 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +10 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +70 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +9 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +419 -19
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +387 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +341 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +425 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +290 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +337 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +157 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +90 -18
- data/vendor/faiss/faiss/index_io.h +40 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +28 -15
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +170 -86
- data/vendor/faiss/faiss/invlists/InvertedLists.h +88 -25
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +142 -21
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +33 -7
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +77 -27
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +10 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/bf16.h +34 -0
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -178
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +16 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +210 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -989
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1031 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +29 -7
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +129 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -22,6 +22,190 @@ static float sqr(float x) {
|
|
|
22
22
|
return x * x;
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
+
// Largest number of bits per component accepted by the TurboQuant codebook
// builder.
constexpr size_t kTurboQuantMaxBits = 8;

// The 1-D optimal scalar quantizer for TurboQuant is derived analytically:
// the target density is sampled on a uniform grid over [-1, 1]. The grid
// resolution is bounded from below in absolute terms (kTurboQuantGridMin)
// and relative to the number of output centroids
// (kTurboQuantGridPerCentroid).
constexpr size_t kTurboQuantGridMin = size_t(1) << 15;
constexpr size_t kTurboQuantGridPerCentroid = 512;

// Iteration cap and convergence threshold (largest centroid displacement)
// for the Lloyd refinement of the codebook.
constexpr int kTurboQuantMaxIter = 100;
constexpr double kTurboQuantTol = 1e-8;
|
|
33
|
+
|
|
34
|
+
void build_TurboQuantMSECodebook(
|
|
35
|
+
size_t d,
|
|
36
|
+
size_t nbits,
|
|
37
|
+
std::vector<float>& centroids,
|
|
38
|
+
std::vector<float>& boundaries) {
|
|
39
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
40
|
+
nbits <= kTurboQuantMaxBits,
|
|
41
|
+
"invalid TurboQuant nbits %zu (must be in [0, %zu])",
|
|
42
|
+
nbits,
|
|
43
|
+
kTurboQuantMaxBits);
|
|
44
|
+
|
|
45
|
+
if (nbits == 0) {
|
|
46
|
+
centroids.clear();
|
|
47
|
+
boundaries.clear();
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
const size_t k = size_t(1) << nbits;
|
|
52
|
+
|
|
53
|
+
if (d == 1) {
|
|
54
|
+
// In 1-D, a unit vector can only be -1 or +1, so the marginal
|
|
55
|
+
// distribution collapses to two atoms. The TurboQuant codebook is
|
|
56
|
+
// therefore a repeated pair of endpoint centroids.
|
|
57
|
+
centroids.resize(k);
|
|
58
|
+
for (size_t i = 0; i < k; i++) {
|
|
59
|
+
centroids[i] = i < k / 2 ? -1.0f : 1.0f;
|
|
60
|
+
}
|
|
61
|
+
boundaries.resize(k - 1);
|
|
62
|
+
for (size_t i = 0; i + 1 < k; i++) {
|
|
63
|
+
boundaries[i] = 0.5f * (centroids[i] + centroids[i + 1]);
|
|
64
|
+
}
|
|
65
|
+
return;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// For d > 1, TurboQuant uses the marginal distribution of one coordinate of
|
|
69
|
+
// a random unit vector in R^d. On [-1, 1], this density is proportional to
|
|
70
|
+
// (1 - x^2)^((d - 3) / 2), which is a symmetric beta-law after a change of
|
|
71
|
+
// variables. The code below discretizes that density.
|
|
72
|
+
const size_t ngrid =
|
|
73
|
+
std::max(kTurboQuantGridMin, k * kTurboQuantGridPerCentroid);
|
|
74
|
+
const double step = 2.0 / ngrid;
|
|
75
|
+
const double alpha = 0.5 * (double(d) - 3.0);
|
|
76
|
+
|
|
77
|
+
std::vector<double> xs(ngrid);
|
|
78
|
+
// prefix_w stores the cumulative mass of the discretized density and
|
|
79
|
+
// prefix_wx stores its cumulative first moment, so interval means can be
|
|
80
|
+
// recovered in O(1).
|
|
81
|
+
std::vector<double> prefix_w(ngrid + 1, 0.0);
|
|
82
|
+
std::vector<double> prefix_wx(ngrid + 1, 0.0);
|
|
83
|
+
|
|
84
|
+
for (size_t i = 0; i < ngrid; i++) {
|
|
85
|
+
const double x = -1.0 + (i + 0.5) * step;
|
|
86
|
+
const double one_minus_x2 = std::max(0.0, 1.0 - x * x);
|
|
87
|
+
double w;
|
|
88
|
+
if (alpha == 0.0) { // when d == 3
|
|
89
|
+
w = 1.0;
|
|
90
|
+
} else {
|
|
91
|
+
// (1-x^2)^((d-3)/2)
|
|
92
|
+
w = std::pow(one_minus_x2, alpha);
|
|
93
|
+
}
|
|
94
|
+
if (!std::isfinite(w) || w < 0.0) {
|
|
95
|
+
w = 0.0;
|
|
96
|
+
}
|
|
97
|
+
xs[i] = x;
|
|
98
|
+
prefix_w[i + 1] = prefix_w[i] + w;
|
|
99
|
+
prefix_wx[i + 1] = prefix_wx[i] + w * x;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
auto range_mean = [&](size_t i0, size_t i1, double fallback) {
|
|
103
|
+
const double w = prefix_w[i1] - prefix_w[i0];
|
|
104
|
+
if (w <= 0.0) {
|
|
105
|
+
return fallback;
|
|
106
|
+
}
|
|
107
|
+
return (prefix_wx[i1] - prefix_wx[i0]) / w;
|
|
108
|
+
};
|
|
109
|
+
|
|
110
|
+
const double total_w = prefix_w.back();
|
|
111
|
+
std::vector<size_t> cuts(k + 1, 0);
|
|
112
|
+
cuts[k] = ngrid;
|
|
113
|
+
|
|
114
|
+
// Initialize with k equal-mass cells under the target density. This gives
|
|
115
|
+
// a stable starting point before the Lloyd refinements below.
|
|
116
|
+
for (size_t i = 1; i < k; i++) {
|
|
117
|
+
const double target = total_w * i / k;
|
|
118
|
+
cuts[i] = std::lower_bound(prefix_w.begin(), prefix_w.end(), target) -
|
|
119
|
+
prefix_w.begin();
|
|
120
|
+
cuts[i] = std::min(cuts[i], ngrid);
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
std::vector<double> centroids_d(k);
|
|
124
|
+
for (size_t i = 0; i < k; i++) {
|
|
125
|
+
const double left = -1.0 + 2.0 * i / k;
|
|
126
|
+
const double right = -1.0 + 2.0 * (i + 1) / k;
|
|
127
|
+
// First estimate of each centroid: the conditional mean of its initial
|
|
128
|
+
// equal-mass cell, with a uniform-cell midpoint as a fallback.
|
|
129
|
+
centroids_d[i] = range_mean(cuts[i], cuts[i + 1], 0.5 * (left + right));
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
std::vector<double> boundaries_d(k > 0 ? k - 1 : 0);
|
|
133
|
+
|
|
134
|
+
// Refine the 1-D codebook with a weighted Lloyd iteration over the
|
|
135
|
+
// discretized marginal density on [-1, 1]:
|
|
136
|
+
// 1. boundaries_d are the Voronoi separators implied by neighboring
|
|
137
|
+
// centroids.
|
|
138
|
+
// 2. cuts map each boundary interval back to a contiguous range of the
|
|
139
|
+
// integration grid xs[].
|
|
140
|
+
// 3. each centroid becomes the weighted mean of the samples currently in
|
|
141
|
+
// its cell, clipped to stay within its neighboring boundaries.
|
|
142
|
+
//
|
|
143
|
+
// The loop stops once the largest centroid update is below kTurboQuantTol.
|
|
144
|
+
for (int iter = 0; iter < kTurboQuantMaxIter; iter++) {
|
|
145
|
+
// Midpoints between adjacent centroids define the current Voronoi
|
|
146
|
+
// partition of [-1, 1].
|
|
147
|
+
for (size_t i = 0; i + 1 < k; i++) {
|
|
148
|
+
boundaries_d[i] = 0.5 * (centroids_d[i] + centroids_d[i + 1]);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
cuts[0] = 0;
|
|
152
|
+
cuts[k] = ngrid;
|
|
153
|
+
// Reassign the discretized density samples to the Voronoi cell induced
|
|
154
|
+
// by each boundary. Because xs is sorted, the reassignment reduces to
|
|
155
|
+
// finding the first grid point strictly greater than each boundary.
|
|
156
|
+
for (size_t i = 1; i < k; i++) {
|
|
157
|
+
cuts[i] = std::upper_bound(
|
|
158
|
+
xs.begin(), xs.end(), boundaries_d[i - 1]) -
|
|
159
|
+
xs.begin();
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
double max_delta = 0.0;
|
|
163
|
+
for (size_t i = 0; i < k; i++) {
|
|
164
|
+
const double left = i == 0 ? -1.0 : boundaries_d[i - 1];
|
|
165
|
+
const double right = i + 1 == k ? 1.0 : boundaries_d[i];
|
|
166
|
+
// Lloyd update: replace the centroid with the weighted average of
|
|
167
|
+
// the mass assigned to its cell. Empty cells fall back to the cell
|
|
168
|
+
// midpoint, and we clamp to [left, right] to preserve ordering.
|
|
169
|
+
double c = range_mean(cuts[i], cuts[i + 1], 0.5 * (left + right));
|
|
170
|
+
c = std::min(std::max(c, left), right);
|
|
171
|
+
max_delta = std::max(max_delta, std::abs(c - centroids_d[i]));
|
|
172
|
+
centroids_d[i] = c;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
if (max_delta < kTurboQuantTol) {
|
|
176
|
+
break;
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
std::sort(centroids_d.begin(), centroids_d.end());
|
|
181
|
+
|
|
182
|
+
centroids.resize(k);
|
|
183
|
+
boundaries.resize(k - 1);
|
|
184
|
+
for (size_t i = 0; i < k; i++) {
|
|
185
|
+
centroids[i] = centroids_d[i];
|
|
186
|
+
}
|
|
187
|
+
for (size_t i = 0; i + 1 < k; i++) {
|
|
188
|
+
boundaries[i] = 0.5f * (centroids[i] + centroids[i + 1]);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
// Produce the flat TurboQuant "trained" table for a scalar quantizer.
//
// d        vector dimensionality, forwarded to build_TurboQuantMSECodebook
// nbits    bits per component; must be > 0 (checked with
//          FAISS_THROW_IF_NOT_FMT)
// trained  output, resized to 2k - 1 floats where k is the number of
//          centroids returned by the builder: the k centroids first,
//          followed by the k - 1 boundaries
void train_TurboQuantMSE(size_t d, size_t nbits, std::vector<float>& trained) {
    FAISS_THROW_IF_NOT_FMT(
            nbits > 0, "invalid TurboQuant SQ nbits %zu (must be > 0)", nbits);

    std::vector<float> codebook;
    std::vector<float> separators;
    build_TurboQuantMSECodebook(d, nbits, codebook, separators);

    const size_t ncent = codebook.size();
    trained.resize(2 * ncent - 1);

    // Layout: [centroids | boundaries].
    std::copy_n(codebook.begin(), ncent, trained.begin());
    std::copy_n(separators.begin(), ncent - 1, trained.begin() + ncent);
}
|
|
208
|
+
|
|
25
209
|
void train_Uniform(
|
|
26
210
|
RangeStat rs,
|
|
27
211
|
float rs_arg,
|
|
@@ -37,7 +221,7 @@ void train_Uniform(
|
|
|
37
221
|
if (rs == ScalarQuantizer::RS_minmax) {
|
|
38
222
|
vmin = HUGE_VAL;
|
|
39
223
|
vmax = -HUGE_VAL;
|
|
40
|
-
for (
|
|
224
|
+
for (idx_t i = 0; i < n; i++) {
|
|
41
225
|
if (x[i] < vmin) {
|
|
42
226
|
vmin = x[i];
|
|
43
227
|
}
|
|
@@ -50,7 +234,7 @@ void train_Uniform(
|
|
|
50
234
|
vmax += vexp;
|
|
51
235
|
} else if (rs == ScalarQuantizer::RS_meanstd) {
|
|
52
236
|
double sum = 0, sum2 = 0;
|
|
53
|
-
for (
|
|
237
|
+
for (idx_t i = 0; i < n; i++) {
|
|
54
238
|
sum += x[i];
|
|
55
239
|
sum2 += x[i] * x[i];
|
|
56
240
|
}
|
|
@@ -81,7 +265,7 @@ void train_Uniform(
|
|
|
81
265
|
float sx = 0;
|
|
82
266
|
{
|
|
83
267
|
vmin = HUGE_VAL, vmax = -HUGE_VAL;
|
|
84
|
-
for (
|
|
268
|
+
for (idx_t i = 0; i < n; i++) {
|
|
85
269
|
if (x[i] < vmin) {
|
|
86
270
|
vmin = x[i];
|
|
87
271
|
}
|
|
@@ -161,9 +345,9 @@ void train_NonUniform(
|
|
|
161
345
|
if (rs == ScalarQuantizer::RS_minmax) {
|
|
162
346
|
memcpy(vmin, x, sizeof(*x) * d);
|
|
163
347
|
memcpy(vmax, x, sizeof(*x) * d);
|
|
164
|
-
for (
|
|
348
|
+
for (idx_t i = 1; i < n; i++) {
|
|
165
349
|
const float* xi = x + i * d;
|
|
166
|
-
for (
|
|
350
|
+
for (int j = 0; j < d; j++) {
|
|
167
351
|
if (xi[j] < vmin[j]) {
|
|
168
352
|
vmin[j] = xi[j];
|
|
169
353
|
}
|
|
@@ -173,7 +357,7 @@ void train_NonUniform(
|
|
|
173
357
|
}
|
|
174
358
|
}
|
|
175
359
|
float* vdiff = vmax;
|
|
176
|
-
for (
|
|
360
|
+
for (int j = 0; j < d; j++) {
|
|
177
361
|
float vexp = (vmax[j] - vmin[j]) * rs_arg;
|
|
178
362
|
vmin[j] -= vexp;
|
|
179
363
|
vmax[j] += vexp;
|
|
@@ -182,9 +366,9 @@ void train_NonUniform(
|
|
|
182
366
|
} else {
|
|
183
367
|
// transpose
|
|
184
368
|
std::vector<float> xt(n * d);
|
|
185
|
-
for (
|
|
369
|
+
for (idx_t i = 1; i < n; i++) {
|
|
186
370
|
const float* xi = x + i * d;
|
|
187
|
-
for (
|
|
371
|
+
for (int j = 0; j < d; j++) {
|
|
188
372
|
xt[j * n + i] = xi[j];
|
|
189
373
|
}
|
|
190
374
|
}
|
|
@@ -37,6 +37,18 @@ void train_NonUniform(
|
|
|
37
37
|
int k,
|
|
38
38
|
const float* x,
|
|
39
39
|
std::vector<float>& trained);
|
|
40
|
+
|
|
41
|
+
/** Build the TurboQuant MSE codebook using the beta-distribution-optimal
|
|
42
|
+
* quantizer from the TurboQuant paper. The codebook is analytical
|
|
43
|
+
* (depends only on d and nbits, no training data needed).
|
|
44
|
+
*
|
|
45
|
+
* @param d vector dimensionality (used for beta-distribution shape)
|
|
46
|
+
* @param nbits bits per component (1-8)
|
|
47
|
+
* @param trained output: [centroids (k floats), boundaries (k-1 floats)]
|
|
48
|
+
* where k = 2^nbits
|
|
49
|
+
*/
|
|
50
|
+
void train_TurboQuantMSE(size_t d, size_t nbits, std::vector<float>& trained);
|
|
51
|
+
|
|
40
52
|
} // namespace scalar_quantizer
|
|
41
53
|
|
|
42
54
|
} // namespace faiss
|
|
@@ -23,86 +23,145 @@
|
|
|
23
23
|
|
|
24
24
|
namespace faiss {
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
/** Defining which SIMD levels are available for a given function is via a
|
|
27
|
+
* binary mask. Here we predefine the most common masks.
|
|
28
|
+
* */
|
|
29
|
+
|
|
30
|
+
constexpr int AVAILABLE_SIMD_LEVELS_NONE = (1 << int(SIMDLevel::NONE));
|
|
31
|
+
|
|
32
|
+
constexpr int AVAILABLE_SIMD_LEVELS_AVX2_NEON = AVAILABLE_SIMD_LEVELS_NONE |
|
|
33
|
+
(1 << int(SIMDLevel::AVX2)) | (1 << int(SIMDLevel::ARM_NEON));
|
|
34
|
+
|
|
35
|
+
// A0: same + AVX512 + RISCV_RVV
|
|
36
|
+
constexpr int AVAILABLE_SIMD_LEVELS_A0 = AVAILABLE_SIMD_LEVELS_AVX2_NEON |
|
|
37
|
+
(1 << int(SIMDLevel::AVX512)) | (1 << int(SIMDLevel::RISCV_RVV));
|
|
38
|
+
|
|
39
|
+
// A0_SPR: same as A0 + AVX512_SPR (for functions with a dedicated SPR
|
|
40
|
+
// specialization on top of an AVX512 fallback). Currently used by the
|
|
41
|
+
// RaBitQ popcount kernels, which use VPOPCNTDQ on SPR+.
|
|
42
|
+
constexpr int AVAILABLE_SIMD_LEVELS_A0_SPR =
|
|
43
|
+
AVAILABLE_SIMD_LEVELS_A0 | (1 << int(SIMDLevel::AVX512_SPR));
|
|
44
|
+
|
|
45
|
+
// A1: same + ARM_SVE (for functions with dedicated SVE implementations)
|
|
46
|
+
constexpr int AVAILABLE_SIMD_LEVELS_A1 =
|
|
47
|
+
AVAILABLE_SIMD_LEVELS_A0 | (1 << int(SIMDLevel::ARM_SVE));
|
|
48
|
+
|
|
49
|
+
// A2: NONE + AVX2 + ARM_SVE only (for functions with only these
|
|
50
|
+
// implementations)
|
|
51
|
+
constexpr int AVAILABLE_SIMD_LEVELS_A2 = AVAILABLE_SIMD_LEVELS_NONE |
|
|
52
|
+
(1 << int(SIMDLevel::AVX2)) | (1 << int(SIMDLevel::ARM_SVE));
|
|
53
|
+
|
|
54
|
+
constexpr int AVAILABLE_SIMD_LEVELS_ALL = -1;
|
|
55
|
+
|
|
56
|
+
/**
 * Return the next SIMD level to try when `level` itself is not usable.
 *
 * Fallback chains:
 *   AVX512_SPR -> AVX512 -> AVX2 -> NONE
 *   ARM_SVE -> ARM_NEON -> NONE
 *   RISCV_RVV -> NONE
 * Any other level (including NONE) maps to NONE.
 */
constexpr SIMDLevel get_simd_fallback(SIMDLevel level) {
    if (level == SIMDLevel::AVX512_SPR) {
        return SIMDLevel::AVX512;
    }
    if (level == SIMDLevel::AVX512) {
        return SIMDLevel::AVX2;
    }
    if (level == SIMDLevel::ARM_SVE) {
        return SIMDLevel::ARM_NEON;
    }
    // AVX2, ARM_NEON, RISCV_RVV and everything else end the chain.
    return SIMDLevel::NONE;
}
|
|
27
72
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
return
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
73
|
+
template <int available_levels, SIMDLevel current_level, typename LambdaType>
|
|
74
|
+
inline auto dispatch_with_fallback(LambdaType&& action) {
|
|
75
|
+
if constexpr (available_levels & (1 << int(current_level))) {
|
|
76
|
+
return action.template operator()<current_level>();
|
|
77
|
+
} else if constexpr (current_level != SIMDLevel::NONE) {
|
|
78
|
+
return dispatch_with_fallback<
|
|
79
|
+
available_levels,
|
|
80
|
+
get_simd_fallback(current_level)>(
|
|
81
|
+
std::forward<LambdaType>(action));
|
|
82
|
+
} else {
|
|
83
|
+
return action.template operator()<SIMDLevel::NONE>();
|
|
84
|
+
}
|
|
85
|
+
}
|
|
35
86
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
87
|
+
/** The complete dispatching function. It takes into account:
|
|
88
|
+
* - the currently selected SIMD level
|
|
89
|
+
* - the compiled in SIMD levels (given by COMPILE_SIMD_XXX)
|
|
90
|
+
* - the available SIMD implementations for that particular function (given by
|
|
91
|
+
* available_levels)
|
|
92
|
+
*/
|
|
93
|
+
|
|
94
|
+
template <int available_levels, typename LambdaType>
|
|
95
|
+
inline auto with_selected_simd_levels(LambdaType&& action) {
|
|
96
|
+
#ifdef FAISS_ENABLE_DD
|
|
97
|
+
switch (SIMDConfig::level) {
|
|
98
|
+
// For x86 -- try from highest to lowest level
|
|
43
99
|
|
|
44
100
|
#ifdef COMPILE_SIMD_AVX512_SPR
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
101
|
+
case SIMDLevel::AVX512_SPR:
|
|
102
|
+
if constexpr (
|
|
103
|
+
available_levels & (1 << int(SIMDLevel::AVX512_SPR))) {
|
|
104
|
+
return action.template operator()<SIMDLevel::AVX512_SPR>();
|
|
105
|
+
}
|
|
106
|
+
[[fallthrough]];
|
|
50
107
|
#endif
|
|
51
108
|
|
|
52
|
-
|
|
109
|
+
#ifdef COMPILE_SIMD_AVX512
|
|
110
|
+
case SIMDLevel::AVX512:
|
|
111
|
+
if constexpr (available_levels & (1 << int(SIMDLevel::AVX512))) {
|
|
112
|
+
return action.template operator()<SIMDLevel::AVX512>();
|
|
113
|
+
}
|
|
114
|
+
[[fallthrough]];
|
|
115
|
+
#endif
|
|
53
116
|
|
|
54
|
-
#ifdef
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
117
|
+
#ifdef COMPILE_SIMD_AVX2
|
|
118
|
+
case SIMDLevel::AVX2:
|
|
119
|
+
if constexpr (available_levels & (1 << int(SIMDLevel::AVX2))) {
|
|
120
|
+
return action.template operator()<SIMDLevel::AVX2>();
|
|
121
|
+
}
|
|
122
|
+
[[fallthrough]];
|
|
60
123
|
#endif
|
|
61
124
|
|
|
125
|
+
// For ARM, try from highest to lowest level
|
|
62
126
|
#ifdef COMPILE_SIMD_ARM_SVE
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
127
|
+
case SIMDLevel::ARM_SVE:
|
|
128
|
+
if constexpr (available_levels & (1 << int(SIMDLevel::ARM_SVE))) {
|
|
129
|
+
return action.template operator()<SIMDLevel::ARM_SVE>();
|
|
130
|
+
}
|
|
131
|
+
[[fallthrough]];
|
|
68
132
|
#endif
|
|
69
133
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
134
|
+
#ifdef COMPILE_SIMD_ARM_NEON
|
|
135
|
+
case SIMDLevel::ARM_NEON:
|
|
136
|
+
if constexpr (available_levels & (1 << int(SIMDLevel::ARM_NEON))) {
|
|
137
|
+
return action.template operator()<SIMDLevel::ARM_NEON>();
|
|
138
|
+
}
|
|
139
|
+
[[fallthrough]];
|
|
140
|
+
#endif
|
|
73
141
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
DISPATCH_SIMDLevel_ARM_SVE(f, __VA_ARGS__); \
|
|
84
|
-
default: \
|
|
85
|
-
FAISS_THROW_MSG("Invalid SIMD level"); \
|
|
142
|
+
#ifdef COMPILE_SIMD_RISCV_RVV
|
|
143
|
+
case SIMDLevel::RISCV_RVV:
|
|
144
|
+
if constexpr (available_levels & (1 << int(SIMDLevel::RISCV_RVV))) {
|
|
145
|
+
return action.template operator()<SIMDLevel::RISCV_RVV>();
|
|
146
|
+
}
|
|
147
|
+
[[fallthrough]];
|
|
148
|
+
#endif
|
|
149
|
+
default:
|
|
150
|
+
return action.template operator()<SIMDLevel::NONE>();
|
|
86
151
|
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
//
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
#define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::ARM_SVE>(__VA_ARGS__)
|
|
99
|
-
#elif defined(COMPILE_SIMD_ARM_NEON)
|
|
100
|
-
#define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::ARM_NEON>(__VA_ARGS__)
|
|
101
|
-
#else
|
|
102
|
-
#define DISPATCH_SIMDLevel(f, ...) return f<SIMDLevel::NONE>(__VA_ARGS__)
|
|
152
|
+
#else // static dispatch
|
|
153
|
+
// In static mode, SINGLE_SIMD_LEVEL is a constexpr resolved at compile
|
|
154
|
+
// time. We mirror the DD fallthrough behavior at compile time via
|
|
155
|
+
// dispatch_with_fallback, which recursively walks get_simd_fallback:
|
|
156
|
+
// x86: AVX512_SPR -> AVX512 -> AVX2 -> NONE
|
|
157
|
+
// ARM: ARM_SVE -> ARM_NEON -> NONE
|
|
158
|
+
// RISCV: RISCV_RVV -> NONE
|
|
159
|
+
// The first level in the chain that appears in available_levels is
|
|
160
|
+
// selected; if none match, NONE is used unconditionally.
|
|
161
|
+
return dispatch_with_fallback<available_levels, SINGLE_SIMD_LEVEL>(
|
|
162
|
+
std::forward<LambdaType>(action));
|
|
103
163
|
#endif
|
|
104
|
-
|
|
105
|
-
#endif // FAISS_ENABLE_DD
|
|
164
|
+
}
|
|
106
165
|
|
|
107
166
|
/**
|
|
108
167
|
* Dispatch to a lambda with SIMDLevel as a compile-time constant.
|
|
@@ -126,6 +185,8 @@ namespace faiss {
|
|
|
126
185
|
* });
|
|
127
186
|
*
|
|
128
187
|
* The lambda must be a generic lambda with a SIMDLevel template parameter.
|
|
188
|
+
* By default, the lambda uses levels AVX2 + AVX512 + NEON + RVV, since these
|
|
189
|
+
* are the most common cases.
|
|
129
190
|
*
|
|
130
191
|
* @param action A generic lambda with signature `template<SIMDLevel> T
|
|
131
192
|
* operator()()`
|
|
@@ -133,7 +194,37 @@ namespace faiss {
|
|
|
133
194
|
*/
|
|
134
195
|
template <typename LambdaType>
|
|
135
196
|
inline auto with_simd_level(LambdaType&& action) {
|
|
136
|
-
|
|
197
|
+
return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A0>(
|
|
198
|
+
std::forward<LambdaType>(action));
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
/**
|
|
202
|
+
* Use for functions with AVX512_SPR-specific implementations.
|
|
203
|
+
*/
|
|
204
|
+
template <typename LambdaType>
|
|
205
|
+
inline auto with_simd_level_spr(LambdaType&& action) {
|
|
206
|
+
return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A0_SPR>(
|
|
207
|
+
std::forward<LambdaType>(action));
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
/**
|
|
211
|
+
* Use for functions implemented with simdXintY (256-bit) operations
|
|
212
|
+
* that don't have dedicated AVX512 or SVE implementations.
|
|
213
|
+
*/
|
|
214
|
+
template <typename LambdaType>
|
|
215
|
+
inline auto with_simd_level_256bit(LambdaType&& action) {
|
|
216
|
+
return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_AVX2_NEON>(
|
|
217
|
+
std::forward<LambdaType>(action));
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
/**
|
|
221
|
+
* Use for functions that have A0-level implementations plus an AVX512_SPR
|
|
222
|
+
* specialization (e.g. using VPOPCNTDQ).
|
|
223
|
+
*/
|
|
224
|
+
template <typename LambdaType>
|
|
225
|
+
inline auto with_simd_level_a0_spr(LambdaType&& action) {
|
|
226
|
+
return with_selected_simd_levels<AVAILABLE_SIMD_LEVELS_A0_SPR>(
|
|
227
|
+
std::forward<LambdaType>(action));
|
|
137
228
|
}
|
|
138
229
|
|
|
139
230
|
} // namespace faiss
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
/** Abstractions for 256-bit and 512-bit SIMD registers.
|
|
11
|
+
*
|
|
12
|
+
* The objective is to separate the different interpretations of the same
|
|
13
|
+
* registers (as a vector of uint8, uint16 or uint32), to provide printing
|
|
14
|
+
* functions.
|
|
15
|
+
*
|
|
16
|
+
* The types are templatized on SIMDLevel. Each platform header provides
|
|
17
|
+
* explicit specializations for the appropriate level. Code without explicit
|
|
18
|
+
* SL context uses SINGLE_SIMD_LEVEL (see simd_levels.h).
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
#include <faiss/utils/simd_levels.h>
|
|
22
|
+
|
|
23
|
+
namespace faiss {
|
|
24
|
+
|
|
25
|
+
// 256-bit primary templates
|
|
26
|
+
template <SIMDLevel SL>
|
|
27
|
+
struct simd256bit_tpl {};
|
|
28
|
+
template <SIMDLevel SL>
|
|
29
|
+
struct simd16uint16_tpl : simd256bit_tpl<SL> {};
|
|
30
|
+
template <SIMDLevel SL>
|
|
31
|
+
struct simd32uint8_tpl : simd256bit_tpl<SL> {};
|
|
32
|
+
template <SIMDLevel SL>
|
|
33
|
+
struct simd8uint32_tpl : simd256bit_tpl<SL> {};
|
|
34
|
+
template <SIMDLevel SL>
|
|
35
|
+
struct simd8float32_tpl : simd256bit_tpl<SL> {};
|
|
36
|
+
|
|
37
|
+
// 512-bit primary templates
|
|
38
|
+
template <SIMDLevel SL>
|
|
39
|
+
struct simd512bit_tpl {};
|
|
40
|
+
template <SIMDLevel SL>
|
|
41
|
+
struct simd32uint16_tpl : simd512bit_tpl<SL> {};
|
|
42
|
+
template <SIMDLevel SL>
|
|
43
|
+
struct simd64uint8_tpl : simd512bit_tpl<SL> {};
|
|
44
|
+
template <SIMDLevel SL>
|
|
45
|
+
struct simd16float32_tpl : simd512bit_tpl<SL> {};
|
|
46
|
+
|
|
47
|
+
} // namespace faiss
|
|
48
|
+
|
|
49
|
+
// NONE specialization — always included.
|
|
50
|
+
// Provides simd16uint16_tpl<NONE> etc. (scalar fallback).
|
|
51
|
+
// On PPC64: uses PPC-optimized scalar code (hand-tuned loop unrolling).
|
|
52
|
+
// Elsewhere: generic scalar implementation.
|
|
53
|
+
#if defined(__PPC64__)
|
|
54
|
+
#include <faiss/impl/simdlib/simdlib_ppc64.h>
|
|
55
|
+
#else
|
|
56
|
+
#include <faiss/impl/simdlib/simdlib_emulated.h>
|
|
57
|
+
#endif
|