faiss 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +88 -97
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +89 -417
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +374 -206
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +467 -364
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +79 -76
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +39 -69
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +56 -33
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +73 -846
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -20
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +30 -52
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +38 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +150 -20
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
- data/vendor/faiss/faiss/factory_tools.cpp +9 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +15 -16
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +5 -4
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +58 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +111 -0
- data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +639 -507
- data/vendor/faiss/faiss/impl/HNSW.h +61 -44
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +53 -32
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +269 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +55 -25
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +302 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +100 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +318 -7
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +77 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
- data/vendor/faiss/faiss/impl/VisitedTable.h +70 -28
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +270 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +83 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +113 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +150 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +142 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1227 -79
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +96 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +58 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +15 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +45 -107
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +274 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +10 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +70 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +9 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +419 -19
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +387 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +341 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +425 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +290 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +337 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +157 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +90 -18
- data/vendor/faiss/faiss/index_io.h +40 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +28 -15
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +170 -86
- data/vendor/faiss/faiss/invlists/InvertedLists.h +88 -25
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +142 -21
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +33 -7
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +77 -27
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +10 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/bf16.h +34 -0
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -178
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +16 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +210 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -989
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1031 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +29 -7
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +129 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
#include <stdint.h>
|
|
12
12
|
|
|
13
|
+
#include <memory>
|
|
13
14
|
#include <optional>
|
|
14
15
|
#include <unordered_set>
|
|
15
16
|
#include <vector>
|
|
@@ -21,47 +22,88 @@ namespace faiss {
|
|
|
21
22
|
|
|
22
23
|
FAISS_API extern size_t visited_table_hashset_threshold;
|
|
23
24
|
|
|
24
|
-
///
|
|
25
|
+
/// Abstract base class for a fast, reusable Visited Set for graph search
|
|
26
|
+
/// algorithms.
|
|
25
27
|
struct VisitedTable {
|
|
26
|
-
|
|
27
|
-
std::unordered_set<size_t> visited_set;
|
|
28
|
-
uint8_t visno; // 0 if using visited_set, 1..250 if using vector.
|
|
28
|
+
virtual ~VisitedTable() = default;
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
30
|
+
/// set flag #no to true, return whether this changed it.
|
|
31
|
+
virtual bool set(size_t no) = 0;
|
|
32
|
+
|
|
33
|
+
/// get flag #no
|
|
34
|
+
virtual bool get(size_t no) const = 0;
|
|
35
|
+
|
|
36
|
+
/// prefetch flag #no
|
|
37
|
+
virtual void prefetch(size_t no) const = 0;
|
|
38
|
+
|
|
39
|
+
/// pre-allocate bucket space to avoid rehashing during repeated set() calls
|
|
40
|
+
virtual void reserve(size_t /*n*/) {}
|
|
41
|
+
|
|
42
|
+
/// reset all flags to false
|
|
43
|
+
virtual void advance() = 0;
|
|
44
|
+
|
|
45
|
+
/// Factory method to create appropriate implementation.
|
|
46
|
+
/// If use_hashset is nullopt, the use of a hashset will be determined by
|
|
47
|
+
/// size >= visited_table_hashset_threshold.
|
|
48
|
+
static std::unique_ptr<VisitedTable> create(
|
|
33
49
|
size_t size,
|
|
34
50
|
std::optional<bool> use_hashset = std::nullopt);
|
|
51
|
+
};
|
|
35
52
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
53
|
+
/// Set-based implementation using unordered_set.
|
|
54
|
+
/// O(1) to construct and O(visits) to advance.
|
|
55
|
+
struct VisitedTableSet FAISS_FINAL : VisitedTable {
|
|
56
|
+
std::unordered_set<size_t> visited_set;
|
|
57
|
+
|
|
58
|
+
VisitedTableSet() = default;
|
|
59
|
+
|
|
60
|
+
bool set(size_t no) final {
|
|
61
|
+
return visited_set.insert(no).second;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
bool get(size_t no) const final {
|
|
65
|
+
return visited_set.count(no) != 0;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
void prefetch(size_t /*no*/) const final {
|
|
69
|
+
// No-op for set-based implementation
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
void reserve(size_t n) final {
|
|
73
|
+
visited_set.reserve(n);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
void advance() final {
|
|
77
|
+
visited_set.clear();
|
|
78
|
+
}
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
/// Vector-based implementation using a versioned byte array.
|
|
82
|
+
/// Faster for get()/set(), but O(size) to initialize.
|
|
83
|
+
/// advance() is O(1) except every 250 calls, which are O(size).
|
|
84
|
+
struct VisitedTableVector FAISS_FINAL : VisitedTable {
|
|
85
|
+
std::vector<uint8_t> visited;
|
|
86
|
+
uint8_t visno{1}; // Version number, 1..254
|
|
87
|
+
|
|
88
|
+
explicit VisitedTableVector(size_t size) : visited(size, 0) {}
|
|
89
|
+
|
|
90
|
+
bool set(size_t no) final {
|
|
91
|
+
if (visited[no] == visno) {
|
|
41
92
|
return false;
|
|
42
|
-
} else {
|
|
43
|
-
visited[no] = visno;
|
|
44
|
-
return true;
|
|
45
93
|
}
|
|
94
|
+
visited[no] = visno;
|
|
95
|
+
return true;
|
|
46
96
|
}
|
|
47
97
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
if (visno == 0) {
|
|
51
|
-
return visited_set.count(no) != 0;
|
|
52
|
-
} else {
|
|
53
|
-
return visited[no] == visno;
|
|
54
|
-
}
|
|
98
|
+
bool get(size_t no) const final {
|
|
99
|
+
return visited[no] == visno;
|
|
55
100
|
}
|
|
56
101
|
|
|
57
|
-
void prefetch(size_t no) const {
|
|
58
|
-
|
|
59
|
-
prefetch_L2(&visited[no]);
|
|
60
|
-
}
|
|
102
|
+
void prefetch(size_t no) const final {
|
|
103
|
+
prefetch_L2(&visited[no]);
|
|
61
104
|
}
|
|
62
105
|
|
|
63
|
-
|
|
64
|
-
void advance();
|
|
106
|
+
void advance() final;
|
|
65
107
|
};
|
|
66
108
|
|
|
67
109
|
} // namespace faiss
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Approximate top-k search using bucketed heaps with SIMD acceleration.
|
|
9
|
+
//
|
|
10
|
+
// HeapWithBuckets<C, NBUCKETS, N> splits n elements into NBUCKETS buckets,
|
|
11
|
+
// tracks the top-N per bucket using SIMD, then merges into a regular heap.
|
|
12
|
+
// This trades a small accuracy loss for significant speedup.
|
|
13
|
+
//
|
|
14
|
+
// The core idea is the following.
|
|
15
|
+
// Say we need to find beam_size indices with the minimal distance
|
|
16
|
+
// values. It is done via heap (priority_queue) using the following
|
|
17
|
+
// pseudocode:
|
|
18
|
+
//
|
|
19
|
+
// def baseline():
|
|
20
|
+
// distances = np.empty([beam_size * n], dtype=float)
|
|
21
|
+
// indices = np.empty([beam_size * n], dtype=int)
|
|
22
|
+
//
|
|
23
|
+
// heap = Heap(max_heap_size=beam_size)
|
|
24
|
+
//
|
|
25
|
+
// for i in range(0, beam_size * n):
|
|
26
|
+
// heap.push(distances[i], indices[i])
|
|
27
|
+
//
|
|
28
|
+
// Basically, this is what heap_addn() function from utils/Heap.h does.
|
|
29
|
+
//
|
|
30
|
+
// The following scheme can be used for approximate beam search.
|
|
31
|
+
// Say, we need to find elements with min distance.
|
|
32
|
+
// Basically, we split n elements of every beam into NBUCKETS buckets
|
|
33
|
+
// and track the index with the minimal distance for every bucket.
|
|
34
|
+
// This can be effectively SIMD-ed and significantly lowers the number
|
|
35
|
+
// of operations, but yields approximate results for beam_size >= 2.
|
|
36
|
+
//
|
|
37
|
+
// def approximate_v1():
|
|
38
|
+
// distances = np.empty([beam_size * n], dtype=float)
|
|
39
|
+
// indices = np.empty([beam_size * n], dtype=int)
|
|
40
|
+
//
|
|
41
|
+
// heap = Heap(max_heap_size=beam_size)
|
|
42
|
+
//
|
|
43
|
+
// for beam in range(0, beam_size):
|
|
44
|
+
// # The value of 32 is just an example.
|
|
45
|
+
// # The value may be varied: the larger the value is,
|
|
46
|
+
// # the slower and the more precise vs baseline beam search is
|
|
47
|
+
// NBUCKETS = 32
|
|
48
|
+
//
|
|
49
|
+
// local_min_distances = [HUGE_VALF] * NBUCKETS
|
|
50
|
+
// local_min_indices = [0] * NBUCKETS
|
|
51
|
+
//
|
|
52
|
+
// for i in range(0, n / NBUCKETS):
|
|
53
|
+
// for j in range(0, NBUCKETS):
|
|
54
|
+
// idx = beam * n + i * NBUCKETS + j
|
|
55
|
+
// if distances[idx] < local_min_distances[j]:
|
|
56
|
+
// local_min_distances[j] = distances[idx]
|
|
57
|
+
// local_min_indices[j] = indices[idx]
|
|
58
|
+
//
|
|
59
|
+
// for j in range(0, NBUCKETS):
|
|
60
|
+
// heap.push(local_min_distances[j], local_min_indices[j])
|
|
61
|
+
//
|
|
62
|
+
// The accuracy can be improved by tracking min-2 elements for every
|
|
63
|
+
// bucket. Such a min-2 implementation with NBUCKETS buckets provides
|
|
64
|
+
// better accuracy than top-1 implementation with 2 * NBUCKETS buckets.
|
|
65
|
+
// Min-3 is also doable. One can use a min-N approach, but I'm not sure
|
|
66
|
+
// whether min-4 and above are practical, because of the lack of SIMD
|
|
67
|
+
// registers (unless AVX-512 version is used).
|
|
68
|
+
//
|
|
69
|
+
// C++ template for top-N implementation is provided. The code
|
|
70
|
+
// assumes that indices[idx] == idx. One can write code that lifts
|
|
71
|
+
// such an assumption easily.
|
|
72
|
+
//
|
|
73
|
+
// Currently, the code that tracks elements with min distances is implemented
|
|
74
|
+
// (Max Heap). Min Heap option can be added easily.
|
|
75
|
+
//
|
|
76
|
+
// Dispatch:
|
|
77
|
+
// AVX2 / ARM_NEON → HeapWithBucketsCMaxFloat (simdlib256-inl.h)
|
|
78
|
+
// NONE (scalar) → HeapWithBucketsGenericCMaxFloat (generic.h)
|
|
79
|
+
//
|
|
80
|
+
// The SIMD definitions live in simdlib256-inl.h (only included by per-ISA
|
|
81
|
+
// .cpp files). Common TUs see only declarations here, so no extern template
|
|
82
|
+
// suppression is needed.
|
|
83
|
+
|
|
84
|
+
#pragma once
|
|
85
|
+
|
|
86
|
+
#include <cstdint>
|
|
87
|
+
|
|
88
|
+
#include <faiss/impl/approx_topk/generic.h>
|
|
89
|
+
#include <faiss/impl/platform_macros.h>
|
|
90
|
+
#include <faiss/impl/simd_dispatch.h>
|
|
91
|
+
#include <faiss/utils/Heap.h>
|
|
92
|
+
#include <faiss/utils/simd_levels.h>
|
|
93
|
+
|
|
94
|
+
// -----------------------------------------------------------------------
|
|
95
|
+
// ApproxTopK_mode_t
|
|
96
|
+
// -----------------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
/// Represents the mode of use of approximate top-k computations
|
|
99
|
+
/// that allows trading accuracy for speed. So, every option
|
|
100
|
+
/// besides EXACT_TOPK increases the speed.
|
|
101
|
+
///
|
|
102
|
+
/// B represents the number of buckets.
|
|
103
|
+
/// D is the number of min-k elements to track within every bucket.
|
|
104
|
+
///
|
|
105
|
+
/// Default option is EXACT_TOPK.
|
|
106
|
+
/// APPROX_TOPK_BUCKETS_B16_D2 is worth starting from, if you'd like
|
|
107
|
+
/// to experiment a bit.
|
|
108
|
+
///
|
|
109
|
+
/// It seems that only a limited number of combinations are
|
|
110
|
+
/// meaningful, because of the limited supply of SIMD registers.
|
|
111
|
+
/// Also, certain combinations, such as B32_D1 and B16_D1, were found
|
|
112
|
+
/// to be not very precise in benchmarks, so they were not introduced.
|
|
113
|
+
|
|
114
|
+
enum ApproxTopK_mode_t : int {
|
|
115
|
+
EXACT_TOPK = 0,
|
|
116
|
+
APPROX_TOPK_BUCKETS_B32_D2 = 1,
|
|
117
|
+
APPROX_TOPK_BUCKETS_B8_D3 = 2,
|
|
118
|
+
APPROX_TOPK_BUCKETS_B16_D2 = 3,
|
|
119
|
+
APPROX_TOPK_BUCKETS_B8_D2 = 4,
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
// -----------------------------------------------------------------------
|
|
123
|
+
// HeapWithBuckets dispatch
|
|
124
|
+
// -----------------------------------------------------------------------
|
|
125
|
+
|
|
126
|
+
namespace faiss {
|
|
127
|
+
|
|
128
|
+
// Primary template: bs_addn is declared but NOT defined here.
|
|
129
|
+
// The out-of-line definition lives in simdlib256-inl.h, which is only
|
|
130
|
+
// included by the per-ISA .cpp files (avx2.cpp, neon.cpp).
|
|
131
|
+
template <uint32_t NBUCKETS, uint32_t N, SIMDLevel SL>
|
|
132
|
+
struct HeapWithBucketsCMaxFloat {
|
|
133
|
+
static_assert(
|
|
134
|
+
(NBUCKETS) > 0 && ((NBUCKETS % 8) == 0),
|
|
135
|
+
"Number of buckets needs to be 8, 16, 24, ...");
|
|
136
|
+
|
|
137
|
+
static void addn(
|
|
138
|
+
const uint32_t n,
|
|
139
|
+
const float* const __restrict distances,
|
|
140
|
+
const uint32_t k,
|
|
141
|
+
float* const __restrict bh_val,
|
|
142
|
+
int32_t* const __restrict bh_ids) {
|
|
143
|
+
bs_addn(1, n, distances, k, bh_val, bh_ids);
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// Declared but not defined — resolved at link time from avx2.cpp/neon.cpp.
|
|
147
|
+
static void bs_addn(
|
|
148
|
+
const uint32_t beam_size,
|
|
149
|
+
const uint32_t n_per_beam,
|
|
150
|
+
const float* const __restrict distances,
|
|
151
|
+
const uint32_t k,
|
|
152
|
+
float* const __restrict bh_val,
|
|
153
|
+
int32_t* const __restrict bh_ids);
|
|
154
|
+
};
|
|
155
|
+
|
|
156
|
+
// NONE specialization: delegates to the scalar generic implementation.
|
|
157
|
+
template <uint32_t NBUCKETS, uint32_t N>
|
|
158
|
+
struct HeapWithBucketsCMaxFloat<NBUCKETS, N, SIMDLevel::NONE> {
|
|
159
|
+
static void addn(
|
|
160
|
+
const uint32_t n,
|
|
161
|
+
const float* const __restrict distances,
|
|
162
|
+
const uint32_t k,
|
|
163
|
+
float* const __restrict bh_val,
|
|
164
|
+
int32_t* const __restrict bh_ids) {
|
|
165
|
+
bs_addn(1, n, distances, k, bh_val, bh_ids);
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
static void bs_addn(
|
|
169
|
+
const uint32_t beam_size,
|
|
170
|
+
const uint32_t n_per_beam,
|
|
171
|
+
const float* const __restrict distances,
|
|
172
|
+
const uint32_t k,
|
|
173
|
+
float* const __restrict bh_val,
|
|
174
|
+
int32_t* const __restrict bh_ids) {
|
|
175
|
+
HeapWithBucketsGenericCMaxFloat<NBUCKETS, N>::bs_addn(
|
|
176
|
+
beam_size, n_per_beam, distances, k, bh_val, bh_ids);
|
|
177
|
+
}
|
|
178
|
+
};
|
|
179
|
+
|
|
180
|
+
// Primary template — not implemented for arbitrary comparators.
|
|
181
|
+
template <typename C, uint32_t NBUCKETS, uint32_t N>
|
|
182
|
+
struct HeapWithBuckets {
|
|
183
|
+
static_assert(
|
|
184
|
+
sizeof(C) == 0,
|
|
185
|
+
"HeapWithBuckets: unsupported comparator type");
|
|
186
|
+
};
|
|
187
|
+
|
|
188
|
+
// Partial specialization for CMax<float, int> that dispatches
|
|
189
|
+
// via with_simd_level_256bit.
|
|
190
|
+
template <uint32_t NBUCKETS, uint32_t N>
|
|
191
|
+
struct HeapWithBuckets<CMax<float, int>, NBUCKETS, N> {
|
|
192
|
+
static void addn(
|
|
193
|
+
const uint32_t n,
|
|
194
|
+
const float* const __restrict distances,
|
|
195
|
+
const uint32_t k,
|
|
196
|
+
float* const __restrict bh_val,
|
|
197
|
+
int32_t* const __restrict bh_ids) {
|
|
198
|
+
bs_addn(1, n, distances, k, bh_val, bh_ids);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
static void bs_addn(
|
|
202
|
+
const uint32_t beam_size,
|
|
203
|
+
const uint32_t n_per_beam,
|
|
204
|
+
const float* const __restrict distances,
|
|
205
|
+
const uint32_t k,
|
|
206
|
+
float* const __restrict bh_val,
|
|
207
|
+
int32_t* const __restrict bh_ids) {
|
|
208
|
+
with_simd_level_256bit([&]<SIMDLevel SL>() {
|
|
209
|
+
HeapWithBucketsCMaxFloat<NBUCKETS, N, SL>::bs_addn(
|
|
210
|
+
beam_size, n_per_beam, distances, k, bh_val, bh_ids);
|
|
211
|
+
});
|
|
212
|
+
}
|
|
213
|
+
};
|
|
214
|
+
|
|
215
|
+
// -----------------------------------------------------------------------
|
|
216
|
+
// approx_topk_by_mode: consolidates the mode switch + dispatch pattern
|
|
217
|
+
// used by residual_quantizer_encode_steps.cpp and other callers.
|
|
218
|
+
// -----------------------------------------------------------------------
|
|
219
|
+
|
|
220
|
+
// SL-parameterized version for callers that have already resolved the
|
|
221
|
+
// SIMD level (e.g., inside a with_simd_level_256bit lambda).
|
|
222
|
+
template <SIMDLevel SL>
|
|
223
|
+
inline void approx_topk_by_mode(
|
|
224
|
+
ApproxTopK_mode_t mode,
|
|
225
|
+
uint32_t beam_size,
|
|
226
|
+
uint32_t n_per_beam,
|
|
227
|
+
const float* distances,
|
|
228
|
+
uint32_t k,
|
|
229
|
+
float* bh_val,
|
|
230
|
+
int32_t* bh_ids) {
|
|
231
|
+
using C = CMax<float, int>;
|
|
232
|
+
auto approx = [&]<uint32_t NB, uint32_t ND>() {
|
|
233
|
+
HeapWithBucketsCMaxFloat<NB, ND, SL>::bs_addn(
|
|
234
|
+
beam_size, n_per_beam, distances, k, bh_val, bh_ids);
|
|
235
|
+
};
|
|
236
|
+
switch (mode) {
|
|
237
|
+
case ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B8_D3:
|
|
238
|
+
approx.template operator()<8, 3>();
|
|
239
|
+
break;
|
|
240
|
+
case ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B8_D2:
|
|
241
|
+
approx.template operator()<8, 2>();
|
|
242
|
+
break;
|
|
243
|
+
case ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B16_D2:
|
|
244
|
+
approx.template operator()<16, 2>();
|
|
245
|
+
break;
|
|
246
|
+
case ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B32_D2:
|
|
247
|
+
approx.template operator()<32, 2>();
|
|
248
|
+
break;
|
|
249
|
+
default:
|
|
250
|
+
heap_addn<C>(
|
|
251
|
+
k,
|
|
252
|
+
bh_val,
|
|
253
|
+
bh_ids,
|
|
254
|
+
distances,
|
|
255
|
+
nullptr,
|
|
256
|
+
beam_size * n_per_beam);
|
|
257
|
+
break;
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
// Non-SL wrapper that dispatches via with_simd_level_256bit.
|
|
262
|
+
inline void approx_topk_by_mode(
|
|
263
|
+
ApproxTopK_mode_t mode,
|
|
264
|
+
uint32_t beam_size,
|
|
265
|
+
uint32_t n_per_beam,
|
|
266
|
+
const float* distances,
|
|
267
|
+
uint32_t k,
|
|
268
|
+
float* bh_val,
|
|
269
|
+
int32_t* bh_ids) {
|
|
270
|
+
with_simd_level_256bit([&]<SIMDLevel SL>() {
|
|
271
|
+
approx_topk_by_mode<SL>(
|
|
272
|
+
mode, beam_size, n_per_beam, distances, k, bh_val, bh_ids);
|
|
273
|
+
});
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
} // namespace faiss
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Explicit template instantiations of HeapWithBucketsCMaxFloat and
|
|
9
|
+
// accum_and_*_tab for SIMDLevel::AVX2.
|
|
10
|
+
|
|
11
|
+
#ifdef COMPILE_SIMD_AVX2
|
|
12
|
+
|
|
13
|
+
#include <faiss/impl/approx_topk/rq_beam_search_tab-inl.h>
|
|
14
|
+
#include <faiss/impl/approx_topk/simdlib256-inl.h>
|
|
15
|
+
#include <faiss/impl/simdlib/simdlib_avx2.h>
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
template struct HeapWithBucketsCMaxFloat<8, 3, SIMDLevel::AVX2>;
|
|
20
|
+
template struct HeapWithBucketsCMaxFloat<8, 2, SIMDLevel::AVX2>;
|
|
21
|
+
template struct HeapWithBucketsCMaxFloat<16, 2, SIMDLevel::AVX2>;
|
|
22
|
+
template struct HeapWithBucketsCMaxFloat<16, 1, SIMDLevel::AVX2>;
|
|
23
|
+
template struct HeapWithBucketsCMaxFloat<32, 2, SIMDLevel::AVX2>;
|
|
24
|
+
|
|
25
|
+
#define INSTANTIATE_ACCUM_TAB(M) \
|
|
26
|
+
template void accum_and_store_tab<M, 4, SIMDLevel::AVX2>( \
|
|
27
|
+
size_t, \
|
|
28
|
+
const float* __restrict, \
|
|
29
|
+
const uint64_t* __restrict, \
|
|
30
|
+
const int32_t* __restrict, \
|
|
31
|
+
size_t, \
|
|
32
|
+
size_t, \
|
|
33
|
+
size_t, \
|
|
34
|
+
float* __restrict); \
|
|
35
|
+
template void accum_and_add_tab<M, 4, SIMDLevel::AVX2>( \
|
|
36
|
+
size_t, \
|
|
37
|
+
const float* __restrict, \
|
|
38
|
+
const uint64_t* __restrict, \
|
|
39
|
+
const int32_t* __restrict, \
|
|
40
|
+
size_t, \
|
|
41
|
+
size_t, \
|
|
42
|
+
size_t, \
|
|
43
|
+
float* __restrict); \
|
|
44
|
+
template void accum_and_finalize_tab<M, 4, SIMDLevel::AVX2>( \
|
|
45
|
+
const float* __restrict, \
|
|
46
|
+
const uint64_t* __restrict, \
|
|
47
|
+
const int32_t* __restrict, \
|
|
48
|
+
size_t, \
|
|
49
|
+
size_t, \
|
|
50
|
+
size_t, \
|
|
51
|
+
const float* __restrict, \
|
|
52
|
+
const float* __restrict, \
|
|
53
|
+
float* __restrict);
|
|
54
|
+
|
|
55
|
+
INSTANTIATE_ACCUM_TAB(1)
|
|
56
|
+
INSTANTIATE_ACCUM_TAB(2)
|
|
57
|
+
INSTANTIATE_ACCUM_TAB(3)
|
|
58
|
+
INSTANTIATE_ACCUM_TAB(4)
|
|
59
|
+
INSTANTIATE_ACCUM_TAB(5)
|
|
60
|
+
INSTANTIATE_ACCUM_TAB(6)
|
|
61
|
+
INSTANTIATE_ACCUM_TAB(7)
|
|
62
|
+
INSTANTIATE_ACCUM_TAB(8)
|
|
63
|
+
|
|
64
|
+
#undef INSTANTIATE_ACCUM_TAB
|
|
65
|
+
|
|
66
|
+
} // namespace faiss
|
|
67
|
+
|
|
68
|
+
#endif // COMPILE_SIMD_AVX2
|
|
@@ -16,16 +16,12 @@
|
|
|
16
16
|
|
|
17
17
|
namespace faiss {
|
|
18
18
|
|
|
19
|
-
//
|
|
20
|
-
// because a compiler is unable to
|
|
21
|
-
|
|
22
|
-
template <typename C, uint32_t NBUCKETS, uint32_t N>
|
|
23
|
-
struct HeapWithBuckets {
|
|
24
|
-
// this case was not implemented yet.
|
|
25
|
-
};
|
|
19
|
+
// Scalar (generic) implementation of HeapWithBuckets.
|
|
20
|
+
// This is correct but slow because a compiler is unable to
|
|
21
|
+
// vectorize it properly. Used as the SIMDLevel::NONE fallback.
|
|
26
22
|
|
|
27
23
|
template <uint32_t NBUCKETS, uint32_t N>
|
|
28
|
-
struct
|
|
24
|
+
struct HeapWithBucketsGenericCMaxFloat {
|
|
29
25
|
static void addn(
|
|
30
26
|
// number of elements
|
|
31
27
|
const uint32_t n,
|
|
@@ -135,4 +131,15 @@ struct HeapWithBuckets<CMax<float, int>, NBUCKETS, N> {
|
|
|
135
131
|
}
|
|
136
132
|
};
|
|
137
133
|
|
|
134
|
+
// Legacy name kept for backward compatibility (used when
|
|
135
|
+
// approx_topk.h is not included).
|
|
136
|
+
template <typename C, uint32_t NBUCKETS, uint32_t N>
|
|
137
|
+
struct HeapWithBucketsGeneric {
|
|
138
|
+
// not implemented for arbitrary C
|
|
139
|
+
};
|
|
140
|
+
|
|
141
|
+
template <uint32_t NBUCKETS, uint32_t N>
|
|
142
|
+
struct HeapWithBucketsGeneric<CMax<float, int>, NBUCKETS, N>
|
|
143
|
+
: HeapWithBucketsGenericCMaxFloat<NBUCKETS, N> {};
|
|
144
|
+
|
|
138
145
|
} // namespace faiss
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Explicit template instantiations of HeapWithBucketsCMaxFloat and
|
|
9
|
+
// accum_and_*_tab for SIMDLevel::ARM_NEON.
|
|
10
|
+
|
|
11
|
+
#ifdef COMPILE_SIMD_ARM_NEON
|
|
12
|
+
|
|
13
|
+
#include <faiss/impl/approx_topk/rq_beam_search_tab-inl.h>
|
|
14
|
+
#include <faiss/impl/approx_topk/simdlib256-inl.h>
|
|
15
|
+
#include <faiss/impl/simdlib/simdlib_neon.h>
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
template struct HeapWithBucketsCMaxFloat<8, 3, SIMDLevel::ARM_NEON>;
|
|
20
|
+
template struct HeapWithBucketsCMaxFloat<8, 2, SIMDLevel::ARM_NEON>;
|
|
21
|
+
template struct HeapWithBucketsCMaxFloat<16, 2, SIMDLevel::ARM_NEON>;
|
|
22
|
+
template struct HeapWithBucketsCMaxFloat<16, 1, SIMDLevel::ARM_NEON>;
|
|
23
|
+
template struct HeapWithBucketsCMaxFloat<32, 2, SIMDLevel::ARM_NEON>;
|
|
24
|
+
|
|
25
|
+
#define INSTANTIATE_ACCUM_TAB(M) \
|
|
26
|
+
template void accum_and_store_tab<M, 4, SIMDLevel::ARM_NEON>( \
|
|
27
|
+
size_t, \
|
|
28
|
+
const float* __restrict, \
|
|
29
|
+
const uint64_t* __restrict, \
|
|
30
|
+
const int32_t* __restrict, \
|
|
31
|
+
size_t, \
|
|
32
|
+
size_t, \
|
|
33
|
+
size_t, \
|
|
34
|
+
float* __restrict); \
|
|
35
|
+
template void accum_and_add_tab<M, 4, SIMDLevel::ARM_NEON>( \
|
|
36
|
+
size_t, \
|
|
37
|
+
const float* __restrict, \
|
|
38
|
+
const uint64_t* __restrict, \
|
|
39
|
+
const int32_t* __restrict, \
|
|
40
|
+
size_t, \
|
|
41
|
+
size_t, \
|
|
42
|
+
size_t, \
|
|
43
|
+
float* __restrict); \
|
|
44
|
+
template void accum_and_finalize_tab<M, 4, SIMDLevel::ARM_NEON>( \
|
|
45
|
+
const float* __restrict, \
|
|
46
|
+
const uint64_t* __restrict, \
|
|
47
|
+
const int32_t* __restrict, \
|
|
48
|
+
size_t, \
|
|
49
|
+
size_t, \
|
|
50
|
+
size_t, \
|
|
51
|
+
const float* __restrict, \
|
|
52
|
+
const float* __restrict, \
|
|
53
|
+
float* __restrict);
|
|
54
|
+
|
|
55
|
+
INSTANTIATE_ACCUM_TAB(1)
|
|
56
|
+
INSTANTIATE_ACCUM_TAB(2)
|
|
57
|
+
INSTANTIATE_ACCUM_TAB(3)
|
|
58
|
+
INSTANTIATE_ACCUM_TAB(4)
|
|
59
|
+
INSTANTIATE_ACCUM_TAB(5)
|
|
60
|
+
INSTANTIATE_ACCUM_TAB(6)
|
|
61
|
+
INSTANTIATE_ACCUM_TAB(7)
|
|
62
|
+
INSTANTIATE_ACCUM_TAB(8)
|
|
63
|
+
|
|
64
|
+
#undef INSTANTIATE_ACCUM_TAB
|
|
65
|
+
|
|
66
|
+
} // namespace faiss
|
|
67
|
+
|
|
68
|
+
#endif // COMPILE_SIMD_ARM_NEON
|