faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Definitions of the SIMDLevel-templatized accum_and_*_tab functions.
|
|
9
|
+
// Only included by per-ISA .cpp files (avx2.cpp, neon.cpp).
|
|
10
|
+
// Do NOT include this from common translation units.
|
|
11
|
+
//
|
|
12
|
+
// Common TUs include rq_beam_search_tab.h (declarations only).
|
|
13
|
+
|
|
14
|
+
#pragma once
|
|
15
|
+
|
|
16
|
+
#include <cstddef>
|
|
17
|
+
#include <cstdint>
|
|
18
|
+
|
|
19
|
+
#include <faiss/impl/approx_topk/rq_beam_search_tab.h>
|
|
20
|
+
#include <faiss/impl/simdlib/simdlib.h>
|
|
21
|
+
|
|
22
|
+
namespace faiss {
|
|
23
|
+
|
|
24
|
+
template <size_t M, size_t NK, SIMDLevel SL>
|
|
25
|
+
void accum_and_store_tab(
|
|
26
|
+
const size_t m_offset,
|
|
27
|
+
const float* const __restrict codebook_cross_norms,
|
|
28
|
+
const uint64_t* const __restrict codebook_offsets,
|
|
29
|
+
const int32_t* const __restrict codes_i,
|
|
30
|
+
const size_t b,
|
|
31
|
+
const size_t ldc,
|
|
32
|
+
const size_t K,
|
|
33
|
+
float* const __restrict output) {
|
|
34
|
+
using simd_float = simd8float32_tpl<SL>;
|
|
35
|
+
|
|
36
|
+
const float* cbs[M];
|
|
37
|
+
for (size_t ij = 0; ij < M; ij++) {
|
|
38
|
+
const size_t code = static_cast<size_t>(codes_i[b * m_offset + ij]);
|
|
39
|
+
cbs[ij] = &codebook_cross_norms[(codebook_offsets[ij] + code) * ldc];
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
const size_t K8 = (K / (8 * NK)) * (8 * NK);
|
|
43
|
+
|
|
44
|
+
for (size_t kk = 0; kk < K8; kk += 8 * NK) {
|
|
45
|
+
simd_float regs[NK];
|
|
46
|
+
for (size_t ik = 0; ik < NK; ik++) {
|
|
47
|
+
regs[ik] = simd_float(cbs[0] + kk + ik * 8);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
for (size_t ij = 1; ij < M; ij++) {
|
|
51
|
+
for (size_t ik = 0; ik < NK; ik++) {
|
|
52
|
+
regs[ik] += simd_float(cbs[ij] + kk + ik * 8);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
for (size_t ik = 0; ik < NK; ik++) {
|
|
57
|
+
regs[ik].storeu(output + kk + ik * 8);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
for (size_t kk = K8; kk < K; kk++) {
|
|
62
|
+
float reg = cbs[0][kk];
|
|
63
|
+
for (size_t ij = 1; ij < M; ij++) {
|
|
64
|
+
reg += cbs[ij][kk];
|
|
65
|
+
}
|
|
66
|
+
output[kk] = reg;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
template <size_t M, size_t NK, SIMDLevel SL>
|
|
71
|
+
void accum_and_add_tab(
|
|
72
|
+
const size_t m_offset,
|
|
73
|
+
const float* const __restrict codebook_cross_norms,
|
|
74
|
+
const uint64_t* const __restrict codebook_offsets,
|
|
75
|
+
const int32_t* const __restrict codes_i,
|
|
76
|
+
const size_t b,
|
|
77
|
+
const size_t ldc,
|
|
78
|
+
const size_t K,
|
|
79
|
+
float* const __restrict output) {
|
|
80
|
+
using simd_float = simd8float32_tpl<SL>;
|
|
81
|
+
|
|
82
|
+
const float* cbs[M];
|
|
83
|
+
for (size_t ij = 0; ij < M; ij++) {
|
|
84
|
+
const size_t code = static_cast<size_t>(codes_i[b * m_offset + ij]);
|
|
85
|
+
cbs[ij] = &codebook_cross_norms[(codebook_offsets[ij] + code) * ldc];
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
const size_t K8 = (K / (8 * NK)) * (8 * NK);
|
|
89
|
+
|
|
90
|
+
for (size_t kk = 0; kk < K8; kk += 8 * NK) {
|
|
91
|
+
simd_float regs[NK];
|
|
92
|
+
for (size_t ik = 0; ik < NK; ik++) {
|
|
93
|
+
regs[ik] = simd_float(cbs[0] + kk + ik * 8);
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
for (size_t ij = 1; ij < M; ij++) {
|
|
97
|
+
for (size_t ik = 0; ik < NK; ik++) {
|
|
98
|
+
regs[ik] += simd_float(cbs[ij] + kk + ik * 8);
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
for (size_t ik = 0; ik < NK; ik++) {
|
|
103
|
+
simd_float existing(output + kk + ik * 8);
|
|
104
|
+
existing += regs[ik];
|
|
105
|
+
existing.storeu(output + kk + ik * 8);
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
for (size_t kk = K8; kk < K; kk++) {
|
|
110
|
+
float reg = cbs[0][kk];
|
|
111
|
+
for (size_t ij = 1; ij < M; ij++) {
|
|
112
|
+
reg += cbs[ij][kk];
|
|
113
|
+
}
|
|
114
|
+
output[kk] += reg;
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
template <size_t M, size_t NK, SIMDLevel SL>
|
|
119
|
+
void accum_and_finalize_tab(
|
|
120
|
+
const float* const __restrict codebook_cross_norms,
|
|
121
|
+
const uint64_t* const __restrict codebook_offsets,
|
|
122
|
+
const int32_t* const __restrict codes_i,
|
|
123
|
+
const size_t b,
|
|
124
|
+
const size_t ldc,
|
|
125
|
+
const size_t K,
|
|
126
|
+
const float* const __restrict distances_i,
|
|
127
|
+
const float* const __restrict cd_common,
|
|
128
|
+
float* const __restrict output) {
|
|
129
|
+
using simd_float = simd8float32_tpl<SL>;
|
|
130
|
+
|
|
131
|
+
const float* cbs[M];
|
|
132
|
+
for (size_t ij = 0; ij < M; ij++) {
|
|
133
|
+
const size_t code = static_cast<size_t>(codes_i[b * M + ij]);
|
|
134
|
+
cbs[ij] = &codebook_cross_norms[(codebook_offsets[ij] + code) * ldc];
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
const size_t K8 = (K / (8 * NK)) * (8 * NK);
|
|
138
|
+
|
|
139
|
+
for (size_t kk = 0; kk < K8; kk += 8 * NK) {
|
|
140
|
+
simd_float regs[NK];
|
|
141
|
+
for (size_t ik = 0; ik < NK; ik++) {
|
|
142
|
+
regs[ik] = simd_float(cbs[0] + kk + ik * 8);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
for (size_t ij = 1; ij < M; ij++) {
|
|
146
|
+
for (size_t ik = 0; ik < NK; ik++) {
|
|
147
|
+
regs[ik] += simd_float(cbs[ij] + kk + ik * 8);
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
simd_float two(2.0f);
|
|
152
|
+
for (size_t ik = 0; ik < NK; ik++) {
|
|
153
|
+
simd_float common_v(cd_common + kk + ik * 8);
|
|
154
|
+
common_v = fmadd(two, regs[ik], common_v);
|
|
155
|
+
common_v += simd_float(distances_i[b]);
|
|
156
|
+
common_v.storeu(output + b * K + kk + ik * 8);
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
for (size_t kk = K8; kk < K; kk++) {
|
|
161
|
+
float reg = cbs[0][kk];
|
|
162
|
+
for (size_t ij = 1; ij < M; ij++) {
|
|
163
|
+
reg += cbs[ij][kk];
|
|
164
|
+
}
|
|
165
|
+
output[b * K + kk] = distances_i[b] + cd_common[kk] + 2 * reg;
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
} // namespace faiss
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/// @file rq_beam_search_tab.h
|
|
9
|
+
/// @brief Declarations for SIMDLevel-templatized codebook accumulation
|
|
10
|
+
/// functions.
|
|
11
|
+
///
|
|
12
|
+
/// These functions accumulate codebook cross-norm tables for beam search
|
|
13
|
+
/// encoding in the Residual Quantizer. They compute the distance
|
|
14
|
+
/// contributions from previously encoded codebooks using SIMD-accelerated
|
|
15
|
+
/// register accumulation.
|
|
16
|
+
///
|
|
17
|
+
/// Definitions are in rq_beam_search_tab-inl.h (only included by per-ISA
|
|
18
|
+
/// .cpp files). The common TU only sees these declarations, so no extern
|
|
19
|
+
/// template suppression is needed — the linker resolves to the explicit
|
|
20
|
+
/// instantiations in avx2.cpp / neon.cpp.
|
|
21
|
+
|
|
22
|
+
#pragma once
|
|
23
|
+
|
|
24
|
+
#include <cstddef>
|
|
25
|
+
#include <cstdint>
|
|
26
|
+
|
|
27
|
+
#include <faiss/utils/simd_levels.h>
|
|
28
|
+
|
|
29
|
+
namespace faiss {
|
|
30
|
+
|
|
31
|
+
/// Accumulate cross-norms for M codebooks and store the result.
|
|
32
|
+
///
|
|
33
|
+
/// Loads M codebook rows (selected by codes_i) and sums them using
|
|
34
|
+
/// NK×8-wide SIMD chunks, writing the result to output. Used to
|
|
35
|
+
/// initialize the temporary buffer in the m≥8 path.
|
|
36
|
+
///
|
|
37
|
+
/// @tparam M number of codebook rows to accumulate
|
|
38
|
+
/// @tparam NK number of 8-float SIMD chunks per loop iteration
|
|
39
|
+
/// @tparam SL SIMD level (AVX2, ARM_NEON, etc.)
|
|
40
|
+
/// @param m_offset stride between beam entries in codes_i
|
|
41
|
+
/// @param codebook_cross_norms cross-norm table, shape (total_codes, ldc)
|
|
42
|
+
/// @param codebook_offsets per-codebook offset into cross-norm table
|
|
43
|
+
/// @param codes_i code indices for the current query
|
|
44
|
+
/// @param b beam index
|
|
45
|
+
/// @param ldc leading dimension of cross-norm table (≥ K)
|
|
46
|
+
/// @param K number of centroids in the current codebook
|
|
47
|
+
/// @param output output buffer, size K (overwritten)
|
|
48
|
+
template <size_t M, size_t NK, SIMDLevel SL>
|
|
49
|
+
void accum_and_store_tab(
|
|
50
|
+
size_t m_offset,
|
|
51
|
+
const float* __restrict codebook_cross_norms,
|
|
52
|
+
const uint64_t* __restrict codebook_offsets,
|
|
53
|
+
const int32_t* __restrict codes_i,
|
|
54
|
+
size_t b,
|
|
55
|
+
size_t ldc,
|
|
56
|
+
size_t K,
|
|
57
|
+
float* __restrict output);
|
|
58
|
+
|
|
59
|
+
/// Accumulate cross-norms for M codebooks and add to existing output.
|
|
60
|
+
///
|
|
61
|
+
/// Like accum_and_store_tab, but adds the accumulated result to the
|
|
62
|
+
/// existing values in output (output[k] += sum). Used for subsequent
|
|
63
|
+
/// chunks of 8 codebooks in the m≥8 path.
|
|
64
|
+
///
|
|
65
|
+
/// @tparam M number of codebook rows to accumulate
|
|
66
|
+
/// @tparam NK number of 8-float SIMD chunks per loop iteration
|
|
67
|
+
/// @tparam SL SIMD level (AVX2, ARM_NEON, etc.)
|
|
68
|
+
/// @param m_offset stride between beam entries in codes_i
|
|
69
|
+
/// @param codebook_cross_norms cross-norm table
|
|
70
|
+
/// @param codebook_offsets per-codebook offset
|
|
71
|
+
/// @param codes_i code indices
|
|
72
|
+
/// @param b beam index
|
|
73
|
+
/// @param ldc leading dimension of cross-norm table
|
|
74
|
+
/// @param K number of centroids
|
|
75
|
+
/// @param output output buffer, size K (accumulated into)
|
|
76
|
+
template <size_t M, size_t NK, SIMDLevel SL>
|
|
77
|
+
void accum_and_add_tab(
|
|
78
|
+
size_t m_offset,
|
|
79
|
+
const float* __restrict codebook_cross_norms,
|
|
80
|
+
const uint64_t* __restrict codebook_offsets,
|
|
81
|
+
const int32_t* __restrict codes_i,
|
|
82
|
+
size_t b,
|
|
83
|
+
size_t ldc,
|
|
84
|
+
size_t K,
|
|
85
|
+
float* __restrict output);
|
|
86
|
+
|
|
87
|
+
/// Accumulate cross-norms for M codebooks and finalize distances.
|
|
88
|
+
///
|
|
89
|
+
/// Accumulates M codebook rows, then computes the final centroid distance:
|
|
90
|
+
/// output[b*K + k] = distances_i[b] + cd_common[k] + 2 * sum[k]
|
|
91
|
+
/// Used for m=1..7 where the entire accumulation fits in registers.
|
|
92
|
+
///
|
|
93
|
+
/// @tparam M number of codebook rows to accumulate (equals m)
|
|
94
|
+
/// @tparam NK number of 8-float SIMD chunks per loop iteration
|
|
95
|
+
/// @tparam SL SIMD level (AVX2, ARM_NEON, etc.)
|
|
96
|
+
/// @param codebook_cross_norms cross-norm table, shape (total_codes, ldc)
|
|
97
|
+
/// @param codebook_offsets per-codebook offset into cross-norm table
|
|
98
|
+
/// @param codes_i code indices (stride is M)
|
|
99
|
+
/// @param b beam index
|
|
100
|
+
/// @param ldc leading dimension of cross-norm table (≥ K)
|
|
101
|
+
/// @param K number of centroids
|
|
102
|
+
/// @param distances_i per-beam input distances, size beam_size
|
|
103
|
+
/// @param cd_common common distance term, size K
|
|
104
|
+
/// @param output output centroid distances; beam b is written to output[b*K + k]
|
|
105
|
+
template <size_t M, size_t NK, SIMDLevel SL>
|
|
106
|
+
void accum_and_finalize_tab(
|
|
107
|
+
const float* __restrict codebook_cross_norms,
|
|
108
|
+
const uint64_t* __restrict codebook_offsets,
|
|
109
|
+
const int32_t* __restrict codes_i,
|
|
110
|
+
size_t b,
|
|
111
|
+
size_t ldc,
|
|
112
|
+
size_t K,
|
|
113
|
+
const float* __restrict distances_i,
|
|
114
|
+
const float* __restrict cd_common,
|
|
115
|
+
float* __restrict output);
|
|
116
|
+
|
|
117
|
+
} // namespace faiss
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Out-of-line definition of HeapWithBucketsCMaxFloat::bs_addn using
|
|
9
|
+
// simdlib types. Only included by per-ISA .cpp files (avx2.cpp, neon.cpp).
|
|
10
|
+
// Do NOT include this from common translation units.
|
|
11
|
+
|
|
12
|
+
#pragma once
|
|
13
|
+
|
|
14
|
+
#include <cstdint>
|
|
15
|
+
#include <limits>
|
|
16
|
+
|
|
17
|
+
#include <faiss/impl/approx_topk/approx_topk.h>
|
|
18
|
+
#include <faiss/impl/simdlib/simdlib.h>
|
|
19
|
+
#include <faiss/utils/Heap.h>
|
|
20
|
+
#include <faiss/utils/simd_levels.h>
|
|
21
|
+
|
|
22
|
+
namespace faiss {
|
|
23
|
+
|
|
24
|
+
// Element-wise max of two simd8float32 vectors, implemented via
|
|
25
|
+
// cmplt_min_max_fast (which computes both min and max).
|
|
26
|
+
template <SIMDLevel SL>
|
|
27
|
+
inline simd8float32_tpl<SL> simd8float32_max(
|
|
28
|
+
simd8float32_tpl<SL> a,
|
|
29
|
+
simd8float32_tpl<SL> b) {
|
|
30
|
+
simd8float32_tpl<SL> min_val, max_val;
|
|
31
|
+
simd8uint32_tpl<SL> dummy(0u), dmin, dmax;
|
|
32
|
+
cmplt_min_max_fast(a, dummy, b, dummy, min_val, dmin, max_val, dmax);
|
|
33
|
+
return max_val;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
template <uint32_t NBUCKETS, uint32_t N, SIMDLevel SL>
|
|
37
|
+
void HeapWithBucketsCMaxFloat<NBUCKETS, N, SL>::bs_addn(
|
|
38
|
+
const uint32_t beam_size,
|
|
39
|
+
const uint32_t n_per_beam,
|
|
40
|
+
const float* const __restrict distances,
|
|
41
|
+
const uint32_t k,
|
|
42
|
+
float* const __restrict bh_val,
|
|
43
|
+
int32_t* const __restrict bh_ids) {
|
|
44
|
+
using C = CMax<float, int>;
|
|
45
|
+
using simd_float = simd8float32_tpl<SL>;
|
|
46
|
+
using simd_uint = simd8uint32_tpl<SL>;
|
|
47
|
+
|
|
48
|
+
for (uint32_t beam_index = 0; beam_index < beam_size; beam_index++) {
|
|
49
|
+
simd_float min_distances_i[NBUCKETS / 8][N];
|
|
50
|
+
simd_uint min_indices_i[NBUCKETS / 8][N];
|
|
51
|
+
|
|
52
|
+
for (uint32_t j = 0; j < NBUCKETS / 8; j++) {
|
|
53
|
+
for (uint32_t p = 0; p < N; p++) {
|
|
54
|
+
min_distances_i[j][p] =
|
|
55
|
+
simd_float(std::numeric_limits<float>::max());
|
|
56
|
+
min_indices_i[j][p] = simd_uint(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u);
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
simd_uint current_indices(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u);
|
|
61
|
+
simd_uint indices_delta(NBUCKETS);
|
|
62
|
+
|
|
63
|
+
const uint32_t nb = (n_per_beam / NBUCKETS) * NBUCKETS;
|
|
64
|
+
|
|
65
|
+
// put the data into buckets
|
|
66
|
+
for (uint32_t ip = 0; ip < nb; ip += NBUCKETS) {
|
|
67
|
+
for (uint32_t j = 0; j < NBUCKETS / 8; j++) {
|
|
68
|
+
const simd_float distances_reg(
|
|
69
|
+
distances + j * 8 + ip + n_per_beam * beam_index);
|
|
70
|
+
|
|
71
|
+
simd_float distance_candidate = distances_reg;
|
|
72
|
+
simd_uint indices_candidate = current_indices;
|
|
73
|
+
|
|
74
|
+
for (uint32_t p = 0; p < N; p++) {
|
|
75
|
+
// Use cmplt_min_max_fast for comparison, min values,
|
|
76
|
+
// min indices, and max indices.
|
|
77
|
+
simd_float min_d_new, max_d_unused;
|
|
78
|
+
simd_uint min_idx_new, max_idx_new;
|
|
79
|
+
cmplt_min_max_fast(
|
|
80
|
+
distance_candidate,
|
|
81
|
+
indices_candidate,
|
|
82
|
+
min_distances_i[j][p],
|
|
83
|
+
min_indices_i[j][p],
|
|
84
|
+
min_d_new,
|
|
85
|
+
min_idx_new,
|
|
86
|
+
max_d_unused,
|
|
87
|
+
max_idx_new);
|
|
88
|
+
|
|
89
|
+
// The max distance uses distances_reg (the original
|
|
90
|
+
// input), NOT distance_candidate. This is a deliberate
|
|
91
|
+
// approximation that breaks the data dependency chain.
|
|
92
|
+
simd_float max_d_new = simd8float32_max<SL>(
|
|
93
|
+
min_distances_i[j][p], distances_reg);
|
|
94
|
+
|
|
95
|
+
distance_candidate = max_d_new;
|
|
96
|
+
indices_candidate = max_idx_new;
|
|
97
|
+
|
|
98
|
+
min_distances_i[j][p] = min_d_new;
|
|
99
|
+
min_indices_i[j][p] = min_idx_new;
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
current_indices = current_indices + indices_delta;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// fix the indices
|
|
107
|
+
for (uint32_t j = 0; j < NBUCKETS / 8; j++) {
|
|
108
|
+
const simd_uint offset(n_per_beam * beam_index + j * 8);
|
|
109
|
+
for (uint32_t p = 0; p < N; p++) {
|
|
110
|
+
min_indices_i[j][p] = min_indices_i[j][p] + offset;
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// merge every bucket into the regular heap
|
|
115
|
+
for (uint32_t p = 0; p < N; p++) {
|
|
116
|
+
for (uint32_t j = 0; j < NBUCKETS / 8; j++) {
|
|
117
|
+
uint32_t min_indices_scalar[8];
|
|
118
|
+
float min_distances_scalar[8];
|
|
119
|
+
|
|
120
|
+
min_indices_i[j][p].storeu(min_indices_scalar);
|
|
121
|
+
min_distances_i[j][p].storeu(min_distances_scalar);
|
|
122
|
+
|
|
123
|
+
for (size_t j8 = 0; j8 < 8; j8++) {
|
|
124
|
+
const auto value = min_distances_scalar[j8];
|
|
125
|
+
const auto index =
|
|
126
|
+
static_cast<int32_t>(min_indices_scalar[j8]);
|
|
127
|
+
if (C::cmp2(bh_val[0], value, bh_ids[0], index)) {
|
|
128
|
+
heap_replace_top<C>(k, bh_val, bh_ids, value, index);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
// process leftovers
|
|
135
|
+
for (uint32_t ip = nb; ip < n_per_beam; ip++) {
|
|
136
|
+
const int32_t index = ip + n_per_beam * beam_index;
|
|
137
|
+
const float value = distances[index];
|
|
138
|
+
|
|
139
|
+
if (C::cmp(bh_val[0], value)) {
|
|
140
|
+
heap_replace_top<C>(k, bh_val, bh_ids, value, index);
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
} // namespace faiss
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/*
|
|
9
|
+
* Per-ISA implementation of Hamming distance computation for
|
|
10
|
+
* IndexBinaryHNSW. Included once per SIMD TU with THE_SIMD_LEVEL
|
|
11
|
+
* set to the desired SIMDLevel.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
#pragma once
|
|
15
|
+
|
|
16
|
+
#ifndef THE_SIMD_LEVEL
|
|
17
|
+
#error "THE_SIMD_LEVEL must be defined before including this file"
|
|
18
|
+
#endif
|
|
19
|
+
|
|
20
|
+
// The including TU (or the per-ISA hamming_computer-*.h it pulls in first)
|
|
21
|
+
// is responsible for providing the HammingComputer*_tpl<SL> specializations;
|
|
22
|
+
// this header only needs the forward declarations and the with_HammingComputer<SL>
|
|
23
|
+
// dispatcher from hamming_computer.h.
|
|
24
|
+
#include <faiss/utils/hamming_distance/hamming_computer.h>
|
|
25
|
+
|
|
26
|
+
#include <faiss/IndexBinaryFlat.h>
|
|
27
|
+
#include <faiss/impl/DistanceComputer.h>
|
|
28
|
+
#include <faiss/impl/binary_hamming/dispatch.h>
|
|
29
|
+
#include <faiss/utils/hamming.h>
|
|
30
|
+
|
|
31
|
+
namespace faiss {
|
|
32
|
+
|
|
33
|
+
namespace {
|
|
34
|
+
|
|
35
|
+
template <class HammingComputer>
|
|
36
|
+
struct FlatHammingDis : DistanceComputer {
|
|
37
|
+
const int code_size;
|
|
38
|
+
const uint8_t* b;
|
|
39
|
+
HammingComputer hc;
|
|
40
|
+
|
|
41
|
+
float operator()(idx_t i) override {
|
|
42
|
+
return hc.hamming(b + i * code_size);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
float symmetric_dis(idx_t i, idx_t j) override {
|
|
46
|
+
return HammingComputerDefault_tpl<THE_SIMD_LEVEL>(
|
|
47
|
+
b + j * code_size, code_size)
|
|
48
|
+
.hamming(b + i * code_size);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
explicit FlatHammingDis(const IndexBinaryFlat& storage)
|
|
52
|
+
: code_size(storage.code_size), b(storage.xb.data()), hc() {}
|
|
53
|
+
|
|
54
|
+
// NOTE: Pointers are cast from float in order to reuse the floating-point
|
|
55
|
+
// DistanceComputer.
|
|
56
|
+
void set_query(const float* x) override {
|
|
57
|
+
hc.set((uint8_t*)x, code_size);
|
|
58
|
+
}
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
} // anonymous namespace
|
|
62
|
+
|
|
63
|
+
template <>
|
|
64
|
+
DistanceComputer* make_binary_hnsw_distance_computer_fixSL<THE_SIMD_LEVEL>(
|
|
65
|
+
int code_size,
|
|
66
|
+
IndexBinaryFlat* flat_storage) {
|
|
67
|
+
return with_HammingComputer<THE_SIMD_LEVEL>(
|
|
68
|
+
code_size, [&]<class HammingComputer>() -> DistanceComputer* {
|
|
69
|
+
return new FlatHammingDis<HammingComputer>(*flat_storage);
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
} // namespace faiss
|