faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -5,114 +5,32 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
// This TU provides
|
|
9
|
-
//
|
|
10
|
-
//
|
|
11
|
-
//
|
|
12
|
-
//
|
|
13
|
-
//
|
|
14
|
-
//
|
|
15
|
-
|
|
16
|
-
|
|
8
|
+
// This TU provides non-templated PQ code distance dispatch wrappers
|
|
9
|
+
// (pq_code_distance_8bit_single, pq_code_distance_8bit_four) declared
|
|
10
|
+
// in pq_code_distance-inl.h. These use with_simd_level to route to the
|
|
11
|
+
// best available SIMD implementation via pq_code_distance_8bit_*_impl
|
|
12
|
+
// function template specializations.
|
|
13
|
+
//
|
|
14
|
+
// The NONE and ARM_NEON _impl specializations are defined inline in
|
|
15
|
+
// pq_code_distance-generic.h (included transitively). The AVX2, AVX512,
|
|
16
|
+
// and ARM_SVE specializations are in their respective per-SIMD files.
|
|
17
|
+
|
|
18
|
+
#include <faiss/impl/pq_code_distance/pq_code_distance-generic.h>
|
|
17
19
|
|
|
18
20
|
namespace faiss {
|
|
19
21
|
namespace pq_code_distance {
|
|
20
22
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
24
|
-
template <>
|
|
25
|
-
float pq_code_distance_single_impl<SIMDLevel::NONE>(
|
|
26
|
-
size_t M,
|
|
27
|
-
size_t nbits,
|
|
28
|
-
const float* sim_table,
|
|
29
|
-
const uint8_t* code) {
|
|
30
|
-
return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
|
|
31
|
-
M, nbits, sim_table, code);
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
35
|
-
template <>
|
|
36
|
-
void pq_code_distance_four_impl<SIMDLevel::NONE>(
|
|
37
|
-
size_t M,
|
|
38
|
-
size_t nbits,
|
|
39
|
-
const float* sim_table,
|
|
40
|
-
const uint8_t* __restrict code0,
|
|
41
|
-
const uint8_t* __restrict code1,
|
|
42
|
-
const uint8_t* __restrict code2,
|
|
43
|
-
const uint8_t* __restrict code3,
|
|
44
|
-
float& result0,
|
|
45
|
-
float& result1,
|
|
46
|
-
float& result2,
|
|
47
|
-
float& result3) {
|
|
48
|
-
PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
|
|
49
|
-
M,
|
|
50
|
-
nbits,
|
|
51
|
-
sim_table,
|
|
52
|
-
code0,
|
|
53
|
-
code1,
|
|
54
|
-
code2,
|
|
55
|
-
code3,
|
|
56
|
-
result0,
|
|
57
|
-
result1,
|
|
58
|
-
result2,
|
|
59
|
-
result3);
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
#ifdef COMPILE_SIMD_ARM_NEON
|
|
63
|
-
// ARM_NEON: No NEON-optimized PQ code distance exists. Use scalar.
|
|
64
|
-
|
|
65
|
-
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
66
|
-
template <>
|
|
67
|
-
float pq_code_distance_single_impl<SIMDLevel::ARM_NEON>(
|
|
68
|
-
size_t M,
|
|
69
|
-
size_t nbits,
|
|
70
|
-
const float* sim_table,
|
|
71
|
-
const uint8_t* code) {
|
|
72
|
-
return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
|
|
73
|
-
M, nbits, sim_table, code);
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
77
|
-
template <>
|
|
78
|
-
void pq_code_distance_four_impl<SIMDLevel::ARM_NEON>(
|
|
79
|
-
size_t M,
|
|
80
|
-
size_t nbits,
|
|
81
|
-
const float* sim_table,
|
|
82
|
-
const uint8_t* __restrict code0,
|
|
83
|
-
const uint8_t* __restrict code1,
|
|
84
|
-
const uint8_t* __restrict code2,
|
|
85
|
-
const uint8_t* __restrict code3,
|
|
86
|
-
float& result0,
|
|
87
|
-
float& result1,
|
|
88
|
-
float& result2,
|
|
89
|
-
float& result3) {
|
|
90
|
-
PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
|
|
91
|
-
M,
|
|
92
|
-
nbits,
|
|
93
|
-
sim_table,
|
|
94
|
-
code0,
|
|
95
|
-
code1,
|
|
96
|
-
code2,
|
|
97
|
-
code3,
|
|
98
|
-
result0,
|
|
99
|
-
result1,
|
|
100
|
-
result2,
|
|
101
|
-
result3);
|
|
102
|
-
}
|
|
103
|
-
#endif // COMPILE_SIMD_ARM_NEON
|
|
104
|
-
|
|
105
|
-
float pq_code_distance_single(
|
|
23
|
+
float pq_code_distance_8bit_single(
|
|
106
24
|
size_t M,
|
|
107
|
-
size_t nbits,
|
|
108
25
|
const float* sim_table,
|
|
109
26
|
const uint8_t* code) {
|
|
110
|
-
|
|
27
|
+
return with_simd_level([&]<SIMDLevel SL>() {
|
|
28
|
+
return pq_code_distance_8bit_single_impl<SL>(M, sim_table, code);
|
|
29
|
+
});
|
|
111
30
|
}
|
|
112
31
|
|
|
113
|
-
void pq_code_distance_four(
|
|
32
|
+
void pq_code_distance_8bit_four(
|
|
114
33
|
size_t M,
|
|
115
|
-
size_t nbits,
|
|
116
34
|
const float* sim_table,
|
|
117
35
|
const uint8_t* __restrict code0,
|
|
118
36
|
const uint8_t* __restrict code1,
|
|
@@ -122,19 +40,19 @@ void pq_code_distance_four(
|
|
|
122
40
|
float& result1,
|
|
123
41
|
float& result2,
|
|
124
42
|
float& result3) {
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
43
|
+
with_simd_level([&]<SIMDLevel SL>() {
|
|
44
|
+
pq_code_distance_8bit_four_impl<SL>(
|
|
45
|
+
M,
|
|
46
|
+
sim_table,
|
|
47
|
+
code0,
|
|
48
|
+
code1,
|
|
49
|
+
code2,
|
|
50
|
+
code3,
|
|
51
|
+
result0,
|
|
52
|
+
result1,
|
|
53
|
+
result2,
|
|
54
|
+
result3);
|
|
55
|
+
});
|
|
138
56
|
}
|
|
139
57
|
|
|
140
58
|
} // namespace pq_code_distance
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <faiss/impl/pq_code_distance/pq_code_distance-inl.h>
|
|
11
|
+
|
|
12
|
+
namespace faiss {
|
|
13
|
+
namespace pq_code_distance {
|
|
14
|
+
|
|
15
|
+
// NONE: use scalar directly.
|
|
16
|
+
|
|
17
|
+
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
18
|
+
template <>
|
|
19
|
+
inline float pq_code_distance_8bit_single_impl<SIMDLevel::NONE>(
|
|
20
|
+
size_t M,
|
|
21
|
+
const float* sim_table,
|
|
22
|
+
const uint8_t* code) {
|
|
23
|
+
return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
|
|
24
|
+
M, 8, sim_table, code);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
28
|
+
template <>
|
|
29
|
+
inline void pq_code_distance_8bit_four_impl<SIMDLevel::NONE>(
|
|
30
|
+
size_t M,
|
|
31
|
+
const float* sim_table,
|
|
32
|
+
const uint8_t* __restrict code0,
|
|
33
|
+
const uint8_t* __restrict code1,
|
|
34
|
+
const uint8_t* __restrict code2,
|
|
35
|
+
const uint8_t* __restrict code3,
|
|
36
|
+
float& result0,
|
|
37
|
+
float& result1,
|
|
38
|
+
float& result2,
|
|
39
|
+
float& result3) {
|
|
40
|
+
PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
|
|
41
|
+
M,
|
|
42
|
+
8,
|
|
43
|
+
sim_table,
|
|
44
|
+
code0,
|
|
45
|
+
code1,
|
|
46
|
+
code2,
|
|
47
|
+
code3,
|
|
48
|
+
result0,
|
|
49
|
+
result1,
|
|
50
|
+
result2,
|
|
51
|
+
result3);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
#ifdef COMPILE_SIMD_ARM_NEON
|
|
55
|
+
// ARM_NEON: No NEON-optimized PQ code distance exists. Use scalar.
|
|
56
|
+
|
|
57
|
+
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
58
|
+
template <>
|
|
59
|
+
inline float pq_code_distance_8bit_single_impl<SIMDLevel::ARM_NEON>(
|
|
60
|
+
size_t M,
|
|
61
|
+
const float* sim_table,
|
|
62
|
+
const uint8_t* code) {
|
|
63
|
+
return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
|
|
64
|
+
M, 8, sim_table, code);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
68
|
+
template <>
|
|
69
|
+
inline void pq_code_distance_8bit_four_impl<SIMDLevel::ARM_NEON>(
|
|
70
|
+
size_t M,
|
|
71
|
+
const float* sim_table,
|
|
72
|
+
const uint8_t* __restrict code0,
|
|
73
|
+
const uint8_t* __restrict code1,
|
|
74
|
+
const uint8_t* __restrict code2,
|
|
75
|
+
const uint8_t* __restrict code3,
|
|
76
|
+
float& result0,
|
|
77
|
+
float& result1,
|
|
78
|
+
float& result2,
|
|
79
|
+
float& result3) {
|
|
80
|
+
PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
|
|
81
|
+
M,
|
|
82
|
+
8,
|
|
83
|
+
sim_table,
|
|
84
|
+
code0,
|
|
85
|
+
code1,
|
|
86
|
+
code2,
|
|
87
|
+
code3,
|
|
88
|
+
result0,
|
|
89
|
+
result1,
|
|
90
|
+
result2,
|
|
91
|
+
result3);
|
|
92
|
+
}
|
|
93
|
+
#endif // COMPILE_SIMD_ARM_NEON
|
|
94
|
+
|
|
95
|
+
} // namespace pq_code_distance
|
|
96
|
+
} // namespace faiss
|
|
@@ -9,15 +9,248 @@
|
|
|
9
9
|
|
|
10
10
|
/**
|
|
11
11
|
* @file pq_code_distance-inl.h
|
|
12
|
-
* @brief
|
|
12
|
+
* @brief PQ code distance SIMD-dispatched implementations.
|
|
13
13
|
*
|
|
14
14
|
* This is a PRIVATE header — do not include in public APIs or user code.
|
|
15
15
|
* Only faiss internal .cpp files (the per-SIMD implementation files and
|
|
16
16
|
* pq_code_distance-generic.cpp) should include this header.
|
|
17
|
-
*
|
|
18
|
-
* This header re-exports the public API (pq_code_distance.h) plus the
|
|
19
|
-
* simd_dispatch.h machinery needed by the implementation files.
|
|
20
17
|
*/
|
|
21
18
|
|
|
19
|
+
#include <cstddef>
|
|
20
|
+
#include <cstdint>
|
|
21
|
+
#include <type_traits>
|
|
22
|
+
|
|
23
|
+
#include <faiss/impl/ProductQuantizer.h>
|
|
24
|
+
#include <faiss/impl/platform_macros.h>
|
|
22
25
|
#include <faiss/impl/simd_dispatch.h>
|
|
23
|
-
|
|
26
|
+
|
|
27
|
+
namespace faiss {
|
|
28
|
+
namespace pq_code_distance {
|
|
29
|
+
|
|
30
|
+
/*********************************************************************
|
|
31
|
+
* PQCodeDistance — SIMD-dispatched PQ code distance
|
|
32
|
+
*
|
|
33
|
+
* Computes the distance from a PQ-encoded vector to a query vector,
|
|
34
|
+
* given a precomputed table of sub-distances (one per subquantizer
|
|
35
|
+
* per centroid). Originally extracted from IndexIVFPQ.cpp.
|
|
36
|
+
*
|
|
37
|
+
* DESIGN:
|
|
38
|
+
*
|
|
39
|
+
* PQCodeDistance<PQDecoderT, SL> computes PQ code distances at a given
|
|
40
|
+
* SIMD level. The dispatch site (IndexIVFPQ.cpp, IndexPQ.cpp) uses
|
|
41
|
+
* with_simd_level to select SL at runtime, which instantiates
|
|
42
|
+
* PQCodeDistance for ALL decoder types (PQDecoder8, PQDecoder16,
|
|
43
|
+
* PQDecoderGeneric) at the chosen level.
|
|
44
|
+
*
|
|
45
|
+
* Only PQDecoder8 has SIMD-optimized implementations (AVX2, AVX512,
|
|
46
|
+
* ARM_SVE). The other decoders always use scalar code — their decode()
|
|
47
|
+
* method is inherently sequential, so SIMD doesn't help.
|
|
48
|
+
*
|
|
49
|
+
* The primary template is always complete (no forward declarations
|
|
50
|
+
* needed). For PQDecoder8, it delegates to _impl dispatch bridge
|
|
51
|
+
* functions whose specializations are defined in per-SIMD .cpp files
|
|
52
|
+
* and resolved at link time. For other decoders, it uses scalar.
|
|
53
|
+
*
|
|
54
|
+
* ADDING A NEW SIMD LEVEL:
|
|
55
|
+
*
|
|
56
|
+
* 1. Add the level to SIMDLevel enum (simd_levels.h)
|
|
57
|
+
* 2. Add dispatch_config entry (simd_dispatch.bzl)
|
|
58
|
+
* 3. Define pq_code_distance_8bit_single_impl<NEW_LEVEL> and
|
|
59
|
+
* pq_code_distance_8bit_four_impl<NEW_LEVEL> specializations in a
|
|
60
|
+
* new .cpp file compiled with appropriate SIMD flags
|
|
61
|
+
* 4. Add the .cpp to the build (CMakeLists.txt, xplat.bzl)
|
|
62
|
+
*********************************************************************/
|
|
63
|
+
|
|
64
|
+
/// Scalar PQ code distance implementation.
/// Templated only on the decoder type, independent of SIMD level.
/// Used directly by non-PQDecoder8 decoders (PQDecoder16,
/// PQDecoderGeneric) and as the fallback for PQDecoder8 at SIMD levels
/// whose 8-bit bridge has no vectorized kernel (NONE, ARM_NEON, RISCV_RVV).
template <typename PQDecoderT>
struct PQCodeDistanceScalar {
    using PQDecoder = PQDecoderT;

    /// Sum the table entries selected by one PQ code.
    ///
    /// @param M          number of subquantizers
    /// @param nbits      code width in bits, handed to the decoder; each
    ///                   sub-table holds 2^nbits entries
    /// @param sim_table  precomputed distances, layout (M, ksub)
    /// @param code       encoded vector, consumed through PQDecoderT
    /// @return           sum over m of sim_table[m * ksub + code_m]
    static float distance_single_code(
            size_t M,
            size_t nbits,
            const float* sim_table,
            const uint8_t* code) {
        PQDecoderT decoder(code, nbits);
        // Shift in size_t: `1 << nbits` would be evaluated as an int shift
        // and is undefined once nbits reaches the width of int.
        const size_t ksub = size_t(1) << nbits;

        const float* tab = sim_table;
        float result = 0;

        for (size_t m = 0; m < M; m++) {
            result += tab[decoder.decode()];
            // advance to the sub-table of the next subquantizer
            tab += ksub;
        }

        return result;
    }

    /// Same computation as distance_single_code() for four codes at once,
    /// walking the table a single time.
    ///
    /// @param M              number of subquantizers
    /// @param nbits          code width in bits, handed to each decoder
    /// @param sim_table      precomputed distances, layout (M, ksub)
    /// @param code0..code3   the four encoded vectors
    /// @param result0..3     outputs; overwritten (not accumulated into)
    static void distance_four_codes(
            size_t M,
            size_t nbits,
            const float* sim_table,
            const uint8_t* __restrict code0,
            const uint8_t* __restrict code1,
            const uint8_t* __restrict code2,
            const uint8_t* __restrict code3,
            float& result0,
            float& result1,
            float& result2,
            float& result3) {
        PQDecoderT decoder0(code0, nbits);
        PQDecoderT decoder1(code1, nbits);
        PQDecoderT decoder2(code2, nbits);
        PQDecoderT decoder3(code3, nbits);
        // See distance_single_code(): the shift must happen in size_t.
        const size_t ksub = size_t(1) << nbits;

        const float* tab = sim_table;
        result0 = 0;
        result1 = 0;
        result2 = 0;
        result3 = 0;

        for (size_t m = 0; m < M; m++) {
            result0 += tab[decoder0.decode()];
            result1 += tab[decoder1.decode()];
            result2 += tab[decoder2.decode()];
            result3 += tab[decoder3.decode()];
            tab += ksub;
        }
    }
};
|
|
126
|
+
|
|
127
|
+
/*********************************************************************
|
|
128
|
+
* Dispatch bridge — function templates for PQDecoder8 SIMD dispatch.
|
|
129
|
+
*
|
|
130
|
+
* Primary declarations only; specializations are defined in per-SIMD
|
|
131
|
+
* .cpp files (AVX2, AVX512, ARM_SVE, RISCV_RVV) and pq_code_distance-generic.cpp
|
|
132
|
+
* (NONE, ARM_NEON). Same pattern as fvec_L2sqr et al. in distances.h.
|
|
133
|
+
*********************************************************************/
|
|
134
|
+
|
|
135
|
+
/// Dispatch bridge: distance of one 8-bit PQ code at SIMD level SL.
/// Declaration only; no generic body is given here, so each SL that is
/// used needs an explicit specialization in some translation unit.
///
/// @param M          number of subquantizers
/// @param sim_table  precomputed distance table
/// @param code       encoded vector
template <SIMDLevel SL>
float pq_code_distance_8bit_single_impl(
        size_t M,
        const float* sim_table,
        const uint8_t* code);
|
|
140
|
+
|
|
141
|
+
/// Dispatch bridge: distances of four 8-bit PQ codes at SIMD level SL.
/// Declaration only; no generic body is given here, so each SL that is
/// used needs an explicit specialization in some translation unit.
///
/// @param M              number of subquantizers
/// @param sim_table      precomputed distance table
/// @param code0..code3   the four encoded vectors
/// @param result0..3     receive the four distances
template <SIMDLevel SL>
void pq_code_distance_8bit_four_impl(
        size_t M,
        const float* sim_table,
        const uint8_t* __restrict code0,
        const uint8_t* __restrict code1,
        const uint8_t* __restrict code2,
        const uint8_t* __restrict code3,
        float& result0,
        float& result1,
        float& result2,
        float& result3);
|
|
153
|
+
|
|
154
|
+
/// Primary template — always complete.
|
|
155
|
+
/// For PQDecoder8, delegates to _impl dispatch bridges (resolved at
|
|
156
|
+
/// link time to per-SIMD implementations). For other decoders, uses
|
|
157
|
+
/// scalar — their sequential decode() methods don't benefit from SIMD.
|
|
158
|
+
template <typename PQDecoderT, SIMDLevel SL>
|
|
159
|
+
struct PQCodeDistance {
|
|
160
|
+
using PQDecoder = PQDecoderT;
|
|
161
|
+
static constexpr SIMDLevel simd_level = SL;
|
|
162
|
+
|
|
163
|
+
static float distance_single_code(
|
|
164
|
+
size_t M,
|
|
165
|
+
size_t nbits,
|
|
166
|
+
const float* sim_table,
|
|
167
|
+
const uint8_t* code) {
|
|
168
|
+
if constexpr (std::is_same_v<PQDecoderT, PQDecoder8>) {
|
|
169
|
+
return pq_code_distance_8bit_single_impl<SL>(M, sim_table, code);
|
|
170
|
+
} else {
|
|
171
|
+
return PQCodeDistanceScalar<PQDecoderT>::distance_single_code(
|
|
172
|
+
M, nbits, sim_table, code);
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
static void distance_four_codes(
|
|
177
|
+
size_t M,
|
|
178
|
+
size_t nbits,
|
|
179
|
+
const float* sim_table,
|
|
180
|
+
const uint8_t* __restrict code0,
|
|
181
|
+
const uint8_t* __restrict code1,
|
|
182
|
+
const uint8_t* __restrict code2,
|
|
183
|
+
const uint8_t* __restrict code3,
|
|
184
|
+
float& result0,
|
|
185
|
+
float& result1,
|
|
186
|
+
float& result2,
|
|
187
|
+
float& result3) {
|
|
188
|
+
if constexpr (std::is_same_v<PQDecoderT, PQDecoder8>) {
|
|
189
|
+
pq_code_distance_8bit_four_impl<SL>(
|
|
190
|
+
M,
|
|
191
|
+
sim_table,
|
|
192
|
+
code0,
|
|
193
|
+
code1,
|
|
194
|
+
code2,
|
|
195
|
+
code3,
|
|
196
|
+
result0,
|
|
197
|
+
result1,
|
|
198
|
+
result2,
|
|
199
|
+
result3);
|
|
200
|
+
} else {
|
|
201
|
+
PQCodeDistanceScalar<PQDecoderT>::distance_four_codes(
|
|
202
|
+
M,
|
|
203
|
+
nbits,
|
|
204
|
+
sim_table,
|
|
205
|
+
code0,
|
|
206
|
+
code1,
|
|
207
|
+
code2,
|
|
208
|
+
code3,
|
|
209
|
+
result0,
|
|
210
|
+
result1,
|
|
211
|
+
result2,
|
|
212
|
+
result3);
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
};
|
|
216
|
+
|
|
217
|
+
/*********************************************************************
|
|
218
|
+
* Non-templated PQ code distance dispatch (PQDecoder8 only).
|
|
219
|
+
*
|
|
220
|
+
* These follow the same pattern as distances.h: the caller does not
|
|
221
|
+
* name a SIMDLevel. Internally they dispatch via with_simd_level
|
|
222
|
+
* to the best available SIMD implementation (DD: runtime detection,
|
|
223
|
+
* static: compile-time selection). Definitions are in
|
|
224
|
+
* pq_code_distance-generic.cpp.
|
|
225
|
+
*********************************************************************/
|
|
226
|
+
|
|
227
|
+
/// Compute PQ distance for a single code, dispatching to the best
|
|
228
|
+
/// available SIMD level.
|
|
229
|
+
FAISS_API float pq_code_distance_8bit_single(
|
|
230
|
+
size_t M,
|
|
231
|
+
const float* sim_table,
|
|
232
|
+
const uint8_t* code);
|
|
233
|
+
|
|
234
|
+
/// Compute PQ distances for four codes simultaneously, dispatching
|
|
235
|
+
/// to the best available SIMD level.
|
|
236
|
+
FAISS_API void pq_code_distance_8bit_four(
|
|
237
|
+
size_t M,
|
|
238
|
+
const float* sim_table,
|
|
239
|
+
const uint8_t* __restrict code0,
|
|
240
|
+
const uint8_t* __restrict code1,
|
|
241
|
+
const uint8_t* __restrict code2,
|
|
242
|
+
const uint8_t* __restrict code3,
|
|
243
|
+
float& result0,
|
|
244
|
+
float& result1,
|
|
245
|
+
float& result2,
|
|
246
|
+
float& result3);
|
|
247
|
+
|
|
248
|
+
} // namespace pq_code_distance
|
|
249
|
+
|
|
250
|
+
// Re-export public API into namespace faiss for convenience
|
|
251
|
+
using pq_code_distance::pq_code_distance_8bit_four;
|
|
252
|
+
using pq_code_distance::pq_code_distance_8bit_single;
|
|
253
|
+
using pq_code_distance::PQCodeDistance;
|
|
254
|
+
using pq_code_distance::PQCodeDistanceScalar;
|
|
255
|
+
|
|
256
|
+
} // namespace faiss
|
|
@@ -83,16 +83,15 @@ namespace pq_code_distance {
|
|
|
83
83
|
|
|
84
84
|
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
85
85
|
template <>
|
|
86
|
-
float
|
|
86
|
+
float pq_code_distance_8bit_single_impl<SIMDLevel::ARM_SVE>(
|
|
87
87
|
size_t M,
|
|
88
|
-
size_t nbits,
|
|
89
88
|
const float* sim_table,
|
|
90
89
|
const uint8_t* code) {
|
|
91
90
|
if (M <= svcntw())
|
|
92
91
|
return distance_single_code_sve_for_small_m(M, sim_table, code);
|
|
93
92
|
|
|
94
93
|
const float* tab = sim_table;
|
|
95
|
-
|
|
94
|
+
constexpr size_t ksub = 1 << 8;
|
|
96
95
|
|
|
97
96
|
const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
|
|
98
97
|
auto partialSum = svdup_n_f32(0.f);
|
|
@@ -159,12 +158,11 @@ float pq_code_distance_single_impl<SIMDLevel::ARM_SVE>(
|
|
|
159
158
|
return svaddv_f32(svptrue_b32(), partialSum);
|
|
160
159
|
}
|
|
161
160
|
|
|
162
|
-
// Combines 4 operations of
|
|
161
|
+
// Combines 4 operations of pq_code_distance_8bit_single_impl().
|
|
163
162
|
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
164
163
|
template <>
|
|
165
|
-
void
|
|
164
|
+
void pq_code_distance_8bit_four_impl<SIMDLevel::ARM_SVE>(
|
|
166
165
|
size_t M,
|
|
167
|
-
size_t nbits,
|
|
168
166
|
const float* sim_table,
|
|
169
167
|
const uint8_t* __restrict code0,
|
|
170
168
|
const uint8_t* __restrict code1,
|
|
@@ -190,7 +188,7 @@ void pq_code_distance_four_impl<SIMDLevel::ARM_SVE>(
|
|
|
190
188
|
}
|
|
191
189
|
|
|
192
190
|
const float* tab = sim_table;
|
|
193
|
-
|
|
191
|
+
constexpr size_t ksub = 1 << 8;
|
|
194
192
|
|
|
195
193
|
const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
|
|
196
194
|
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifdef COMPILE_SIMD_RISCV_RVV
|
|
9
|
+
|
|
10
|
+
#include <faiss/impl/pq_code_distance/pq_code_distance-inl.h>
|
|
11
|
+
|
|
12
|
+
namespace faiss {
|
|
13
|
+
namespace pq_code_distance {
|
|
14
|
+
|
|
15
|
+
// RISCV_RVV: no RVV-optimized PQ code distance exists yet. Use scalar.
|
|
16
|
+
|
|
17
|
+
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
18
|
+
template <>
|
|
19
|
+
float pq_code_distance_8bit_single_impl<SIMDLevel::RISCV_RVV>(
|
|
20
|
+
size_t M,
|
|
21
|
+
const float* sim_table,
|
|
22
|
+
const uint8_t* code) {
|
|
23
|
+
return PQCodeDistanceScalar<PQDecoder8>::distance_single_code(
|
|
24
|
+
M, 8, sim_table, code);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
// NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
|
|
28
|
+
template <>
|
|
29
|
+
void pq_code_distance_8bit_four_impl<SIMDLevel::RISCV_RVV>(
|
|
30
|
+
size_t M,
|
|
31
|
+
const float* sim_table,
|
|
32
|
+
const uint8_t* __restrict code0,
|
|
33
|
+
const uint8_t* __restrict code1,
|
|
34
|
+
const uint8_t* __restrict code2,
|
|
35
|
+
const uint8_t* __restrict code3,
|
|
36
|
+
float& result0,
|
|
37
|
+
float& result1,
|
|
38
|
+
float& result2,
|
|
39
|
+
float& result3) {
|
|
40
|
+
PQCodeDistanceScalar<PQDecoder8>::distance_four_codes(
|
|
41
|
+
M,
|
|
42
|
+
8,
|
|
43
|
+
sim_table,
|
|
44
|
+
code0,
|
|
45
|
+
code1,
|
|
46
|
+
code2,
|
|
47
|
+
code3,
|
|
48
|
+
result0,
|
|
49
|
+
result1,
|
|
50
|
+
result2,
|
|
51
|
+
result3);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
} // namespace pq_code_distance
|
|
55
|
+
} // namespace faiss
|
|
56
|
+
|
|
57
|
+
#define THE_SIMD_LEVEL SIMDLevel::RISCV_RVV
|
|
58
|
+
|
|
59
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
60
|
+
#include <faiss/utils/hamming_distance/hamming_computer-rvv.h>
|
|
61
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
62
|
+
#include <faiss/impl/pq_code_distance/PQDistanceComputer_impl.h>
|
|
63
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
64
|
+
#include <faiss/impl/pq_code_distance/IVFPQScanner_impl.h>
|
|
65
|
+
|
|
66
|
+
#undef THE_SIMD_LEVEL
|
|
67
|
+
|
|
68
|
+
#endif // COMPILE_SIMD_RISCV_RVV
|