faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifndef HAMMING_COMPUTER_AVX512_H
|
|
9
|
+
#define HAMMING_COMPUTER_AVX512_H
|
|
10
|
+
|
|
11
|
+
// AVX512 HammingComputer and GenHammingComputer specializations.
|
|
12
|
+
// Types without custom AVX512 code inherit from the NONE specializations
|
|
13
|
+
// in hamming_computer-generic.h. Custom specializations for
|
|
14
|
+
// HammingComputer64 and HammingComputerDefault use _mm512_popcnt_epi64
|
|
15
|
+
// when __AVX512VPOPCNTDQ__ is available. GenHammingComputer classes
|
|
16
|
+
// leverage SSE/AVX2 intrinsics.
|
|
17
|
+
|
|
18
|
+
#include <cassert>
|
|
19
|
+
#include <cstdint>
|
|
20
|
+
|
|
21
|
+
#include <faiss/impl/platform_macros.h>
|
|
22
|
+
#include <faiss/utils/hamming_distance/hamming_computer-generic.h>
|
|
23
|
+
|
|
24
|
+
#include <immintrin.h>
|
|
25
|
+
|
|
26
|
+
namespace faiss {
|
|
27
|
+
|
|
28
|
+
/***************************************************************************
|
|
29
|
+
* AVX512 inheriting specializations for types without custom AVX512 code.
|
|
30
|
+
* These explicitly inherit the scalar (NONE) implementation so that
|
|
31
|
+
* every SIMDLevel has a concrete specialization.
|
|
32
|
+
***************************************************************************/
|
|
33
|
+
|
|
34
|
+
#define FAISS_INHERIT_HAMMING(Class) \
|
|
35
|
+
template <> \
|
|
36
|
+
struct Class##_tpl<SIMDLevel::AVX512> : Class##_tpl<SIMDLevel::NONE> { \
|
|
37
|
+
using Class##_tpl<SIMDLevel::NONE>::Class##_tpl; \
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
FAISS_INHERIT_HAMMING(HammingComputer16);
|
|
41
|
+
FAISS_INHERIT_HAMMING(HammingComputer20);
|
|
42
|
+
FAISS_INHERIT_HAMMING(HammingComputer32);
|
|
43
|
+
FAISS_INHERIT_HAMMING(GenHammingComputer8);
|
|
44
|
+
|
|
45
|
+
#undef FAISS_INHERIT_HAMMING
|
|
46
|
+
|
|
47
|
+
/***************************************************************************
|
|
48
|
+
* Custom AVX512 specializations.
|
|
49
|
+
***************************************************************************/
|
|
50
|
+
|
|
51
|
+
template <>
|
|
52
|
+
struct HammingComputer64_tpl<SIMDLevel::AVX512> {
|
|
53
|
+
uint64_t a0, a1, a2, a3, a4, a5, a6, a7;
|
|
54
|
+
const uint64_t* a;
|
|
55
|
+
|
|
56
|
+
HammingComputer64_tpl() {}
|
|
57
|
+
|
|
58
|
+
HammingComputer64_tpl(const uint8_t* a8, int code_size) {
|
|
59
|
+
set(a8, code_size);
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
void set(const uint8_t* a8, FAISS_MAYBE_UNUSED int code_size) {
|
|
63
|
+
assert(code_size == 64);
|
|
64
|
+
a = reinterpret_cast<const uint64_t*>(a8);
|
|
65
|
+
a0 = a[0];
|
|
66
|
+
a1 = a[1];
|
|
67
|
+
a2 = a[2];
|
|
68
|
+
a3 = a[3];
|
|
69
|
+
a4 = a[4];
|
|
70
|
+
a5 = a[5];
|
|
71
|
+
a6 = a[6];
|
|
72
|
+
a7 = a[7];
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
inline int hamming(const uint8_t* b8) const {
|
|
76
|
+
const uint64_t* b = reinterpret_cast<const uint64_t*>(b8);
|
|
77
|
+
#ifdef __AVX512VPOPCNTDQ__
|
|
78
|
+
__m512i vxor =
|
|
79
|
+
_mm512_xor_si512(_mm512_loadu_si512(a), _mm512_loadu_si512(b));
|
|
80
|
+
__m512i vpcnt = _mm512_popcnt_epi64(vxor);
|
|
81
|
+
// reduce performs better than adding the lower and higher parts
|
|
82
|
+
return _mm512_reduce_add_epi32(vpcnt);
|
|
83
|
+
#else
|
|
84
|
+
return popcount64(b[0] ^ a0) + popcount64(b[1] ^ a1) +
|
|
85
|
+
popcount64(b[2] ^ a2) + popcount64(b[3] ^ a3) +
|
|
86
|
+
popcount64(b[4] ^ a4) + popcount64(b[5] ^ a5) +
|
|
87
|
+
popcount64(b[6] ^ a6) + popcount64(b[7] ^ a7);
|
|
88
|
+
#endif
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
inline static constexpr int get_code_size() {
|
|
92
|
+
return 64;
|
|
93
|
+
}
|
|
94
|
+
};
|
|
95
|
+
|
|
96
|
+
template <>
|
|
97
|
+
struct HammingComputerDefault_tpl<SIMDLevel::AVX512> {
|
|
98
|
+
const uint8_t* a8;
|
|
99
|
+
int quotient8;
|
|
100
|
+
int remainder8;
|
|
101
|
+
|
|
102
|
+
HammingComputerDefault_tpl() {}
|
|
103
|
+
|
|
104
|
+
HammingComputerDefault_tpl(const uint8_t* a8_in, int code_size) {
|
|
105
|
+
set(a8_in, code_size);
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
void set(const uint8_t* a8_2, int code_size) {
|
|
109
|
+
this->a8 = a8_2;
|
|
110
|
+
quotient8 = code_size / 8;
|
|
111
|
+
remainder8 = code_size % 8;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
int hamming(const uint8_t* b8) const {
|
|
115
|
+
int accu = 0;
|
|
116
|
+
|
|
117
|
+
const uint64_t* a64 = reinterpret_cast<const uint64_t*>(a8);
|
|
118
|
+
const uint64_t* b64 = reinterpret_cast<const uint64_t*>(b8);
|
|
119
|
+
|
|
120
|
+
int i = 0;
|
|
121
|
+
#ifdef __AVX512VPOPCNTDQ__
|
|
122
|
+
int quotient64 = quotient8 / 8;
|
|
123
|
+
for (; i < quotient64; ++i) {
|
|
124
|
+
__m512i vxor = _mm512_xor_si512(
|
|
125
|
+
_mm512_loadu_si512(&a64[i * 8]),
|
|
126
|
+
_mm512_loadu_si512(&b64[i * 8]));
|
|
127
|
+
__m512i vpcnt = _mm512_popcnt_epi64(vxor);
|
|
128
|
+
// reduce performs better than adding the lower and higher parts
|
|
129
|
+
accu += _mm512_reduce_add_epi32(vpcnt);
|
|
130
|
+
}
|
|
131
|
+
i *= 8;
|
|
132
|
+
#endif
|
|
133
|
+
accu += hamming_popcount_tail(
|
|
134
|
+
a64, b64, i, quotient8, a8, b8, remainder8);
|
|
135
|
+
return accu;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
inline int get_code_size() const {
|
|
139
|
+
return quotient8 * 8 + remainder8;
|
|
140
|
+
}
|
|
141
|
+
};
|
|
142
|
+
|
|
143
|
+
// I'm not sure whether this version is faster or slower, tbh
|
|
144
|
+
// todo: test on different CPUs
|
|
145
|
+
template <>
|
|
146
|
+
struct GenHammingComputer16_tpl<SIMDLevel::AVX512> {
|
|
147
|
+
__m128i a;
|
|
148
|
+
|
|
149
|
+
GenHammingComputer16_tpl(
|
|
150
|
+
const uint8_t* a8,
|
|
151
|
+
FAISS_MAYBE_UNUSED int code_size) {
|
|
152
|
+
assert(code_size == 16);
|
|
153
|
+
a = _mm_loadu_si128((const __m128i_u*)a8);
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
inline int hamming(const uint8_t* b8) const {
|
|
157
|
+
const __m128i b = _mm_loadu_si128((const __m128i_u*)b8);
|
|
158
|
+
const __m128i cmp = _mm_cmpeq_epi8(a, b);
|
|
159
|
+
const auto movemask = _mm_movemask_epi8(cmp);
|
|
160
|
+
return 16 - popcount32(movemask);
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
inline static constexpr int get_code_size() {
|
|
164
|
+
return 16;
|
|
165
|
+
}
|
|
166
|
+
};
|
|
167
|
+
|
|
168
|
+
template <>
|
|
169
|
+
struct GenHammingComputer32_tpl<SIMDLevel::AVX512> {
|
|
170
|
+
__m256i a;
|
|
171
|
+
|
|
172
|
+
GenHammingComputer32_tpl(
|
|
173
|
+
const uint8_t* a8,
|
|
174
|
+
FAISS_MAYBE_UNUSED int code_size) {
|
|
175
|
+
assert(code_size == 32);
|
|
176
|
+
a = _mm256_loadu_si256((const __m256i_u*)a8);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
inline int hamming(const uint8_t* b8) const {
|
|
180
|
+
const __m256i b = _mm256_loadu_si256((const __m256i_u*)b8);
|
|
181
|
+
const __m256i cmp = _mm256_cmpeq_epi8(a, b);
|
|
182
|
+
const uint32_t movemask = _mm256_movemask_epi8(cmp);
|
|
183
|
+
return 32 - popcount32(movemask);
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
inline static constexpr int get_code_size() {
|
|
187
|
+
return 32;
|
|
188
|
+
}
|
|
189
|
+
};
|
|
190
|
+
|
|
191
|
+
// A specialized version might be needed for the very long
|
|
192
|
+
// GenHamming code_size. In such a case, one may accumulate
|
|
193
|
+
// counts using _mm256_sub_epi8 and then compute a horizontal
|
|
194
|
+
// sum (using _mm256_sad_epu8, maybe, in blocks of no larger
|
|
195
|
+
// than 256 * 32 bytes).
|
|
196
|
+
|
|
197
|
+
template <>
|
|
198
|
+
struct GenHammingComputerM8_tpl<SIMDLevel::AVX512> {
|
|
199
|
+
const uint64_t* a;
|
|
200
|
+
int n;
|
|
201
|
+
|
|
202
|
+
GenHammingComputerM8_tpl(const uint8_t* a8, int code_size) {
|
|
203
|
+
assert(code_size % 8 == 0);
|
|
204
|
+
a = (uint64_t*)a8;
|
|
205
|
+
n = code_size / 8;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
int hamming(const uint8_t* b8) const {
|
|
209
|
+
const uint64_t* b = (uint64_t*)b8;
|
|
210
|
+
int accu = 0;
|
|
211
|
+
|
|
212
|
+
int i = 0;
|
|
213
|
+
int n4 = (n / 4) * 4;
|
|
214
|
+
for (; i < n4; i += 4) {
|
|
215
|
+
const __m256i av = _mm256_loadu_si256((const __m256i_u*)(a + i));
|
|
216
|
+
const __m256i bv = _mm256_loadu_si256((const __m256i_u*)(b + i));
|
|
217
|
+
const __m256i cmp = _mm256_cmpeq_epi8(av, bv);
|
|
218
|
+
const uint32_t movemask = _mm256_movemask_epi8(cmp);
|
|
219
|
+
accu += 32 - popcount32(movemask);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
for (; i < n; i++)
|
|
223
|
+
accu += generalized_hamming_64(a[i] ^ b[i]);
|
|
224
|
+
return accu;
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
inline int get_code_size() const {
|
|
228
|
+
return n * 8;
|
|
229
|
+
}
|
|
230
|
+
};
|
|
231
|
+
|
|
232
|
+
} // namespace faiss
|
|
233
|
+
|
|
234
|
+
#endif
|
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifndef HAMMING_COMPUTER_GENERIC_H
|
|
9
|
+
#define HAMMING_COMPUTER_GENERIC_H
|
|
10
|
+
|
|
11
|
+
// Scalar (NONE) HammingComputer specializations and hamming_popcount_tail
|
|
12
|
+
// utility. No ISA-specific intrinsics. Per-ISA files (hamming_computer-avx2.h,
|
|
13
|
+
// etc.) include this file and inherit or override the NONE specializations.
|
|
14
|
+
|
|
15
|
+
#include <faiss/utils/hamming_distance/hamming_computer.h>
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
/* Duff's device + byte remainder tail for HammingComputerDefault.
|
|
20
|
+
* Processes uint64 words starting at index i_start using popcount,
|
|
21
|
+
* then handles any remaining bytes via lookup table. */
|
|
22
|
+
inline int hamming_popcount_tail(
|
|
23
|
+
const uint64_t* a64,
|
|
24
|
+
const uint64_t* b64,
|
|
25
|
+
int i_start,
|
|
26
|
+
int quotient8,
|
|
27
|
+
const uint8_t* a8,
|
|
28
|
+
const uint8_t* b8,
|
|
29
|
+
int remainder8) {
|
|
30
|
+
int accu = 0;
|
|
31
|
+
int i = i_start;
|
|
32
|
+
int len = quotient8 - i_start;
|
|
33
|
+
switch (len & 7) {
|
|
34
|
+
default:
|
|
35
|
+
while (len > 7) {
|
|
36
|
+
len -= 8;
|
|
37
|
+
accu += popcount64(a64[i] ^ b64[i]);
|
|
38
|
+
i++;
|
|
39
|
+
[[fallthrough]];
|
|
40
|
+
case 7:
|
|
41
|
+
accu += popcount64(a64[i] ^ b64[i]);
|
|
42
|
+
i++;
|
|
43
|
+
[[fallthrough]];
|
|
44
|
+
case 6:
|
|
45
|
+
accu += popcount64(a64[i] ^ b64[i]);
|
|
46
|
+
i++;
|
|
47
|
+
[[fallthrough]];
|
|
48
|
+
case 5:
|
|
49
|
+
accu += popcount64(a64[i] ^ b64[i]);
|
|
50
|
+
i++;
|
|
51
|
+
[[fallthrough]];
|
|
52
|
+
case 4:
|
|
53
|
+
accu += popcount64(a64[i] ^ b64[i]);
|
|
54
|
+
i++;
|
|
55
|
+
[[fallthrough]];
|
|
56
|
+
case 3:
|
|
57
|
+
accu += popcount64(a64[i] ^ b64[i]);
|
|
58
|
+
i++;
|
|
59
|
+
[[fallthrough]];
|
|
60
|
+
case 2:
|
|
61
|
+
accu += popcount64(a64[i] ^ b64[i]);
|
|
62
|
+
i++;
|
|
63
|
+
[[fallthrough]];
|
|
64
|
+
case 1:
|
|
65
|
+
accu += popcount64(a64[i] ^ b64[i]);
|
|
66
|
+
i++;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
if (remainder8) {
|
|
70
|
+
const uint8_t* a = a8 + 8 * quotient8;
|
|
71
|
+
const uint8_t* b = b8 + 8 * quotient8;
|
|
72
|
+
if (remainder8 >= 4) {
|
|
73
|
+
accu += popcount32(*(uint32_t*)a ^ *(uint32_t*)b);
|
|
74
|
+
a += 4;
|
|
75
|
+
b += 4;
|
|
76
|
+
remainder8 -= 4;
|
|
77
|
+
}
|
|
78
|
+
if (remainder8 >= 2) {
|
|
79
|
+
accu += popcount32(*(uint16_t*)a ^ *(uint16_t*)b);
|
|
80
|
+
a += 2;
|
|
81
|
+
b += 2;
|
|
82
|
+
remainder8 -= 2;
|
|
83
|
+
}
|
|
84
|
+
if (remainder8 >= 1) {
|
|
85
|
+
accu += popcount32(*a ^ *b);
|
|
86
|
+
remainder8 -= 2;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
return accu;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/***************************************************************************
|
|
93
|
+
* HammingComputer NONE specializations — scalar bodies.
|
|
94
|
+
* Per-ISA backend files (hamming_computer-avx512.h, hamming_computer-neon.h,
|
|
95
|
+
* etc.) provide their own specializations; those without custom code
|
|
96
|
+
* inherit from NONE.
|
|
97
|
+
***************************************************************************/
|
|
98
|
+
|
|
99
|
+
template <>
|
|
100
|
+
struct HammingComputer16_tpl<SIMDLevel::NONE> {
|
|
101
|
+
uint64_t a0, a1;
|
|
102
|
+
|
|
103
|
+
HammingComputer16_tpl() {}
|
|
104
|
+
|
|
105
|
+
HammingComputer16_tpl(const uint8_t* a8, int code_size) {
|
|
106
|
+
set(a8, code_size);
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
void set(const uint8_t* a8, FAISS_MAYBE_UNUSED int code_size) {
|
|
110
|
+
assert(code_size == 16);
|
|
111
|
+
const uint64_t* a = reinterpret_cast<const uint64_t*>(a8);
|
|
112
|
+
a0 = a[0];
|
|
113
|
+
a1 = a[1];
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
inline int hamming(const uint8_t* b8) const {
|
|
117
|
+
const uint64_t* b = reinterpret_cast<const uint64_t*>(b8);
|
|
118
|
+
return popcount64(b[0] ^ a0) + popcount64(b[1] ^ a1);
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
inline static constexpr int get_code_size() {
|
|
122
|
+
return 16;
|
|
123
|
+
}
|
|
124
|
+
};
|
|
125
|
+
|
|
126
|
+
// when applied to an array, 1/2 of the 64-bit accesses are unaligned.
|
|
127
|
+
// This incurs a penalty of ~10% wrt. fully aligned accesses.
|
|
128
|
+
template <>
|
|
129
|
+
struct HammingComputer20_tpl<SIMDLevel::NONE> {
|
|
130
|
+
uint64_t a0, a1;
|
|
131
|
+
uint32_t a2;
|
|
132
|
+
|
|
133
|
+
HammingComputer20_tpl() {}
|
|
134
|
+
|
|
135
|
+
HammingComputer20_tpl(const uint8_t* a8, int code_size) {
|
|
136
|
+
set(a8, code_size);
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
void set(const uint8_t* a8, FAISS_MAYBE_UNUSED int code_size) {
|
|
140
|
+
assert(code_size == 20);
|
|
141
|
+
const uint64_t* a = reinterpret_cast<const uint64_t*>(a8);
|
|
142
|
+
const uint32_t* a32 = reinterpret_cast<const uint32_t*>(a8);
|
|
143
|
+
a0 = a[0];
|
|
144
|
+
a1 = a[1];
|
|
145
|
+
// can't read a[2] since it is uint64_t, not uint32_t
|
|
146
|
+
// results in AddressSanitizer failure reading past end of array
|
|
147
|
+
a2 = a32[4];
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
inline int hamming(const uint8_t* b8) const {
|
|
151
|
+
const uint64_t* b = reinterpret_cast<const uint64_t*>(b8);
|
|
152
|
+
const uint32_t* b32_tail = reinterpret_cast<const uint32_t*>(b + 2);
|
|
153
|
+
return popcount64(b[0] ^ a0) + popcount64(b[1] ^ a1) +
|
|
154
|
+
popcount64(*b32_tail ^ a2);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
inline static constexpr int get_code_size() {
|
|
158
|
+
return 20;
|
|
159
|
+
}
|
|
160
|
+
};
|
|
161
|
+
|
|
162
|
+
template <>
|
|
163
|
+
struct HammingComputer32_tpl<SIMDLevel::NONE> {
|
|
164
|
+
uint64_t a0, a1, a2, a3;
|
|
165
|
+
|
|
166
|
+
HammingComputer32_tpl() {}
|
|
167
|
+
|
|
168
|
+
HammingComputer32_tpl(const uint8_t* a8, int code_size) {
|
|
169
|
+
set(a8, code_size);
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
void set(const uint8_t* a8, FAISS_MAYBE_UNUSED int code_size) {
|
|
173
|
+
assert(code_size == 32);
|
|
174
|
+
const uint64_t* a = reinterpret_cast<const uint64_t*>(a8);
|
|
175
|
+
a0 = a[0];
|
|
176
|
+
a1 = a[1];
|
|
177
|
+
a2 = a[2];
|
|
178
|
+
a3 = a[3];
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
inline int hamming(const uint8_t* b8) const {
|
|
182
|
+
const uint64_t* b = reinterpret_cast<const uint64_t*>(b8);
|
|
183
|
+
return popcount64(b[0] ^ a0) + popcount64(b[1] ^ a1) +
|
|
184
|
+
popcount64(b[2] ^ a2) + popcount64(b[3] ^ a3);
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
inline static constexpr int get_code_size() {
|
|
188
|
+
return 32;
|
|
189
|
+
}
|
|
190
|
+
};
|
|
191
|
+
|
|
192
|
+
template <>
|
|
193
|
+
struct GenHammingComputer8_tpl<SIMDLevel::NONE> {
|
|
194
|
+
uint64_t a0;
|
|
195
|
+
|
|
196
|
+
GenHammingComputer8_tpl(
|
|
197
|
+
const uint8_t* a,
|
|
198
|
+
FAISS_MAYBE_UNUSED int code_size) {
|
|
199
|
+
assert(code_size == 8);
|
|
200
|
+
const uint64_t* a64 = reinterpret_cast<const uint64_t*>(a);
|
|
201
|
+
a0 = *a64;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
inline int hamming(const uint8_t* b) const {
|
|
205
|
+
const uint64_t* b64 = reinterpret_cast<const uint64_t*>(b);
|
|
206
|
+
return generalized_hamming_64(*b64 ^ a0);
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
inline static constexpr int get_code_size() {
|
|
210
|
+
return 8;
|
|
211
|
+
}
|
|
212
|
+
};
|
|
213
|
+
|
|
214
|
+
/***************************************************************************
|
|
215
|
+
* Scalar HammingComputer64 and HammingComputerDefault NONE specializations.
|
|
216
|
+
* AVX512 and NEON override via per-ISA specializations.
|
|
217
|
+
***************************************************************************/
|
|
218
|
+
|
|
219
|
+
template <>
|
|
220
|
+
struct HammingComputer64_tpl<SIMDLevel::NONE> {
|
|
221
|
+
uint64_t a0, a1, a2, a3, a4, a5, a6, a7;
|
|
222
|
+
|
|
223
|
+
HammingComputer64_tpl() {}
|
|
224
|
+
|
|
225
|
+
HammingComputer64_tpl(const uint8_t* a8, int code_size) {
|
|
226
|
+
set(a8, code_size);
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
void set(const uint8_t* a8, FAISS_MAYBE_UNUSED int code_size) {
|
|
230
|
+
assert(code_size == 64);
|
|
231
|
+
const uint64_t* a = reinterpret_cast<const uint64_t*>(a8);
|
|
232
|
+
a0 = a[0];
|
|
233
|
+
a1 = a[1];
|
|
234
|
+
a2 = a[2];
|
|
235
|
+
a3 = a[3];
|
|
236
|
+
a4 = a[4];
|
|
237
|
+
a5 = a[5];
|
|
238
|
+
a6 = a[6];
|
|
239
|
+
a7 = a[7];
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
inline int hamming(const uint8_t* b8) const {
|
|
243
|
+
const uint64_t* b = reinterpret_cast<const uint64_t*>(b8);
|
|
244
|
+
return popcount64(b[0] ^ a0) + popcount64(b[1] ^ a1) +
|
|
245
|
+
popcount64(b[2] ^ a2) + popcount64(b[3] ^ a3) +
|
|
246
|
+
popcount64(b[4] ^ a4) + popcount64(b[5] ^ a5) +
|
|
247
|
+
popcount64(b[6] ^ a6) + popcount64(b[7] ^ a7);
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
inline static constexpr int get_code_size() {
|
|
251
|
+
return 64;
|
|
252
|
+
}
|
|
253
|
+
};
|
|
254
|
+
|
|
255
|
+
template <>
|
|
256
|
+
struct HammingComputerDefault_tpl<SIMDLevel::NONE> {
|
|
257
|
+
const uint8_t* a8;
|
|
258
|
+
int quotient8;
|
|
259
|
+
int remainder8;
|
|
260
|
+
|
|
261
|
+
HammingComputerDefault_tpl() {}
|
|
262
|
+
|
|
263
|
+
HammingComputerDefault_tpl(const uint8_t* a8_in, int code_size) {
|
|
264
|
+
set(a8_in, code_size);
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
void set(const uint8_t* a8_in, int code_size) {
|
|
268
|
+
this->a8 = a8_in;
|
|
269
|
+
quotient8 = code_size / 8;
|
|
270
|
+
remainder8 = code_size % 8;
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
int hamming(const uint8_t* b8) const {
|
|
274
|
+
const uint64_t* a64 = reinterpret_cast<const uint64_t*>(a8);
|
|
275
|
+
const uint64_t* b64 = reinterpret_cast<const uint64_t*>(b8);
|
|
276
|
+
return hamming_popcount_tail(
|
|
277
|
+
a64, b64, 0, quotient8, a8, b8, remainder8);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
inline int get_code_size() const {
|
|
281
|
+
return quotient8 * 8 + remainder8;
|
|
282
|
+
}
|
|
283
|
+
};
|
|
284
|
+
|
|
285
|
+
/***************************************************************************
|
|
286
|
+
* Generalized HammingComputer NONE specializations (scalar bodies).
|
|
287
|
+
* AVX2/AVX512/NEON override via per-ISA specializations.
|
|
288
|
+
***************************************************************************/
|
|
289
|
+
|
|
290
|
+
template <>
|
|
291
|
+
struct GenHammingComputer16_tpl<SIMDLevel::NONE> {
|
|
292
|
+
uint64_t a0, a1;
|
|
293
|
+
|
|
294
|
+
GenHammingComputer16_tpl(
|
|
295
|
+
const uint8_t* a8,
|
|
296
|
+
FAISS_MAYBE_UNUSED int code_size) {
|
|
297
|
+
assert(code_size == 16);
|
|
298
|
+
const uint64_t* a = reinterpret_cast<const uint64_t*>(a8);
|
|
299
|
+
a0 = a[0];
|
|
300
|
+
a1 = a[1];
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
inline int hamming(const uint8_t* b8) const {
|
|
304
|
+
const uint64_t* b = reinterpret_cast<const uint64_t*>(b8);
|
|
305
|
+
return generalized_hamming_64(b[0] ^ a0) +
|
|
306
|
+
generalized_hamming_64(b[1] ^ a1);
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
inline static constexpr int get_code_size() {
|
|
310
|
+
return 16;
|
|
311
|
+
}
|
|
312
|
+
};
|
|
313
|
+
|
|
314
|
+
template <>
|
|
315
|
+
struct GenHammingComputer32_tpl<SIMDLevel::NONE> {
|
|
316
|
+
uint64_t a0, a1, a2, a3;
|
|
317
|
+
|
|
318
|
+
GenHammingComputer32_tpl(
|
|
319
|
+
const uint8_t* a8,
|
|
320
|
+
FAISS_MAYBE_UNUSED int code_size) {
|
|
321
|
+
assert(code_size == 32);
|
|
322
|
+
const uint64_t* a = reinterpret_cast<const uint64_t*>(a8);
|
|
323
|
+
a0 = a[0];
|
|
324
|
+
a1 = a[1];
|
|
325
|
+
a2 = a[2];
|
|
326
|
+
a3 = a[3];
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
inline int hamming(const uint8_t* b8) const {
|
|
330
|
+
const uint64_t* b = reinterpret_cast<const uint64_t*>(b8);
|
|
331
|
+
return generalized_hamming_64(b[0] ^ a0) +
|
|
332
|
+
generalized_hamming_64(b[1] ^ a1) +
|
|
333
|
+
generalized_hamming_64(b[2] ^ a2) +
|
|
334
|
+
generalized_hamming_64(b[3] ^ a3);
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
inline static constexpr int get_code_size() {
|
|
338
|
+
return 32;
|
|
339
|
+
}
|
|
340
|
+
};
|
|
341
|
+
|
|
342
|
+
template <>
|
|
343
|
+
struct GenHammingComputerM8_tpl<SIMDLevel::NONE> {
|
|
344
|
+
const uint64_t* a;
|
|
345
|
+
int n;
|
|
346
|
+
|
|
347
|
+
GenHammingComputerM8_tpl(const uint8_t* a8, int code_size) {
|
|
348
|
+
assert(code_size % 8 == 0);
|
|
349
|
+
a = reinterpret_cast<const uint64_t*>(a8);
|
|
350
|
+
n = code_size / 8;
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
int hamming(const uint8_t* b8) const {
|
|
354
|
+
const uint64_t* b = reinterpret_cast<const uint64_t*>(b8);
|
|
355
|
+
int accu = 0;
|
|
356
|
+
for (int i = 0; i < n; i++)
|
|
357
|
+
accu += generalized_hamming_64(a[i] ^ b[i]);
|
|
358
|
+
return accu;
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
inline int get_code_size() const {
|
|
362
|
+
return n * 8;
|
|
363
|
+
}
|
|
364
|
+
};
|
|
365
|
+
|
|
366
|
+
} // namespace faiss
|
|
367
|
+
|
|
368
|
+
#endif
|