faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -1,235 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
-
*
|
|
4
|
-
* This source code is licensed under the MIT license found in the
|
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
#pragma once
|
|
9
|
-
|
|
10
|
-
/** In this file are the implementations of extra metrics beyond L2
|
|
11
|
-
* and inner product */
|
|
12
|
-
|
|
13
|
-
#include <faiss/MetricType.h>
|
|
14
|
-
#include <faiss/impl/FaissAssert.h>
|
|
15
|
-
#include <faiss/impl/simd_dispatch.h>
|
|
16
|
-
#include <faiss/utils/distances.h>
|
|
17
|
-
#include <cmath>
|
|
18
|
-
#include <type_traits>
|
|
19
|
-
|
|
20
|
-
namespace faiss {
|
|
21
|
-
|
|
22
|
-
/***************************************************************************
|
|
23
|
-
* VectorDistance base class - contains common data members and type defs
|
|
24
|
-
**************************************************************************/
|
|
25
|
-
|
|
26
|
-
template <MetricType mt>
|
|
27
|
-
struct VectorDistanceBase {
|
|
28
|
-
size_t d;
|
|
29
|
-
float metric_arg;
|
|
30
|
-
static constexpr MetricType metric = mt;
|
|
31
|
-
static constexpr bool is_similarity = is_similarity_metric(mt);
|
|
32
|
-
|
|
33
|
-
using C = typename std::conditional<
|
|
34
|
-
is_similarity_metric(mt),
|
|
35
|
-
CMin<float, int64_t>,
|
|
36
|
-
CMax<float, int64_t>>::type;
|
|
37
|
-
};
|
|
38
|
-
|
|
39
|
-
/***************************************************************************
|
|
40
|
-
* VectorDistance struct template - specializations for each metric type
|
|
41
|
-
**************************************************************************/
|
|
42
|
-
|
|
43
|
-
template <MetricType mt, SIMDLevel level>
|
|
44
|
-
struct VectorDistance : VectorDistanceBase<mt> {
|
|
45
|
-
inline float operator()(const float* x, const float* y) const;
|
|
46
|
-
};
|
|
47
|
-
|
|
48
|
-
template <SIMDLevel level>
|
|
49
|
-
struct VectorDistance<METRIC_L2, level> : VectorDistanceBase<METRIC_L2> {
|
|
50
|
-
inline float operator()(const float* x, const float* y) const {
|
|
51
|
-
return fvec_L2sqr<level>(x, y, this->d);
|
|
52
|
-
}
|
|
53
|
-
};
|
|
54
|
-
|
|
55
|
-
template <SIMDLevel level>
|
|
56
|
-
struct VectorDistance<METRIC_INNER_PRODUCT, level>
|
|
57
|
-
: VectorDistanceBase<METRIC_INNER_PRODUCT> {
|
|
58
|
-
inline float operator()(const float* x, const float* y) const {
|
|
59
|
-
return fvec_inner_product<level>(x, y, this->d);
|
|
60
|
-
}
|
|
61
|
-
};
|
|
62
|
-
|
|
63
|
-
template <SIMDLevel level>
|
|
64
|
-
struct VectorDistance<METRIC_L1, level> : VectorDistanceBase<METRIC_L1> {
|
|
65
|
-
inline float operator()(const float* x, const float* y) const {
|
|
66
|
-
return fvec_L1<level>(x, y, this->d);
|
|
67
|
-
}
|
|
68
|
-
};
|
|
69
|
-
|
|
70
|
-
template <SIMDLevel level>
|
|
71
|
-
struct VectorDistance<METRIC_Linf, level> : VectorDistanceBase<METRIC_Linf> {
|
|
72
|
-
inline float operator()(const float* x, const float* y) const {
|
|
73
|
-
return fvec_Linf<level>(x, y, this->d);
|
|
74
|
-
}
|
|
75
|
-
};
|
|
76
|
-
|
|
77
|
-
template <>
|
|
78
|
-
struct VectorDistance<METRIC_Lp, SIMDLevel::NONE>
|
|
79
|
-
: VectorDistanceBase<METRIC_Lp> {
|
|
80
|
-
inline float operator()(const float* x, const float* y) const {
|
|
81
|
-
float accu = 0;
|
|
82
|
-
for (size_t i = 0; i < this->d; i++) {
|
|
83
|
-
float diff = fabs(x[i] - y[i]);
|
|
84
|
-
accu += powf(diff, this->metric_arg);
|
|
85
|
-
}
|
|
86
|
-
return accu;
|
|
87
|
-
}
|
|
88
|
-
};
|
|
89
|
-
|
|
90
|
-
template <>
|
|
91
|
-
struct VectorDistance<METRIC_Canberra, SIMDLevel::NONE>
|
|
92
|
-
: VectorDistanceBase<METRIC_Canberra> {
|
|
93
|
-
inline float operator()(const float* x, const float* y) const {
|
|
94
|
-
float accu = 0;
|
|
95
|
-
for (size_t i = 0; i < this->d; i++) {
|
|
96
|
-
float xi = x[i], yi = y[i];
|
|
97
|
-
accu += fabs(xi - yi) / (fabs(xi) + fabs(yi));
|
|
98
|
-
}
|
|
99
|
-
return accu;
|
|
100
|
-
}
|
|
101
|
-
};
|
|
102
|
-
|
|
103
|
-
template <>
|
|
104
|
-
struct VectorDistance<METRIC_BrayCurtis, SIMDLevel::NONE>
|
|
105
|
-
: VectorDistanceBase<METRIC_BrayCurtis> {
|
|
106
|
-
inline float operator()(const float* x, const float* y) const {
|
|
107
|
-
float accu_num = 0, accu_den = 0;
|
|
108
|
-
for (size_t i = 0; i < this->d; i++) {
|
|
109
|
-
float xi = x[i], yi = y[i];
|
|
110
|
-
accu_num += fabs(xi - yi);
|
|
111
|
-
accu_den += fabs(xi + yi);
|
|
112
|
-
}
|
|
113
|
-
return accu_num / accu_den;
|
|
114
|
-
}
|
|
115
|
-
};
|
|
116
|
-
|
|
117
|
-
template <>
|
|
118
|
-
struct VectorDistance<METRIC_JensenShannon, SIMDLevel::NONE>
|
|
119
|
-
: VectorDistanceBase<METRIC_JensenShannon> {
|
|
120
|
-
inline float operator()(const float* x, const float* y) const {
|
|
121
|
-
float accu = 0;
|
|
122
|
-
for (size_t i = 0; i < this->d; i++) {
|
|
123
|
-
float xi = x[i], yi = y[i];
|
|
124
|
-
float mi = 0.5 * (xi + yi);
|
|
125
|
-
float kl1 = -xi * log(mi / xi);
|
|
126
|
-
float kl2 = -yi * log(mi / yi);
|
|
127
|
-
accu += kl1 + kl2;
|
|
128
|
-
}
|
|
129
|
-
return 0.5 * accu;
|
|
130
|
-
}
|
|
131
|
-
};
|
|
132
|
-
|
|
133
|
-
template <>
|
|
134
|
-
struct VectorDistance<METRIC_Jaccard, SIMDLevel::NONE>
|
|
135
|
-
: VectorDistanceBase<METRIC_Jaccard> {
|
|
136
|
-
inline float operator()(const float* x, const float* y) const {
|
|
137
|
-
// WARNING: this distance is defined only for positive input vectors.
|
|
138
|
-
// Providing vectors with negative values would lead to incorrect
|
|
139
|
-
// results.
|
|
140
|
-
float accu_num = 0, accu_den = 0;
|
|
141
|
-
for (size_t i = 0; i < this->d; i++) {
|
|
142
|
-
accu_num += fmin(x[i], y[i]);
|
|
143
|
-
accu_den += fmax(x[i], y[i]);
|
|
144
|
-
}
|
|
145
|
-
return accu_num / accu_den;
|
|
146
|
-
}
|
|
147
|
-
};
|
|
148
|
-
|
|
149
|
-
template <>
|
|
150
|
-
struct VectorDistance<METRIC_NaNEuclidean, SIMDLevel::NONE>
|
|
151
|
-
: VectorDistanceBase<METRIC_NaNEuclidean> {
|
|
152
|
-
inline float operator()(const float* x, const float* y) const {
|
|
153
|
-
// https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.nan_euclidean_distances.html
|
|
154
|
-
float accu = 0;
|
|
155
|
-
size_t present = 0;
|
|
156
|
-
for (size_t i = 0; i < this->d; i++) {
|
|
157
|
-
if (!std::isnan(x[i]) && !std::isnan(y[i])) {
|
|
158
|
-
float diff = x[i] - y[i];
|
|
159
|
-
accu += diff * diff;
|
|
160
|
-
present++;
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
if (present == 0) {
|
|
164
|
-
return NAN;
|
|
165
|
-
}
|
|
166
|
-
return float(this->d) / float(present) * accu;
|
|
167
|
-
}
|
|
168
|
-
};
|
|
169
|
-
|
|
170
|
-
template <>
|
|
171
|
-
struct VectorDistance<METRIC_GOWER, SIMDLevel::NONE>
|
|
172
|
-
: VectorDistanceBase<METRIC_GOWER> {
|
|
173
|
-
inline float operator()(const float* x, const float* y) const {
|
|
174
|
-
float accu = 0;
|
|
175
|
-
size_t valid_dims = 0;
|
|
176
|
-
|
|
177
|
-
for (size_t i = 0; i < this->d; i++) {
|
|
178
|
-
if (std::isnan(x[i]) || std::isnan(y[i])) {
|
|
179
|
-
continue;
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
if (x[i] >= 0 && y[i] >= 0) {
|
|
183
|
-
if (x[i] > 1 || y[i] > 1) {
|
|
184
|
-
return std::numeric_limits<float>::quiet_NaN();
|
|
185
|
-
}
|
|
186
|
-
accu += fabs(x[i] - y[i]);
|
|
187
|
-
} else if (x[i] < 0 && y[i] < 0) {
|
|
188
|
-
accu += float(int(x[i] != y[i]));
|
|
189
|
-
} else {
|
|
190
|
-
return std::numeric_limits<float>::quiet_NaN();
|
|
191
|
-
}
|
|
192
|
-
valid_dims++;
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
if (valid_dims == 0) {
|
|
196
|
-
return std::numeric_limits<float>::quiet_NaN();
|
|
197
|
-
}
|
|
198
|
-
return accu / valid_dims;
|
|
199
|
-
}
|
|
200
|
-
};
|
|
201
|
-
|
|
202
|
-
/***************************************************************************
|
|
203
|
-
* Dispatching function that takes a lambda directly.
|
|
204
|
-
* The lambda should be templated on VectorDistance, eg.:
|
|
205
|
-
*
|
|
206
|
-
* auto result = with_VectorDistance(
|
|
207
|
-
* metric, metric_arg, [&]<class VD>(VD vd) {
|
|
208
|
-
* return vd(x, y);
|
|
209
|
-
* });
|
|
210
|
-
**************************************************************************/
|
|
211
|
-
|
|
212
|
-
template <typename LambdaType>
|
|
213
|
-
auto with_VectorDistance(
|
|
214
|
-
size_t d,
|
|
215
|
-
MetricType metric,
|
|
216
|
-
float metric_arg,
|
|
217
|
-
LambdaType&& action) {
|
|
218
|
-
auto dispatch_metric = [&]<MetricType mt>() {
|
|
219
|
-
auto call = [&]<SIMDLevel level>() {
|
|
220
|
-
VectorDistance<mt, level> vd = {d, metric_arg};
|
|
221
|
-
return action(vd);
|
|
222
|
-
};
|
|
223
|
-
|
|
224
|
-
constexpr bool has_simd = mt == METRIC_INNER_PRODUCT ||
|
|
225
|
-
mt == METRIC_L2 || mt == METRIC_L1 || mt == METRIC_Linf;
|
|
226
|
-
if constexpr (!has_simd) {
|
|
227
|
-
return call.template operator()<SIMDLevel::NONE>();
|
|
228
|
-
} else {
|
|
229
|
-
DISPATCH_SIMDLevel(call.template operator());
|
|
230
|
-
}
|
|
231
|
-
};
|
|
232
|
-
return with_metric_type(metric, dispatch_metric);
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
} // namespace faiss
|
|
@@ -1,462 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
-
*
|
|
4
|
-
* This source code is licensed under the MIT license found in the
|
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
#ifndef HAMMING_AVX2_INL_H
|
|
9
|
-
#define HAMMING_AVX2_INL_H
|
|
10
|
-
|
|
11
|
-
// AVX2 version
|
|
12
|
-
|
|
13
|
-
#include <cassert>
|
|
14
|
-
#include <cstddef>
|
|
15
|
-
#include <cstdint>
|
|
16
|
-
|
|
17
|
-
#include <faiss/impl/platform_macros.h>
|
|
18
|
-
|
|
19
|
-
#include <immintrin.h>
|
|
20
|
-
|
|
21
|
-
namespace faiss {
|
|
22
|
-
|
|
23
|
-
/* Elementary Hamming distance computation: unoptimized */
|
|
24
|
-
template <size_t nbits, typename T>
|
|
25
|
-
inline T hamming(const uint8_t* bs1, const uint8_t* bs2) {
|
|
26
|
-
const size_t nbytes = nbits / 8;
|
|
27
|
-
size_t i;
|
|
28
|
-
T h = 0;
|
|
29
|
-
for (i = 0; i < nbytes; i++) {
|
|
30
|
-
h += (T)hamdis_tab_ham_bytes[bs1[i] ^ bs2[i]];
|
|
31
|
-
}
|
|
32
|
-
return h;
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
/* Hamming distances for multiples of 64 bits */
|
|
36
|
-
template <size_t nbits>
|
|
37
|
-
inline hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2) {
|
|
38
|
-
const size_t nwords = nbits / 64;
|
|
39
|
-
size_t i;
|
|
40
|
-
hamdis_t h = 0;
|
|
41
|
-
for (i = 0; i < nwords; i++) {
|
|
42
|
-
h += popcount64(bs1[i] ^ bs2[i]);
|
|
43
|
-
}
|
|
44
|
-
return h;
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
/* specialized (optimized) functions */
|
|
48
|
-
template <>
|
|
49
|
-
inline hamdis_t hamming<64>(const uint64_t* pa, const uint64_t* pb) {
|
|
50
|
-
return popcount64(pa[0] ^ pb[0]);
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
template <>
|
|
54
|
-
inline hamdis_t hamming<128>(const uint64_t* pa, const uint64_t* pb) {
|
|
55
|
-
return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
template <>
|
|
59
|
-
inline hamdis_t hamming<256>(const uint64_t* pa, const uint64_t* pb) {
|
|
60
|
-
return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]) +
|
|
61
|
-
popcount64(pa[2] ^ pb[2]) + popcount64(pa[3] ^ pb[3]);
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
/* Hamming distances for multiple of 64 bits */
|
|
65
|
-
inline hamdis_t hamming(
|
|
66
|
-
const uint64_t* bs1,
|
|
67
|
-
const uint64_t* bs2,
|
|
68
|
-
size_t nwords) {
|
|
69
|
-
hamdis_t h = 0;
|
|
70
|
-
for (size_t i = 0; i < nwords; i++) {
|
|
71
|
-
h += popcount64(bs1[i] ^ bs2[i]);
|
|
72
|
-
}
|
|
73
|
-
return h;
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
/******************************************************************
|
|
77
|
-
* The HammingComputer series of classes compares a single code of
|
|
78
|
-
* size 4 to 32 to incoming codes. They are intended for use as a
|
|
79
|
-
* template class where it would be inefficient to switch on the code
|
|
80
|
-
* size in the inner loop. Hopefully the compiler will inline the
|
|
81
|
-
* hamming() functions and put the a0, a1, ... in registers.
|
|
82
|
-
******************************************************************/
|
|
83
|
-
|
|
84
|
-
struct HammingComputer4 {
|
|
85
|
-
uint32_t a0;
|
|
86
|
-
|
|
87
|
-
HammingComputer4() {}
|
|
88
|
-
|
|
89
|
-
HammingComputer4(const uint8_t* a, int code_size) {
|
|
90
|
-
set(a, code_size);
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
void set(const uint8_t* a, int code_size) {
|
|
94
|
-
assert(code_size == 4);
|
|
95
|
-
a0 = *(uint32_t*)a;
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
inline int hamming(const uint8_t* b) const {
|
|
99
|
-
return popcount64(*(uint32_t*)b ^ a0);
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
inline static constexpr int get_code_size() {
|
|
103
|
-
return 4;
|
|
104
|
-
}
|
|
105
|
-
};
|
|
106
|
-
|
|
107
|
-
struct HammingComputer8 {
|
|
108
|
-
uint64_t a0;
|
|
109
|
-
|
|
110
|
-
HammingComputer8() {}
|
|
111
|
-
|
|
112
|
-
HammingComputer8(const uint8_t* a, int code_size) {
|
|
113
|
-
set(a, code_size);
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
void set(const uint8_t* a, int code_size) {
|
|
117
|
-
assert(code_size == 8);
|
|
118
|
-
a0 = *(uint64_t*)a;
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
inline int hamming(const uint8_t* b) const {
|
|
122
|
-
return popcount64(*(uint64_t*)b ^ a0);
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
inline static constexpr int get_code_size() {
|
|
126
|
-
return 8;
|
|
127
|
-
}
|
|
128
|
-
};
|
|
129
|
-
|
|
130
|
-
struct HammingComputer16 {
|
|
131
|
-
uint64_t a0, a1;
|
|
132
|
-
|
|
133
|
-
HammingComputer16() {}
|
|
134
|
-
|
|
135
|
-
HammingComputer16(const uint8_t* a8, int code_size) {
|
|
136
|
-
set(a8, code_size);
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
void set(const uint8_t* a8, int code_size) {
|
|
140
|
-
assert(code_size == 16);
|
|
141
|
-
const uint64_t* a = (uint64_t*)a8;
|
|
142
|
-
a0 = a[0];
|
|
143
|
-
a1 = a[1];
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
inline int hamming(const uint8_t* b8) const {
|
|
147
|
-
const uint64_t* b = (uint64_t*)b8;
|
|
148
|
-
return popcount64(b[0] ^ a0) + popcount64(b[1] ^ a1);
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
inline static constexpr int get_code_size() {
|
|
152
|
-
return 16;
|
|
153
|
-
}
|
|
154
|
-
};
|
|
155
|
-
|
|
156
|
-
// when applied to an array, 1/2 of the 64-bit accesses are unaligned.
|
|
157
|
-
// This incurs a penalty of ~10% wrt. fully aligned accesses.
|
|
158
|
-
struct HammingComputer20 {
|
|
159
|
-
uint64_t a0, a1;
|
|
160
|
-
uint32_t a2;
|
|
161
|
-
|
|
162
|
-
HammingComputer20() {}
|
|
163
|
-
|
|
164
|
-
HammingComputer20(const uint8_t* a8, int code_size) {
|
|
165
|
-
set(a8, code_size);
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
void set(const uint8_t* a8, int code_size) {
|
|
169
|
-
assert(code_size == 20);
|
|
170
|
-
const uint64_t* a = (uint64_t*)a8;
|
|
171
|
-
a0 = a[0];
|
|
172
|
-
a1 = a[1];
|
|
173
|
-
a2 = a[2];
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
inline int hamming(const uint8_t* b8) const {
|
|
177
|
-
const uint64_t* b = (uint64_t*)b8;
|
|
178
|
-
return popcount64(b[0] ^ a0) + popcount64(b[1] ^ a1) +
|
|
179
|
-
popcount64(*(uint32_t*)(b + 2) ^ a2);
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
inline static constexpr int get_code_size() {
|
|
183
|
-
return 20;
|
|
184
|
-
}
|
|
185
|
-
};
|
|
186
|
-
|
|
187
|
-
struct HammingComputer32 {
|
|
188
|
-
uint64_t a0, a1, a2, a3;
|
|
189
|
-
|
|
190
|
-
HammingComputer32() {}
|
|
191
|
-
|
|
192
|
-
HammingComputer32(const uint8_t* a8, int code_size) {
|
|
193
|
-
set(a8, code_size);
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
void set(const uint8_t* a8, int code_size) {
|
|
197
|
-
assert(code_size == 32);
|
|
198
|
-
const uint64_t* a = (uint64_t*)a8;
|
|
199
|
-
a0 = a[0];
|
|
200
|
-
a1 = a[1];
|
|
201
|
-
a2 = a[2];
|
|
202
|
-
a3 = a[3];
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
inline int hamming(const uint8_t* b8) const {
|
|
206
|
-
const uint64_t* b = (uint64_t*)b8;
|
|
207
|
-
return popcount64(b[0] ^ a0) + popcount64(b[1] ^ a1) +
|
|
208
|
-
popcount64(b[2] ^ a2) + popcount64(b[3] ^ a3);
|
|
209
|
-
}
|
|
210
|
-
|
|
211
|
-
inline static constexpr int get_code_size() {
|
|
212
|
-
return 32;
|
|
213
|
-
}
|
|
214
|
-
};
|
|
215
|
-
|
|
216
|
-
struct HammingComputer64 {
|
|
217
|
-
uint64_t a0, a1, a2, a3, a4, a5, a6, a7;
|
|
218
|
-
|
|
219
|
-
HammingComputer64() {}
|
|
220
|
-
|
|
221
|
-
HammingComputer64(const uint8_t* a8, int code_size) {
|
|
222
|
-
set(a8, code_size);
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
void set(const uint8_t* a8, int code_size) {
|
|
226
|
-
assert(code_size == 64);
|
|
227
|
-
const uint64_t* a = (uint64_t*)a8;
|
|
228
|
-
a0 = a[0];
|
|
229
|
-
a1 = a[1];
|
|
230
|
-
a2 = a[2];
|
|
231
|
-
a3 = a[3];
|
|
232
|
-
a4 = a[4];
|
|
233
|
-
a5 = a[5];
|
|
234
|
-
a6 = a[6];
|
|
235
|
-
a7 = a[7];
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
inline int hamming(const uint8_t* b8) const {
|
|
239
|
-
const uint64_t* b = (uint64_t*)b8;
|
|
240
|
-
return popcount64(b[0] ^ a0) + popcount64(b[1] ^ a1) +
|
|
241
|
-
popcount64(b[2] ^ a2) + popcount64(b[3] ^ a3) +
|
|
242
|
-
popcount64(b[4] ^ a4) + popcount64(b[5] ^ a5) +
|
|
243
|
-
popcount64(b[6] ^ a6) + popcount64(b[7] ^ a7);
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
inline static constexpr int get_code_size() {
|
|
247
|
-
return 64;
|
|
248
|
-
}
|
|
249
|
-
};
|
|
250
|
-
|
|
251
|
-
struct HammingComputerDefault {
|
|
252
|
-
const uint8_t* a8;
|
|
253
|
-
int quotient8;
|
|
254
|
-
int remainder8;
|
|
255
|
-
|
|
256
|
-
HammingComputerDefault() {}
|
|
257
|
-
|
|
258
|
-
HammingComputerDefault(const uint8_t* a8, int code_size) {
|
|
259
|
-
set(a8, code_size);
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
void set(const uint8_t* a8_2, int code_size) {
|
|
263
|
-
this->a8 = a8_2;
|
|
264
|
-
quotient8 = code_size / 8;
|
|
265
|
-
remainder8 = code_size % 8;
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
int hamming(const uint8_t* b8) const {
|
|
269
|
-
int accu = 0;
|
|
270
|
-
|
|
271
|
-
const uint64_t* a64 = reinterpret_cast<const uint64_t*>(a8);
|
|
272
|
-
const uint64_t* b64 = reinterpret_cast<const uint64_t*>(b8);
|
|
273
|
-
int i = 0, len = quotient8;
|
|
274
|
-
switch (len & 7) {
|
|
275
|
-
default:
|
|
276
|
-
while (len > 7) {
|
|
277
|
-
len -= 8;
|
|
278
|
-
accu += popcount64(a64[i] ^ b64[i]);
|
|
279
|
-
i++;
|
|
280
|
-
[[fallthrough]];
|
|
281
|
-
case 7:
|
|
282
|
-
accu += popcount64(a64[i] ^ b64[i]);
|
|
283
|
-
i++;
|
|
284
|
-
[[fallthrough]];
|
|
285
|
-
case 6:
|
|
286
|
-
accu += popcount64(a64[i] ^ b64[i]);
|
|
287
|
-
i++;
|
|
288
|
-
[[fallthrough]];
|
|
289
|
-
case 5:
|
|
290
|
-
accu += popcount64(a64[i] ^ b64[i]);
|
|
291
|
-
i++;
|
|
292
|
-
[[fallthrough]];
|
|
293
|
-
case 4:
|
|
294
|
-
accu += popcount64(a64[i] ^ b64[i]);
|
|
295
|
-
i++;
|
|
296
|
-
[[fallthrough]];
|
|
297
|
-
case 3:
|
|
298
|
-
accu += popcount64(a64[i] ^ b64[i]);
|
|
299
|
-
i++;
|
|
300
|
-
[[fallthrough]];
|
|
301
|
-
case 2:
|
|
302
|
-
accu += popcount64(a64[i] ^ b64[i]);
|
|
303
|
-
i++;
|
|
304
|
-
[[fallthrough]];
|
|
305
|
-
case 1:
|
|
306
|
-
accu += popcount64(a64[i] ^ b64[i]);
|
|
307
|
-
i++;
|
|
308
|
-
}
|
|
309
|
-
}
|
|
310
|
-
if (remainder8) {
|
|
311
|
-
const uint8_t* a = a8 + 8 * quotient8;
|
|
312
|
-
const uint8_t* b = b8 + 8 * quotient8;
|
|
313
|
-
switch (remainder8) {
|
|
314
|
-
case 7:
|
|
315
|
-
accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
|
|
316
|
-
[[fallthrough]];
|
|
317
|
-
case 6:
|
|
318
|
-
accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
|
|
319
|
-
[[fallthrough]];
|
|
320
|
-
case 5:
|
|
321
|
-
accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
|
|
322
|
-
[[fallthrough]];
|
|
323
|
-
case 4:
|
|
324
|
-
accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
|
|
325
|
-
[[fallthrough]];
|
|
326
|
-
case 3:
|
|
327
|
-
accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
|
|
328
|
-
[[fallthrough]];
|
|
329
|
-
case 2:
|
|
330
|
-
accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
|
|
331
|
-
[[fallthrough]];
|
|
332
|
-
case 1:
|
|
333
|
-
accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
|
|
334
|
-
[[fallthrough]];
|
|
335
|
-
default:
|
|
336
|
-
break;
|
|
337
|
-
}
|
|
338
|
-
}
|
|
339
|
-
|
|
340
|
-
return accu;
|
|
341
|
-
}
|
|
342
|
-
|
|
343
|
-
inline int get_code_size() const {
|
|
344
|
-
return quotient8 * 8 + remainder8;
|
|
345
|
-
}
|
|
346
|
-
};
|
|
347
|
-
|
|
348
|
-
/***************************************************************************
|
|
349
|
-
* generalized Hamming = number of bytes that are different between
|
|
350
|
-
* two codes.
|
|
351
|
-
***************************************************************************/
|
|
352
|
-
|
|
353
|
-
inline int generalized_hamming_64(uint64_t a) {
|
|
354
|
-
a |= a >> 1;
|
|
355
|
-
a |= a >> 2;
|
|
356
|
-
a |= a >> 4;
|
|
357
|
-
a &= 0x0101010101010101UL;
|
|
358
|
-
return popcount64(a);
|
|
359
|
-
}
|
|
360
|
-
|
|
361
|
-
struct GenHammingComputer8 {
|
|
362
|
-
uint64_t a0;
|
|
363
|
-
|
|
364
|
-
GenHammingComputer8(const uint8_t* a, int code_size) {
|
|
365
|
-
assert(code_size == 8);
|
|
366
|
-
a0 = *(uint64_t*)a;
|
|
367
|
-
}
|
|
368
|
-
|
|
369
|
-
inline int hamming(const uint8_t* b) const {
|
|
370
|
-
return generalized_hamming_64(*(uint64_t*)b ^ a0);
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
inline static constexpr int get_code_size() {
|
|
374
|
-
return 8;
|
|
375
|
-
}
|
|
376
|
-
};
|
|
377
|
-
|
|
378
|
-
// I'm not sure whether this version is faster of slower, tbh
|
|
379
|
-
// todo: test on different CPUs
|
|
380
|
-
struct GenHammingComputer16 {
|
|
381
|
-
__m128i a;
|
|
382
|
-
|
|
383
|
-
GenHammingComputer16(const uint8_t* a8, int code_size) {
|
|
384
|
-
assert(code_size == 16);
|
|
385
|
-
a = _mm_loadu_si128((const __m128i_u*)a8);
|
|
386
|
-
}
|
|
387
|
-
|
|
388
|
-
inline int hamming(const uint8_t* b8) const {
|
|
389
|
-
const __m128i b = _mm_loadu_si128((const __m128i_u*)b8);
|
|
390
|
-
const __m128i cmp = _mm_cmpeq_epi8(a, b);
|
|
391
|
-
const auto movemask = _mm_movemask_epi8(cmp);
|
|
392
|
-
return 16 - popcount32(movemask);
|
|
393
|
-
}
|
|
394
|
-
|
|
395
|
-
inline static constexpr int get_code_size() {
|
|
396
|
-
return 16;
|
|
397
|
-
}
|
|
398
|
-
};
|
|
399
|
-
|
|
400
|
-
struct GenHammingComputer32 {
|
|
401
|
-
__m256i a;
|
|
402
|
-
|
|
403
|
-
GenHammingComputer32(const uint8_t* a8, int code_size) {
|
|
404
|
-
assert(code_size == 32);
|
|
405
|
-
a = _mm256_loadu_si256((const __m256i_u*)a8);
|
|
406
|
-
}
|
|
407
|
-
|
|
408
|
-
inline int hamming(const uint8_t* b8) const {
|
|
409
|
-
const __m256i b = _mm256_loadu_si256((const __m256i_u*)b8);
|
|
410
|
-
const __m256i cmp = _mm256_cmpeq_epi8(a, b);
|
|
411
|
-
const uint32_t movemask = _mm256_movemask_epi8(cmp);
|
|
412
|
-
return 32 - popcount32(movemask);
|
|
413
|
-
}
|
|
414
|
-
|
|
415
|
-
inline static constexpr int get_code_size() {
|
|
416
|
-
return 32;
|
|
417
|
-
}
|
|
418
|
-
};
|
|
419
|
-
|
|
420
|
-
// A specialized version might be needed for the very long
|
|
421
|
-
// GenHamming code_size. In such a case, one may accumulate
|
|
422
|
-
// counts using _mm256_sub_epi8 and then compute a horizontal
|
|
423
|
-
// sum (using _mm256_sad_epu8, maybe, in blocks of no larger
|
|
424
|
-
// than 256 * 32 bytes).
|
|
425
|
-
|
|
426
|
-
struct GenHammingComputerM8 {
|
|
427
|
-
const uint64_t* a;
|
|
428
|
-
int n;
|
|
429
|
-
|
|
430
|
-
GenHammingComputerM8(const uint8_t* a8, int code_size) {
|
|
431
|
-
assert(code_size % 8 == 0);
|
|
432
|
-
a = (uint64_t*)a8;
|
|
433
|
-
n = code_size / 8;
|
|
434
|
-
}
|
|
435
|
-
|
|
436
|
-
int hamming(const uint8_t* b8) const {
|
|
437
|
-
const uint64_t* b = (uint64_t*)b8;
|
|
438
|
-
int accu = 0;
|
|
439
|
-
|
|
440
|
-
int i = 0;
|
|
441
|
-
int n4 = (n / 4) * 4;
|
|
442
|
-
for (; i < n4; i += 4) {
|
|
443
|
-
const __m256i av = _mm256_loadu_si256((const __m256i_u*)(a + i));
|
|
444
|
-
const __m256i bv = _mm256_loadu_si256((const __m256i_u*)(b + i));
|
|
445
|
-
const __m256i cmp = _mm256_cmpeq_epi8(av, bv);
|
|
446
|
-
const uint32_t movemask = _mm256_movemask_epi8(cmp);
|
|
447
|
-
accu += 32 - popcount32(movemask);
|
|
448
|
-
}
|
|
449
|
-
|
|
450
|
-
for (; i < n; i++)
|
|
451
|
-
accu += generalized_hamming_64(a[i] ^ b[i]);
|
|
452
|
-
return accu;
|
|
453
|
-
}
|
|
454
|
-
|
|
455
|
-
inline int get_code_size() const {
|
|
456
|
-
return n * 8;
|
|
457
|
-
}
|
|
458
|
-
};
|
|
459
|
-
|
|
460
|
-
} // namespace faiss
|
|
461
|
-
|
|
462
|
-
#endif
|