faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -23,354 +23,34 @@
|
|
|
23
23
|
|
|
24
24
|
#include <faiss/utils/hamming.h>
|
|
25
25
|
|
|
26
|
-
#include <algorithm>
|
|
27
26
|
#include <cstdio>
|
|
28
|
-
#include <memory>
|
|
29
|
-
#include <vector>
|
|
27
|
+
#include <cstring>
|
|
30
28
|
|
|
31
|
-
#include <faiss/impl/AuxIndexStructures.h>
|
|
32
29
|
#include <faiss/impl/FaissAssert.h>
|
|
33
|
-
#include <faiss/impl/IDSelector.h>
|
|
34
|
-
#include <faiss/utils/Heap.h>
|
|
35
|
-
#include <faiss/utils/approx_topk_hamming/approx_topk_hamming.h>
|
|
30
|
+
#include <faiss/impl/simd_dispatch.h>
|
|
36
31
|
#include <faiss/utils/utils.h>
|
|
37
32
|
|
|
33
|
+
// Scalar (NONE) fallback — only needs the generic specializations.
|
|
34
|
+
#define THE_SIMD_LEVEL SIMDLevel::NONE
|
|
35
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
36
|
+
#include <faiss/utils/hamming_distance/hamming_computer-generic.h>
|
|
37
|
+
#include <faiss/utils/hamming_distance/hamming_impl.h>
|
|
38
|
+
#undef THE_SIMD_LEVEL
|
|
39
|
+
|
|
38
40
|
namespace faiss {
|
|
39
41
|
|
|
40
42
|
size_t hamming_batch_size = 65536;
|
|
41
43
|
|
|
42
|
-
template <size_t nbits>
|
|
43
|
-
void hammings(
|
|
44
|
-
const uint64_t* __restrict bs1,
|
|
45
|
-
const uint64_t* __restrict bs2,
|
|
46
|
-
size_t n1,
|
|
47
|
-
size_t n2,
|
|
48
|
-
hamdis_t* __restrict dis)
|
|
49
|
-
|
|
50
|
-
{
|
|
51
|
-
size_t i, j;
|
|
52
|
-
const size_t nwords = nbits / 64;
|
|
53
|
-
for (i = 0; i < n1; i++) {
|
|
54
|
-
const uint64_t* __restrict bs1_ = bs1 + i * nwords;
|
|
55
|
-
hamdis_t* __restrict dis_ = dis + i * n2;
|
|
56
|
-
for (j = 0; j < n2; j++) {
|
|
57
|
-
dis_[j] = hamming<nbits>(bs1_, bs2 + j * nwords);
|
|
58
|
-
}
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
void hammings(
|
|
63
|
-
const uint64_t* __restrict bs1,
|
|
64
|
-
const uint64_t* __restrict bs2,
|
|
65
|
-
size_t n1,
|
|
66
|
-
size_t n2,
|
|
67
|
-
size_t nbits,
|
|
68
|
-
hamdis_t* __restrict dis) {
|
|
69
|
-
size_t i, j;
|
|
70
|
-
const size_t nwords = nbits / 64;
|
|
71
|
-
for (i = 0; i < n1; i++) {
|
|
72
|
-
const uint64_t* __restrict bs1_ = bs1 + i * nwords;
|
|
73
|
-
hamdis_t* __restrict dis_ = dis + i * n2;
|
|
74
|
-
for (j = 0; j < n2; j++) {
|
|
75
|
-
dis_[j] = hamming(bs1_, bs2 + j * nwords, nwords);
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
/* Count number of matches given a max threshold */
|
|
81
|
-
template <size_t nbits>
|
|
82
|
-
void hamming_count_thres(
|
|
83
|
-
const uint64_t* __restrict bs1,
|
|
84
|
-
const uint64_t* __restrict bs2,
|
|
85
|
-
size_t n1,
|
|
86
|
-
size_t n2,
|
|
87
|
-
hamdis_t ht,
|
|
88
|
-
size_t* __restrict nptr) {
|
|
89
|
-
const size_t nwords = nbits / 64;
|
|
90
|
-
size_t i, j, posm = 0;
|
|
91
|
-
const uint64_t* bs2_ = bs2;
|
|
92
|
-
|
|
93
|
-
for (i = 0; i < n1; i++) {
|
|
94
|
-
bs2 = bs2_;
|
|
95
|
-
for (j = 0; j < n2; j++) {
|
|
96
|
-
/* collect the match only if this satisfies the threshold */
|
|
97
|
-
if (hamming<nbits>(bs1, bs2) <= ht) {
|
|
98
|
-
posm++;
|
|
99
|
-
}
|
|
100
|
-
bs2 += nwords;
|
|
101
|
-
}
|
|
102
|
-
bs1 += nwords; /* next signature */
|
|
103
|
-
}
|
|
104
|
-
*nptr = posm;
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
template <size_t nbits>
|
|
108
|
-
void crosshamming_count_thres(
|
|
109
|
-
const uint64_t* __restrict dbs,
|
|
110
|
-
size_t n,
|
|
111
|
-
int ht,
|
|
112
|
-
size_t* __restrict nptr) {
|
|
113
|
-
const size_t nwords = nbits / 64;
|
|
114
|
-
size_t i, j, posm = 0;
|
|
115
|
-
const uint64_t* bs1 = dbs;
|
|
116
|
-
for (i = 0; i < n; i++) {
|
|
117
|
-
const uint64_t* bs2 = bs1 + 2;
|
|
118
|
-
for (j = i + 1; j < n; j++) {
|
|
119
|
-
/* collect the match only if this satisfies the threshold */
|
|
120
|
-
if (hamming<nbits>(bs1, bs2) <= ht) {
|
|
121
|
-
posm++;
|
|
122
|
-
}
|
|
123
|
-
bs2 += nwords;
|
|
124
|
-
}
|
|
125
|
-
bs1 += nwords;
|
|
126
|
-
}
|
|
127
|
-
*nptr = posm;
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
template <size_t nbits>
|
|
131
|
-
size_t match_hamming_thres(
|
|
132
|
-
const uint64_t* __restrict bs1,
|
|
133
|
-
const uint64_t* __restrict bs2,
|
|
134
|
-
size_t n1,
|
|
135
|
-
size_t n2,
|
|
136
|
-
int ht,
|
|
137
|
-
int64_t* __restrict idx,
|
|
138
|
-
hamdis_t* __restrict hams) {
|
|
139
|
-
const size_t nwords = nbits / 64;
|
|
140
|
-
size_t i, j, posm = 0;
|
|
141
|
-
hamdis_t h;
|
|
142
|
-
const uint64_t* bs2_ = bs2;
|
|
143
|
-
for (i = 0; i < n1; i++) {
|
|
144
|
-
bs2 = bs2_;
|
|
145
|
-
for (j = 0; j < n2; j++) {
|
|
146
|
-
/* Here perform the real work of computing the distance */
|
|
147
|
-
h = hamming<nbits>(bs1, bs2);
|
|
148
|
-
|
|
149
|
-
/* collect the match only if this satisfies the threshold */
|
|
150
|
-
if (h <= ht) {
|
|
151
|
-
/* Enough space to store another match ? */
|
|
152
|
-
*idx = i;
|
|
153
|
-
idx++;
|
|
154
|
-
*idx = j;
|
|
155
|
-
idx++;
|
|
156
|
-
*hams = h;
|
|
157
|
-
hams++;
|
|
158
|
-
posm++;
|
|
159
|
-
}
|
|
160
|
-
bs2 += nwords; /* next signature */
|
|
161
|
-
}
|
|
162
|
-
bs1 += nwords;
|
|
163
|
-
}
|
|
164
|
-
return posm;
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
namespace {
|
|
168
|
-
|
|
169
|
-
/* Return closest neighbors w.r.t Hamming distance, using a heap. */
|
|
170
|
-
template <class HammingComputer>
|
|
171
|
-
void hammings_knn_hc(
|
|
172
|
-
int bytes_per_code,
|
|
173
|
-
int_maxheap_array_t* __restrict ha,
|
|
174
|
-
const uint8_t* __restrict bs1,
|
|
175
|
-
const uint8_t* __restrict bs2,
|
|
176
|
-
size_t n2,
|
|
177
|
-
bool order = true,
|
|
178
|
-
bool init_heap = true,
|
|
179
|
-
ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK,
|
|
180
|
-
const faiss::IDSelector* sel = nullptr) {
|
|
181
|
-
size_t k = ha->k;
|
|
182
|
-
if (init_heap) {
|
|
183
|
-
ha->heapify();
|
|
184
|
-
}
|
|
44
|
+
/******************************************************************
|
|
45
|
+
* Scalar utility functions (no SIMD, no dispatch needed)
|
|
46
|
+
******************************************************************/
|
|
185
47
|
|
|
186
|
-
const size_t block_size = hamming_batch_size;
|
|
187
|
-
for (size_t j0 = 0; j0 < n2; j0 += block_size) {
|
|
188
|
-
const size_t j1 = std::min(j0 + block_size, n2);
|
|
189
|
-
#pragma omp parallel for
|
|
190
|
-
for (int64_t i = 0; i < ha->nh; i++) {
|
|
191
|
-
HammingComputer hc(bs1 + i * bytes_per_code, bytes_per_code);
|
|
192
|
-
|
|
193
|
-
const uint8_t* __restrict bs2_ = bs2 + j0 * bytes_per_code;
|
|
194
|
-
hamdis_t dis;
|
|
195
|
-
hamdis_t* __restrict bh_val_ = ha->val + i * k;
|
|
196
|
-
int64_t* __restrict bh_ids_ = ha->ids + i * k;
|
|
197
|
-
|
|
198
|
-
// if larger number of k is required, then ::bs_addn() needs to be
|
|
199
|
-
// used instead of ::addn()
|
|
200
|
-
#define HANDLE_APPROX(NB, BD) \
|
|
201
|
-
case ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B##NB##_D##BD: \
|
|
202
|
-
FAISS_THROW_IF_NOT_FMT( \
|
|
203
|
-
k <= NB * BD, \
|
|
204
|
-
"The chosen mode (%d) of approximate top-k supports " \
|
|
205
|
-
"up to %d values, but %zd is requested.", \
|
|
206
|
-
(int)(ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B##NB##_D##BD), \
|
|
207
|
-
NB * BD, \
|
|
208
|
-
k); \
|
|
209
|
-
HeapWithBucketsForHamming32< \
|
|
210
|
-
CMax<hamdis_t, int64_t>, \
|
|
211
|
-
NB, \
|
|
212
|
-
BD, \
|
|
213
|
-
HammingComputer>:: \
|
|
214
|
-
addn(j1 - j0, hc, bs2_, k, bh_val_, bh_ids_, sel); \
|
|
215
|
-
break;
|
|
216
|
-
|
|
217
|
-
switch (approx_topk_mode) {
|
|
218
|
-
HANDLE_APPROX(8, 3)
|
|
219
|
-
HANDLE_APPROX(8, 2)
|
|
220
|
-
HANDLE_APPROX(16, 2)
|
|
221
|
-
HANDLE_APPROX(32, 2)
|
|
222
|
-
default: {
|
|
223
|
-
for (size_t j = j0; j < j1; j++, bs2_ += bytes_per_code) {
|
|
224
|
-
if (sel && !sel->is_member(j)) {
|
|
225
|
-
continue;
|
|
226
|
-
}
|
|
227
|
-
dis = hc.hamming(bs2_);
|
|
228
|
-
if (dis < bh_val_[0]) {
|
|
229
|
-
faiss::maxheap_replace_top<hamdis_t>(
|
|
230
|
-
k, bh_val_, bh_ids_, dis, j);
|
|
231
|
-
}
|
|
232
|
-
}
|
|
233
|
-
} break;
|
|
234
|
-
}
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
if (order) {
|
|
238
|
-
ha->reorder();
|
|
239
|
-
}
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
/* Return closest neighbors w.r.t Hamming distance, using max count. */
|
|
243
|
-
template <class HammingComputer>
|
|
244
|
-
void hammings_knn_mc(
|
|
245
|
-
int bytes_per_code,
|
|
246
|
-
const uint8_t* __restrict a,
|
|
247
|
-
const uint8_t* __restrict b,
|
|
248
|
-
size_t na,
|
|
249
|
-
size_t nb,
|
|
250
|
-
size_t k,
|
|
251
|
-
int32_t* __restrict distances,
|
|
252
|
-
int64_t* __restrict labels,
|
|
253
|
-
const faiss::IDSelector* sel) {
|
|
254
|
-
const int nBuckets = bytes_per_code * 8 + 1;
|
|
255
|
-
std::vector<int> all_counters(na * nBuckets, 0);
|
|
256
|
-
std::unique_ptr<int64_t[]> all_ids_per_dis(new int64_t[na * nBuckets * k]);
|
|
257
|
-
|
|
258
|
-
std::vector<HCounterState<HammingComputer>> cs;
|
|
259
|
-
for (size_t i = 0; i < na; ++i) {
|
|
260
|
-
cs.push_back(
|
|
261
|
-
HCounterState<HammingComputer>(
|
|
262
|
-
all_counters.data() + i * nBuckets,
|
|
263
|
-
all_ids_per_dis.get() + i * nBuckets * k,
|
|
264
|
-
a + i * bytes_per_code,
|
|
265
|
-
8 * bytes_per_code,
|
|
266
|
-
k));
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
const size_t block_size = hamming_batch_size;
|
|
270
|
-
for (size_t j0 = 0; j0 < nb; j0 += block_size) {
|
|
271
|
-
const size_t j1 = std::min(j0 + block_size, nb);
|
|
272
|
-
#pragma omp parallel for
|
|
273
|
-
for (int64_t i = 0; i < na; ++i) {
|
|
274
|
-
for (size_t j = j0; j < j1; ++j) {
|
|
275
|
-
if (!sel || sel->is_member(j)) {
|
|
276
|
-
cs[i].update_counter(b + j * bytes_per_code, j);
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
}
|
|
280
|
-
}
|
|
281
|
-
|
|
282
|
-
for (size_t i = 0; i < na; ++i) {
|
|
283
|
-
HCounterState<HammingComputer>& csi = cs[i];
|
|
284
|
-
|
|
285
|
-
int nres = 0;
|
|
286
|
-
for (int b_2 = 0; b_2 < nBuckets && nres < k; b_2++) {
|
|
287
|
-
for (int l = 0; l < csi.counters[b_2] && nres < k; l++) {
|
|
288
|
-
labels[i * k + nres] = csi.ids_per_dis[b_2 * k + l];
|
|
289
|
-
distances[i * k + nres] = b_2;
|
|
290
|
-
nres++;
|
|
291
|
-
}
|
|
292
|
-
}
|
|
293
|
-
while (nres < k) {
|
|
294
|
-
labels[i * k + nres] = -1;
|
|
295
|
-
distances[i * k + nres] = std::numeric_limits<int32_t>::max();
|
|
296
|
-
++nres;
|
|
297
|
-
}
|
|
298
|
-
}
|
|
299
|
-
}
|
|
300
|
-
|
|
301
|
-
template <class HammingComputer>
|
|
302
|
-
void hamming_range_search(
|
|
303
|
-
const uint8_t* a,
|
|
304
|
-
const uint8_t* b,
|
|
305
|
-
size_t na,
|
|
306
|
-
size_t nb,
|
|
307
|
-
int radius,
|
|
308
|
-
size_t code_size,
|
|
309
|
-
RangeSearchResult* res,
|
|
310
|
-
const faiss::IDSelector* sel) {
|
|
311
|
-
#pragma omp parallel
|
|
312
|
-
{
|
|
313
|
-
RangeSearchPartialResult pres(res);
|
|
314
|
-
|
|
315
|
-
#pragma omp for
|
|
316
|
-
for (int64_t i = 0; i < na; i++) {
|
|
317
|
-
HammingComputer hc(a + i * code_size, code_size);
|
|
318
|
-
const uint8_t* yi = b;
|
|
319
|
-
RangeQueryResult& qres = pres.new_result(i);
|
|
320
|
-
|
|
321
|
-
for (size_t j = 0; j < nb; j++) {
|
|
322
|
-
if (!sel || sel->is_member(j)) {
|
|
323
|
-
int dis = hc.hamming(yi);
|
|
324
|
-
if (dis < radius) {
|
|
325
|
-
qres.add(dis, j);
|
|
326
|
-
}
|
|
327
|
-
}
|
|
328
|
-
yi += code_size;
|
|
329
|
-
}
|
|
330
|
-
}
|
|
331
|
-
pres.finalize();
|
|
332
|
-
}
|
|
333
|
-
}
|
|
334
|
-
|
|
335
|
-
struct Run_hammings_knn_hc {
|
|
336
|
-
using T = void;
|
|
337
|
-
template <class HammingComputer, class... Types>
|
|
338
|
-
void f(Types... args) {
|
|
339
|
-
hammings_knn_hc<HammingComputer>(args...);
|
|
340
|
-
}
|
|
341
|
-
};
|
|
342
|
-
|
|
343
|
-
struct Run_hammings_knn_mc {
|
|
344
|
-
using T = void;
|
|
345
|
-
template <class HammingComputer, class... Types>
|
|
346
|
-
void f(Types... args) {
|
|
347
|
-
hammings_knn_mc<HammingComputer>(args...);
|
|
348
|
-
}
|
|
349
|
-
};
|
|
350
|
-
|
|
351
|
-
struct Run_hamming_range_search {
|
|
352
|
-
using T = void;
|
|
353
|
-
template <class HammingComputer, class... Types>
|
|
354
|
-
void f(Types... args) {
|
|
355
|
-
hamming_range_search<HammingComputer>(args...);
|
|
356
|
-
}
|
|
357
|
-
};
|
|
358
|
-
|
|
359
|
-
} // namespace
|
|
360
|
-
|
|
361
|
-
/* Functions to maps vectors to bits. Assume proper allocation done beforehand,
|
|
362
|
-
meaning that b should be be able to receive as many bits as x may produce. */
|
|
363
|
-
|
|
364
|
-
/*
|
|
365
|
-
* dimension 0 corresponds to the least significant bit of b[0], or
|
|
366
|
-
* equivalently to the lsb of the first byte that is stored.
|
|
367
|
-
*/
|
|
368
48
|
void fvec2bitvec(const float* __restrict x, uint8_t* __restrict b, size_t d) {
|
|
369
|
-
for (
|
|
49
|
+
for (size_t i = 0; i < d; i += 8) {
|
|
370
50
|
uint8_t w = 0;
|
|
371
51
|
uint8_t mask = 1;
|
|
372
|
-
|
|
373
|
-
for (
|
|
52
|
+
size_t nj = i + 8 <= d ? 8 : d - i;
|
|
53
|
+
for (size_t j = 0; j < nj; j++) {
|
|
374
54
|
if (x[i + j] >= 0) {
|
|
375
55
|
w |= mask;
|
|
376
56
|
}
|
|
@@ -381,8 +61,6 @@ void fvec2bitvec(const float* __restrict x, uint8_t* __restrict b, size_t d) {
|
|
|
381
61
|
}
|
|
382
62
|
}
|
|
383
63
|
|
|
384
|
-
/* Same but for n vectors.
|
|
385
|
-
Ensure that the output b is byte-aligned (pad with 0s). */
|
|
386
64
|
void fvecs2bitvecs(
|
|
387
65
|
const float* __restrict x,
|
|
388
66
|
uint8_t* __restrict b,
|
|
@@ -390,7 +68,7 @@ void fvecs2bitvecs(
|
|
|
390
68
|
size_t n) {
|
|
391
69
|
const int64_t ncodes = ((d + 7) / 8);
|
|
392
70
|
#pragma omp parallel for if (n > 100000)
|
|
393
|
-
for (int64_t i = 0; i < n; i++) {
|
|
71
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
394
72
|
fvec2bitvec(x + i * d, b + i * ncodes, d);
|
|
395
73
|
}
|
|
396
74
|
}
|
|
@@ -402,12 +80,11 @@ void bitvecs2fvecs(
|
|
|
402
80
|
size_t n) {
|
|
403
81
|
const int64_t ncodes = ((d + 7) / 8);
|
|
404
82
|
#pragma omp parallel for if (n > 100000)
|
|
405
|
-
for (int64_t i = 0; i < n; i++) {
|
|
83
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
406
84
|
binary_to_real(d, b + i * ncodes, x + i * d);
|
|
407
85
|
}
|
|
408
86
|
}
|
|
409
87
|
|
|
410
|
-
/* Reverse bit (NOT a optimized function, only used for print purpose) */
|
|
411
88
|
static uint64_t uint64_reverse_bits(uint64_t b) {
|
|
412
89
|
int i;
|
|
413
90
|
uint64_t revb = 0;
|
|
@@ -419,7 +96,6 @@ static uint64_t uint64_reverse_bits(uint64_t b) {
|
|
|
419
96
|
return revb;
|
|
420
97
|
}
|
|
421
98
|
|
|
422
|
-
/* print the bit vector */
|
|
423
99
|
void bitvec_print(const uint8_t* b, size_t d) {
|
|
424
100
|
size_t i, j;
|
|
425
101
|
for (i = 0; i < d;) {
|
|
@@ -441,13 +117,13 @@ void bitvec_shuffle(
|
|
|
441
117
|
const uint8_t* __restrict a,
|
|
442
118
|
uint8_t* __restrict b) {
|
|
443
119
|
for (size_t i = 0; i < db; i++) {
|
|
444
|
-
FAISS_THROW_IF_NOT(order[i] >= 0 && order[i] < da);
|
|
120
|
+
FAISS_THROW_IF_NOT(order[i] >= 0 && static_cast<size_t>(order[i]) < da);
|
|
445
121
|
}
|
|
446
122
|
size_t lda = (da + 7) / 8;
|
|
447
123
|
size_t ldb = (db + 7) / 8;
|
|
448
124
|
|
|
449
125
|
#pragma omp parallel for if (n > 10000)
|
|
450
|
-
for (int64_t i = 0; i < n; i++) {
|
|
126
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
451
127
|
const uint8_t* ai = a + i * lda;
|
|
452
128
|
uint8_t* bi = b + i * ldb;
|
|
453
129
|
memset(bi, 0, ldb);
|
|
@@ -459,12 +135,10 @@ void bitvec_shuffle(
|
|
|
459
135
|
}
|
|
460
136
|
}
|
|
461
137
|
|
|
462
|
-
|
|
463
|
-
|
|
138
|
+
/******************************************************************
|
|
139
|
+
* Dispatched Hamming distance public API
|
|
140
|
+
******************************************************************/
|
|
464
141
|
|
|
465
|
-
#define C64(x) ((uint64_t*)x)
|
|
466
|
-
|
|
467
|
-
/* Compute a set of Hamming distances */
|
|
468
142
|
void hammings(
|
|
469
143
|
const uint8_t* __restrict a,
|
|
470
144
|
const uint8_t* __restrict b,
|
|
@@ -472,24 +146,9 @@ void hammings(
|
|
|
472
146
|
size_t nb,
|
|
473
147
|
size_t ncodes,
|
|
474
148
|
hamdis_t* __restrict dis) {
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
faiss::hammings<64>(C64(a), C64(b), na, nb, dis);
|
|
479
|
-
return;
|
|
480
|
-
case 16:
|
|
481
|
-
faiss::hammings<128>(C64(a), C64(b), na, nb, dis);
|
|
482
|
-
return;
|
|
483
|
-
case 32:
|
|
484
|
-
faiss::hammings<256>(C64(a), C64(b), na, nb, dis);
|
|
485
|
-
return;
|
|
486
|
-
case 64:
|
|
487
|
-
faiss::hammings<512>(C64(a), C64(b), na, nb, dis);
|
|
488
|
-
return;
|
|
489
|
-
default:
|
|
490
|
-
faiss::hammings(C64(a), C64(b), na, nb, ncodes * 8, dis);
|
|
491
|
-
return;
|
|
492
|
-
}
|
|
149
|
+
with_simd_level([&]<SIMDLevel SL>() {
|
|
150
|
+
hammings_fixSL<SL>(a, b, na, nb, ncodes, dis);
|
|
151
|
+
});
|
|
493
152
|
}
|
|
494
153
|
|
|
495
154
|
void hammings_knn(
|
|
@@ -511,19 +170,10 @@ void hammings_knn_hc(
|
|
|
511
170
|
int order,
|
|
512
171
|
ApproxTopK_mode_t approx_topk_mode,
|
|
513
172
|
const faiss::IDSelector* sel) {
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
ncodes,
|
|
519
|
-
ha,
|
|
520
|
-
a,
|
|
521
|
-
b,
|
|
522
|
-
nb,
|
|
523
|
-
order,
|
|
524
|
-
true,
|
|
525
|
-
approx_topk_mode,
|
|
526
|
-
sel);
|
|
173
|
+
with_simd_level([&]<SIMDLevel SL>() {
|
|
174
|
+
hammings_knn_hc_fixSL<SL>(
|
|
175
|
+
ha, a, b, nb, ncodes, order, approx_topk_mode, sel);
|
|
176
|
+
});
|
|
527
177
|
}
|
|
528
178
|
|
|
529
179
|
void hammings_knn_mc(
|
|
@@ -536,9 +186,10 @@ void hammings_knn_mc(
|
|
|
536
186
|
int32_t* __restrict distances,
|
|
537
187
|
int64_t* __restrict labels,
|
|
538
188
|
const faiss::IDSelector* sel) {
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
189
|
+
with_simd_level([&]<SIMDLevel SL>() {
|
|
190
|
+
hammings_knn_mc_fixSL<SL>(
|
|
191
|
+
a, b, na, nb, k, ncodes, distances, labels, sel);
|
|
192
|
+
});
|
|
542
193
|
}
|
|
543
194
|
|
|
544
195
|
void hamming_range_search(
|
|
@@ -550,12 +201,12 @@ void hamming_range_search(
|
|
|
550
201
|
size_t code_size,
|
|
551
202
|
RangeSearchResult* result,
|
|
552
203
|
const faiss::IDSelector* sel) {
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
204
|
+
with_simd_level([&]<SIMDLevel SL>() {
|
|
205
|
+
hamming_range_search_fixSL<SL>(
|
|
206
|
+
a, b, na, nb, radius, code_size, result, sel);
|
|
207
|
+
});
|
|
556
208
|
}
|
|
557
209
|
|
|
558
|
-
/* Count number of matches given a max threshold */
|
|
559
210
|
void hamming_count_thres(
|
|
560
211
|
const uint8_t* bs1,
|
|
561
212
|
const uint8_t* bs2,
|
|
@@ -564,54 +215,22 @@ void hamming_count_thres(
|
|
|
564
215
|
hamdis_t ht,
|
|
565
216
|
size_t ncodes,
|
|
566
217
|
size_t* nptr) {
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
C64(bs1), C64(bs2), n1, n2, ht, nptr);
|
|
571
|
-
return;
|
|
572
|
-
case 16:
|
|
573
|
-
faiss::hamming_count_thres<128>(
|
|
574
|
-
C64(bs1), C64(bs2), n1, n2, ht, nptr);
|
|
575
|
-
return;
|
|
576
|
-
case 32:
|
|
577
|
-
faiss::hamming_count_thres<256>(
|
|
578
|
-
C64(bs1), C64(bs2), n1, n2, ht, nptr);
|
|
579
|
-
return;
|
|
580
|
-
case 64:
|
|
581
|
-
faiss::hamming_count_thres<512>(
|
|
582
|
-
C64(bs1), C64(bs2), n1, n2, ht, nptr);
|
|
583
|
-
return;
|
|
584
|
-
default:
|
|
585
|
-
FAISS_THROW_FMT("not implemented for %zu bits", ncodes);
|
|
586
|
-
}
|
|
218
|
+
with_simd_level([&]<SIMDLevel SL>() {
|
|
219
|
+
hamming_count_thres_fixSL<SL>(bs1, bs2, n1, n2, ht, ncodes, nptr);
|
|
220
|
+
});
|
|
587
221
|
}
|
|
588
222
|
|
|
589
|
-
/* Count number of cross-matches given a threshold */
|
|
590
223
|
void crosshamming_count_thres(
|
|
591
224
|
const uint8_t* dbs,
|
|
592
225
|
size_t n,
|
|
593
226
|
hamdis_t ht,
|
|
594
227
|
size_t ncodes,
|
|
595
228
|
size_t* nptr) {
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
return;
|
|
600
|
-
case 16:
|
|
601
|
-
faiss::crosshamming_count_thres<128>(C64(dbs), n, ht, nptr);
|
|
602
|
-
return;
|
|
603
|
-
case 32:
|
|
604
|
-
faiss::crosshamming_count_thres<256>(C64(dbs), n, ht, nptr);
|
|
605
|
-
return;
|
|
606
|
-
case 64:
|
|
607
|
-
faiss::crosshamming_count_thres<512>(C64(dbs), n, ht, nptr);
|
|
608
|
-
return;
|
|
609
|
-
default:
|
|
610
|
-
FAISS_THROW_FMT("not implemented for %zu bits", ncodes);
|
|
611
|
-
}
|
|
229
|
+
with_simd_level([&]<SIMDLevel SL>() {
|
|
230
|
+
crosshamming_count_thres_fixSL<SL>(dbs, n, ht, ncodes, nptr);
|
|
231
|
+
});
|
|
612
232
|
}
|
|
613
233
|
|
|
614
|
-
/* Returns all matches given a threshold */
|
|
615
234
|
size_t match_hamming_thres(
|
|
616
235
|
const uint8_t* bs1,
|
|
617
236
|
const uint8_t* bs2,
|
|
@@ -621,49 +240,10 @@ size_t match_hamming_thres(
|
|
|
621
240
|
size_t ncodes,
|
|
622
241
|
int64_t* idx,
|
|
623
242
|
hamdis_t* dis) {
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
case 16:
|
|
629
|
-
return faiss::match_hamming_thres<128>(
|
|
630
|
-
C64(bs1), C64(bs2), n1, n2, ht, idx, dis);
|
|
631
|
-
case 32:
|
|
632
|
-
return faiss::match_hamming_thres<256>(
|
|
633
|
-
C64(bs1), C64(bs2), n1, n2, ht, idx, dis);
|
|
634
|
-
case 64:
|
|
635
|
-
return faiss::match_hamming_thres<512>(
|
|
636
|
-
C64(bs1), C64(bs2), n1, n2, ht, idx, dis);
|
|
637
|
-
default:
|
|
638
|
-
FAISS_THROW_FMT("not implemented for %zu bits", ncodes);
|
|
639
|
-
return 0;
|
|
640
|
-
}
|
|
641
|
-
}
|
|
642
|
-
|
|
643
|
-
#undef C64
|
|
644
|
-
|
|
645
|
-
/*************************************
|
|
646
|
-
* generalized Hamming distances
|
|
647
|
-
************************************/
|
|
648
|
-
|
|
649
|
-
template <class HammingComputer>
|
|
650
|
-
static void hamming_dis_inner_loop(
|
|
651
|
-
const uint8_t* __restrict ca,
|
|
652
|
-
const uint8_t* __restrict cb,
|
|
653
|
-
size_t nb,
|
|
654
|
-
size_t code_size,
|
|
655
|
-
int k,
|
|
656
|
-
hamdis_t* __restrict bh_val_,
|
|
657
|
-
int64_t* __restrict bh_ids_) {
|
|
658
|
-
HammingComputer hc(ca, code_size);
|
|
659
|
-
|
|
660
|
-
for (size_t j = 0; j < nb; j++) {
|
|
661
|
-
int ndiff = hc.hamming(cb);
|
|
662
|
-
cb += code_size;
|
|
663
|
-
if (ndiff < bh_val_[0]) {
|
|
664
|
-
maxheap_replace_top<hamdis_t>(k, bh_val_, bh_ids_, ndiff, j);
|
|
665
|
-
}
|
|
666
|
-
}
|
|
243
|
+
return with_simd_level([&]<SIMDLevel SL>() -> size_t {
|
|
244
|
+
return match_hamming_thres_fixSL<SL>(
|
|
245
|
+
bs1, bs2, n1, n2, ht, ncodes, idx, dis);
|
|
246
|
+
});
|
|
667
247
|
}
|
|
668
248
|
|
|
669
249
|
void generalized_hammings_knn_hc(
|
|
@@ -673,46 +253,15 @@ void generalized_hammings_knn_hc(
|
|
|
673
253
|
size_t nb,
|
|
674
254
|
size_t code_size,
|
|
675
255
|
int ordered) {
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
if (ordered) {
|
|
680
|
-
ha->heapify();
|
|
681
|
-
}
|
|
682
|
-
|
|
683
|
-
#pragma omp parallel for
|
|
684
|
-
for (int i = 0; i < na; i++) {
|
|
685
|
-
const uint8_t* __restrict ca = a + i * code_size;
|
|
686
|
-
const uint8_t* __restrict cb = b;
|
|
687
|
-
|
|
688
|
-
hamdis_t* __restrict bh_val_ = ha->val + i * k;
|
|
689
|
-
int64_t* __restrict bh_ids_ = ha->ids + i * k;
|
|
690
|
-
|
|
691
|
-
switch (code_size) {
|
|
692
|
-
case 8:
|
|
693
|
-
hamming_dis_inner_loop<GenHammingComputer8>(
|
|
694
|
-
ca, cb, nb, 8, k, bh_val_, bh_ids_);
|
|
695
|
-
break;
|
|
696
|
-
case 16:
|
|
697
|
-
hamming_dis_inner_loop<GenHammingComputer16>(
|
|
698
|
-
ca, cb, nb, 16, k, bh_val_, bh_ids_);
|
|
699
|
-
break;
|
|
700
|
-
case 32:
|
|
701
|
-
hamming_dis_inner_loop<GenHammingComputer32>(
|
|
702
|
-
ca, cb, nb, 32, k, bh_val_, bh_ids_);
|
|
703
|
-
break;
|
|
704
|
-
default:
|
|
705
|
-
hamming_dis_inner_loop<GenHammingComputerM8>(
|
|
706
|
-
ca, cb, nb, code_size, k, bh_val_, bh_ids_);
|
|
707
|
-
break;
|
|
708
|
-
}
|
|
709
|
-
}
|
|
710
|
-
|
|
711
|
-
if (ordered) {
|
|
712
|
-
ha->reorder();
|
|
713
|
-
}
|
|
256
|
+
with_simd_level([&]<SIMDLevel SL>() {
|
|
257
|
+
generalized_hammings_knn_hc_fixSL<SL>(ha, a, b, nb, code_size, ordered);
|
|
258
|
+
});
|
|
714
259
|
}
|
|
715
260
|
|
|
261
|
+
/******************************************************************
|
|
262
|
+
* Bitstring pack/unpack (scalar, no dispatch needed)
|
|
263
|
+
******************************************************************/
|
|
264
|
+
|
|
716
265
|
void pack_bitstrings(
|
|
717
266
|
size_t n,
|
|
718
267
|
size_t M,
|
|
@@ -722,11 +271,11 @@ void pack_bitstrings(
|
|
|
722
271
|
size_t code_size) {
|
|
723
272
|
FAISS_THROW_IF_NOT(code_size >= (M * nbit + 7) / 8);
|
|
724
273
|
#pragma omp parallel for if (n > 1000)
|
|
725
|
-
for (int64_t i = 0; i < n; i++) {
|
|
274
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
726
275
|
const int32_t* in = unpacked + i * M;
|
|
727
276
|
uint8_t* out = packed + i * code_size;
|
|
728
277
|
BitstringWriter wr(out, code_size);
|
|
729
|
-
for (
|
|
278
|
+
for (size_t j = 0; j < M; j++) {
|
|
730
279
|
wr.write(in[j], nbit);
|
|
731
280
|
}
|
|
732
281
|
}
|
|
@@ -740,16 +289,16 @@ void pack_bitstrings(
|
|
|
740
289
|
uint8_t* packed,
|
|
741
290
|
size_t code_size) {
|
|
742
291
|
int totbit = 0;
|
|
743
|
-
for (
|
|
292
|
+
for (size_t j = 0; j < M; j++) {
|
|
744
293
|
totbit += nbit[j];
|
|
745
294
|
}
|
|
746
|
-
FAISS_THROW_IF_NOT(code_size >= (totbit + 7) / 8);
|
|
295
|
+
FAISS_THROW_IF_NOT(code_size >= static_cast<size_t>((totbit + 7) / 8));
|
|
747
296
|
#pragma omp parallel for if (n > 1000)
|
|
748
|
-
for (int64_t i = 0; i < n; i++) {
|
|
297
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
749
298
|
const int32_t* in = unpacked + i * M;
|
|
750
299
|
uint8_t* out = packed + i * code_size;
|
|
751
300
|
BitstringWriter wr(out, code_size);
|
|
752
|
-
for (
|
|
301
|
+
for (size_t j = 0; j < M; j++) {
|
|
753
302
|
wr.write(in[j], nbit[j]);
|
|
754
303
|
}
|
|
755
304
|
}
|
|
@@ -764,11 +313,11 @@ void unpack_bitstrings(
|
|
|
764
313
|
int32_t* unpacked) {
|
|
765
314
|
FAISS_THROW_IF_NOT(code_size >= (M * nbit + 7) / 8);
|
|
766
315
|
#pragma omp parallel for if (n > 1000)
|
|
767
|
-
for (int64_t i = 0; i < n; i++) {
|
|
316
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
768
317
|
const uint8_t* in = packed + i * code_size;
|
|
769
318
|
int32_t* out = unpacked + i * M;
|
|
770
319
|
BitstringReader rd(in, code_size);
|
|
771
|
-
for (
|
|
320
|
+
for (size_t j = 0; j < M; j++) {
|
|
772
321
|
out[j] = rd.read(nbit);
|
|
773
322
|
}
|
|
774
323
|
}
|
|
@@ -782,16 +331,16 @@ void unpack_bitstrings(
|
|
|
782
331
|
size_t code_size,
|
|
783
332
|
int32_t* unpacked) {
|
|
784
333
|
int totbit = 0;
|
|
785
|
-
for (
|
|
334
|
+
for (size_t j = 0; j < M; j++) {
|
|
786
335
|
totbit += nbit[j];
|
|
787
336
|
}
|
|
788
|
-
FAISS_THROW_IF_NOT(code_size >= (totbit + 7) / 8);
|
|
337
|
+
FAISS_THROW_IF_NOT(code_size >= static_cast<size_t>((totbit + 7) / 8));
|
|
789
338
|
#pragma omp parallel for if (n > 1000)
|
|
790
|
-
for (int64_t i = 0; i < n; i++) {
|
|
339
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
791
340
|
const uint8_t* in = packed + i * code_size;
|
|
792
341
|
int32_t* out = unpacked + i * M;
|
|
793
342
|
BitstringReader rd(in, code_size);
|
|
794
|
-
for (
|
|
343
|
+
for (size_t j = 0; j < M; j++) {
|
|
795
344
|
out[j] = rd.read(nbit[j]);
|
|
796
345
|
}
|
|
797
346
|
}
|