faiss 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +88 -97
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +89 -417
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +374 -206
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +467 -364
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +79 -76
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +39 -69
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +56 -33
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +73 -846
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -20
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +30 -52
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +38 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +150 -20
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
- data/vendor/faiss/faiss/factory_tools.cpp +9 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +15 -16
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +5 -4
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +58 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +111 -0
- data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +639 -507
- data/vendor/faiss/faiss/impl/HNSW.h +61 -44
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +53 -32
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +269 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +55 -25
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +302 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +100 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +318 -7
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +77 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
- data/vendor/faiss/faiss/impl/VisitedTable.h +70 -28
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +270 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +83 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +113 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +150 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +142 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1227 -79
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +96 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +58 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +15 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +45 -107
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +274 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +10 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +70 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +9 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +419 -19
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +387 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +341 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +425 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +290 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +337 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +157 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +90 -18
- data/vendor/faiss/faiss/index_io.h +40 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +28 -15
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +170 -86
- data/vendor/faiss/faiss/invlists/InvertedLists.h +88 -25
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +142 -21
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +33 -7
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +77 -27
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +10 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/bf16.h +34 -0
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -178
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +16 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +210 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -989
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1031 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +29 -7
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +129 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifndef HAMMING_COMPUTER_RVV_H
|
|
9
|
+
#define HAMMING_COMPUTER_RVV_H
|
|
10
|
+
|
|
11
|
+
// RVV HammingComputer fallbacks. There is no RVV-optimized HammingComputer
|
|
12
|
+
// implementation yet, so provide concrete RISCV_RVV specializations backed by
|
|
13
|
+
// the scalar NONE implementations.
|
|
14
|
+
|
|
15
|
+
#include <faiss/utils/hamming_distance/hamming_computer-generic.h>
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
|
|
19
|
+
#define FAISS_INHERIT_HAMMING_RVV(Class) \
|
|
20
|
+
template <> \
|
|
21
|
+
struct Class##_tpl<SIMDLevel::RISCV_RVV> : Class##_tpl<SIMDLevel::NONE> { \
|
|
22
|
+
using Class##_tpl<SIMDLevel::NONE>::Class##_tpl; \
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
FAISS_INHERIT_HAMMING_RVV(HammingComputer16);
|
|
26
|
+
FAISS_INHERIT_HAMMING_RVV(HammingComputer20);
|
|
27
|
+
FAISS_INHERIT_HAMMING_RVV(HammingComputer32);
|
|
28
|
+
FAISS_INHERIT_HAMMING_RVV(HammingComputer64);
|
|
29
|
+
FAISS_INHERIT_HAMMING_RVV(HammingComputerDefault);
|
|
30
|
+
FAISS_INHERIT_HAMMING_RVV(GenHammingComputer8);
|
|
31
|
+
FAISS_INHERIT_HAMMING_RVV(GenHammingComputer16);
|
|
32
|
+
FAISS_INHERIT_HAMMING_RVV(GenHammingComputer32);
|
|
33
|
+
FAISS_INHERIT_HAMMING_RVV(GenHammingComputerM8);
|
|
34
|
+
|
|
35
|
+
#undef FAISS_INHERIT_HAMMING_RVV
|
|
36
|
+
|
|
37
|
+
} // namespace faiss
|
|
38
|
+
|
|
39
|
+
#endif
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// This file contains forward declarations, architecture-independent
|
|
9
|
+
// HammingComputer structs (sizes 4 and 8), and the with_HammingComputer
|
|
10
|
+
// dispatch function. SIMDLevel-specific specializations live in:
|
|
11
|
+
// hamming_computer-generic.h (NONE — scalar fallback)
|
|
12
|
+
// hamming_computer-avx2.h (AVX2)
|
|
13
|
+
// hamming_computer-avx512.h (AVX512)
|
|
14
|
+
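// hamming_computer-neon.h (ARM NEON)
// hamming_computer-avx512_spr.h (AVX512-SPR)
// hamming_computer-rvv.h (RISC-V RVV — inherits the NONE code)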
|
|
15
|
+
|
|
16
|
+
#ifndef FAISS_hamming_computer_h
|
|
17
|
+
#define FAISS_hamming_computer_h
|
|
18
|
+
|
|
19
|
+
#include <cstring>

#include <faiss/utils/hamming_distance/common.h>
|
|
20
|
+
|
|
21
|
+
namespace faiss {
|
|
22
|
+
|
|
23
|
+
/***************************************************************************
|
|
24
|
+
* HammingComputer primary templates.
|
|
25
|
+
*
|
|
26
|
+
* Per-ISA backend files (hamming_computer-avx512.h, hamming_computer-neon.h,
|
|
27
|
+
* etc.) provide explicit specializations that override the scalar (NONE)
|
|
28
|
+
* defaults in hamming_computer-generic.h with ISA-optimized code.
|
|
29
|
+
* Templating on SIMDLevel gives each specialization a distinct mangled
|
|
30
|
+
* name, so DD builds with multiple per-ISA TUs do NOT create ODR-violating
|
|
31
|
+
* struct collisions.
|
|
32
|
+
*
|
|
33
|
+
* Call sites use with_HammingComputer<SL>, which is templatized on
|
|
34
|
+
* SIMDLevel to select the matching specialization.
|
|
35
|
+
***************************************************************************/
|
|
36
|
+
|
|
37
|
+
// Forward declarations. The struct bodies live in hamming_computer-generic.h
|
|
38
|
+
// (NONE) and per-ISA hamming_computer-*.h files.
|
|
39
|
+
// HammingComputer family: one class template per supported code size, plus
// the Default variant used when no fixed-size variant matches.
template <SIMDLevel SL> struct HammingComputer16_tpl;
template <SIMDLevel SL> struct HammingComputer20_tpl;
template <SIMDLevel SL> struct HammingComputer32_tpl;
template <SIMDLevel SL> struct HammingComputer64_tpl;
template <SIMDLevel SL> struct HammingComputerDefault_tpl;

// GenHammingComputer family.
template <SIMDLevel SL> struct GenHammingComputer8_tpl;
template <SIMDLevel SL> struct GenHammingComputer16_tpl;
template <SIMDLevel SL> struct GenHammingComputer32_tpl;
template <SIMDLevel SL> struct GenHammingComputerM8_tpl;
|
|
57
|
+
|
|
58
|
+
/******************************************************************
|
|
59
|
+
* The HammingComputer series of classes compares a single code of
|
|
60
|
+
* size 4 to 64 to incoming codes. They are intended for use as a
|
|
61
|
+
* template class where it would be inefficient to switch on the code
|
|
62
|
+
* size in the inner loop. Hopefully the compiler will inline the
|
|
63
|
+
* hamming() functions and put the a0, a1, ... in registers.
|
|
64
|
+
* For code_size = 4 and 8 we don't use SIMD implementations, because
|
|
65
|
+
* register widths are too large.
|
|
66
|
+
******************************************************************/
|
|
67
|
+
|
|
68
|
+
struct HammingComputer4 {
|
|
69
|
+
uint32_t a0;
|
|
70
|
+
|
|
71
|
+
HammingComputer4() {}
|
|
72
|
+
|
|
73
|
+
HammingComputer4(const uint8_t* a, int code_size) {
|
|
74
|
+
set(a, code_size);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
void set(const uint8_t* a, FAISS_MAYBE_UNUSED int code_size) {
|
|
78
|
+
assert(code_size == 4);
|
|
79
|
+
const uint32_t* a32 = reinterpret_cast<const uint32_t*>(a);
|
|
80
|
+
a0 = *a32;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
inline int hamming(const uint8_t* b) const {
|
|
84
|
+
const uint32_t* b32 = reinterpret_cast<const uint32_t*>(b);
|
|
85
|
+
return popcount64(*b32 ^ a0);
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
inline static constexpr int get_code_size() {
|
|
89
|
+
return 4;
|
|
90
|
+
}
|
|
91
|
+
};
|
|
92
|
+
|
|
93
|
+
struct HammingComputer8 {
|
|
94
|
+
uint64_t a0;
|
|
95
|
+
|
|
96
|
+
HammingComputer8() {}
|
|
97
|
+
|
|
98
|
+
HammingComputer8(const uint8_t* a, int code_size) {
|
|
99
|
+
set(a, code_size);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
void set(const uint8_t* a, FAISS_MAYBE_UNUSED int code_size) {
|
|
103
|
+
assert(code_size == 8);
|
|
104
|
+
const uint64_t* a64 = reinterpret_cast<const uint64_t*>(a);
|
|
105
|
+
a0 = *a64;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
inline int hamming(const uint8_t* b) const {
|
|
109
|
+
const uint64_t* b64 = reinterpret_cast<const uint64_t*>(b);
|
|
110
|
+
return popcount64(*b64 ^ a0);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
inline static constexpr int get_code_size() {
|
|
114
|
+
return 8;
|
|
115
|
+
}
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
/***************************************************************************
|
|
119
|
+
* Dispatching function that takes a code size and a C++20 template lambda.
|
|
120
|
+
* The lambda is called with the appropriate HammingComputer type:
|
|
121
|
+
* with_HammingComputer<SL>(code_size, [&]<class HammingComputer>() { ... });
|
|
122
|
+
**************************************************************************/
|
|
123
|
+
|
|
124
|
+
template <SIMDLevel SL, class F>
|
|
125
|
+
decltype(auto) with_HammingComputer(int code_size, F&& f) {
|
|
126
|
+
switch (code_size) {
|
|
127
|
+
case 4:
|
|
128
|
+
return f.template operator()<HammingComputer4>();
|
|
129
|
+
case 8:
|
|
130
|
+
return f.template operator()<HammingComputer8>();
|
|
131
|
+
case 16:
|
|
132
|
+
return f.template operator()<HammingComputer16_tpl<SL>>();
|
|
133
|
+
case 20:
|
|
134
|
+
return f.template operator()<HammingComputer20_tpl<SL>>();
|
|
135
|
+
case 32:
|
|
136
|
+
return f.template operator()<HammingComputer32_tpl<SL>>();
|
|
137
|
+
case 64:
|
|
138
|
+
return f.template operator()<HammingComputer64_tpl<SL>>();
|
|
139
|
+
default:
|
|
140
|
+
return f.template operator()<HammingComputerDefault_tpl<SL>>();
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
} // namespace faiss
|
|
145
|
+
|
|
146
|
+
#endif
|
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Shared implementation header for Hamming distance dynamic dispatch.
|
|
9
|
+
// Included by per-ISA TUs (hamming_avx2.cpp, hamming_avx512.cpp,
|
|
10
|
+
// hamming_neon.cpp) and by hamming.cpp (for the NONE fallback TU).
|
|
11
|
+
//
|
|
12
|
+
// THE_SIMD_LEVEL must be defined before including this header.
|
|
13
|
+
// Callers must also include the appropriate hamming_computer-*.h
|
|
14
|
+
// (generic, avx2, avx512, or neon) before this header so that the
|
|
15
|
+
// HammingComputer struct specializations are available.
|
|
16
|
+
//
|
|
17
|
+
// ODR CONTRACT: each TU that includes this header MUST define a UNIQUE
|
|
18
|
+
// THE_SIMD_LEVEL value across the linked binary. The template
|
|
19
|
+
// specializations at the bottom of this file (hammings_knn_hc_fixSL<SL>,
|
|
20
|
+
// etc.) have external linkage with SL-keyed mangled names; two TUs sharing
|
|
21
|
+
// THE_SIMD_LEVEL would emit conflicting definitions of the same symbol.
|
|
22
|
+
// In DD mode, the per-ISA TUs use distinct values (NONE/AVX2/AVX512/NEON/RVV);
|
|
23
|
+
// in static mode, only hamming.cpp includes this header so uniqueness is
|
|
24
|
+
// trivially satisfied.
|
|
25
|
+
|
|
26
|
+
#pragma once
|
|
27
|
+
|
|
28
|
+
#ifndef THE_SIMD_LEVEL
|
|
29
|
+
#error "Define THE_SIMD_LEVEL before including hamming_impl.h"
|
|
30
|
+
#endif
|
|
31
|
+
|
|
32
|
+
// Forward declarations and dispatch function.
|
|
33
|
+
#include <faiss/utils/hamming_distance/hamming_computer.h>
|
|
34
|
+
|
|
35
|
+
#include <faiss/utils/hamming.h>
|
|
36
|
+
|
|
37
|
+
#include <algorithm>
|
|
38
|
+
#include <cstdio>
|
|
39
|
+
#include <limits>
|
|
40
|
+
#include <memory>
|
|
41
|
+
#include <vector>
|
|
42
|
+
|
|
43
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
|
44
|
+
#include <faiss/impl/FaissAssert.h>
|
|
45
|
+
#include <faiss/impl/IDSelector.h>
|
|
46
|
+
#include <faiss/utils/Heap.h>
|
|
47
|
+
#include <faiss/utils/approx_topk_hamming/approx_topk_hamming.h>
|
|
48
|
+
#include <faiss/utils/utils.h>
|
|
49
|
+
|
|
50
|
+
namespace faiss {
|
|
51
|
+
|
|
52
|
+
// All HammingComputer-dependent implementation templates live in an anonymous
|
|
53
|
+
// namespace for ODR safety. Different TUs compile this header with different
|
|
54
|
+
// HammingComputer struct layouts (generic vs AVX2 vs NEON). The anonymous
|
|
55
|
+
// namespace ensures each TU gets its own copy with internal linkage, preventing
|
|
56
|
+
// the linker from merging incompatible instantiations.
|
|
57
|
+
namespace {
|
|
58
|
+
|
|
59
|
+
/******************************************************************
|
|
60
|
+
* HammingComputer-based search templates
|
|
61
|
+
******************************************************************/
|
|
62
|
+
|
|
63
|
+
template <class HammingComputer>
|
|
64
|
+
void hammings_knn_hc_impl(
|
|
65
|
+
int bytes_per_code,
|
|
66
|
+
int_maxheap_array_t* __restrict ha,
|
|
67
|
+
const uint8_t* __restrict bs1,
|
|
68
|
+
const uint8_t* __restrict bs2,
|
|
69
|
+
size_t n2,
|
|
70
|
+
bool order = true,
|
|
71
|
+
bool init_heap = true,
|
|
72
|
+
ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK,
|
|
73
|
+
const faiss::IDSelector* sel = nullptr) {
|
|
74
|
+
size_t k = ha->k;
|
|
75
|
+
if (init_heap) {
|
|
76
|
+
ha->heapify();
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
const size_t block_size = hamming_batch_size;
|
|
80
|
+
for (size_t j0 = 0; j0 < n2; j0 += block_size) {
|
|
81
|
+
const size_t j1 = std::min(j0 + block_size, n2);
|
|
82
|
+
#pragma omp parallel for
|
|
83
|
+
for (int64_t i = 0; i < static_cast<int64_t>(ha->nh); i++) {
|
|
84
|
+
HammingComputer hc(bs1 + i * bytes_per_code, bytes_per_code);
|
|
85
|
+
|
|
86
|
+
const uint8_t* __restrict bs2_ = bs2 + j0 * bytes_per_code;
|
|
87
|
+
hamdis_t dis;
|
|
88
|
+
hamdis_t* __restrict bh_val_ = ha->val + i * k;
|
|
89
|
+
int64_t* __restrict bh_ids_ = ha->ids + i * k;
|
|
90
|
+
|
|
91
|
+
#define HANDLE_APPROX(NB, BD) \
|
|
92
|
+
case ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B##NB##_D##BD: \
|
|
93
|
+
FAISS_THROW_IF_NOT_FMT( \
|
|
94
|
+
k <= NB * BD, \
|
|
95
|
+
"The chosen mode (%d) of approximate top-k supports " \
|
|
96
|
+
"up to %d values, but %zd is requested.", \
|
|
97
|
+
(int)(ApproxTopK_mode_t::APPROX_TOPK_BUCKETS_B##NB##_D##BD), \
|
|
98
|
+
NB * BD, \
|
|
99
|
+
k); \
|
|
100
|
+
HeapWithBucketsForHamming32< \
|
|
101
|
+
CMax<hamdis_t, int64_t>, \
|
|
102
|
+
NB, \
|
|
103
|
+
BD, \
|
|
104
|
+
HammingComputer>:: \
|
|
105
|
+
addn(j1 - j0, hc, bs2_, k, bh_val_, bh_ids_, sel); \
|
|
106
|
+
break;
|
|
107
|
+
|
|
108
|
+
switch (approx_topk_mode) {
|
|
109
|
+
HANDLE_APPROX(8, 3)
|
|
110
|
+
HANDLE_APPROX(8, 2)
|
|
111
|
+
HANDLE_APPROX(16, 2)
|
|
112
|
+
HANDLE_APPROX(32, 2)
|
|
113
|
+
default: {
|
|
114
|
+
for (size_t j = j0; j < j1; j++, bs2_ += bytes_per_code) {
|
|
115
|
+
if (sel && !sel->is_member(j)) {
|
|
116
|
+
continue;
|
|
117
|
+
}
|
|
118
|
+
dis = hc.hamming(bs2_);
|
|
119
|
+
if (dis < bh_val_[0]) {
|
|
120
|
+
faiss::maxheap_replace_top<hamdis_t>(
|
|
121
|
+
k, bh_val_, bh_ids_, dis, j);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
} break;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
if (order) {
|
|
129
|
+
ha->reorder();
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
#undef HANDLE_APPROX
|
|
134
|
+
|
|
135
|
+
template <class HammingComputer>
|
|
136
|
+
void hammings_knn_mc_impl(
|
|
137
|
+
int bytes_per_code,
|
|
138
|
+
const uint8_t* __restrict a,
|
|
139
|
+
const uint8_t* __restrict b,
|
|
140
|
+
size_t na,
|
|
141
|
+
size_t nb,
|
|
142
|
+
size_t k,
|
|
143
|
+
int32_t* __restrict distances,
|
|
144
|
+
int64_t* __restrict labels,
|
|
145
|
+
const faiss::IDSelector* sel) {
|
|
146
|
+
const int nBuckets = bytes_per_code * 8 + 1;
|
|
147
|
+
std::vector<int> all_counters(na * nBuckets, 0);
|
|
148
|
+
std::unique_ptr<int64_t[]> all_ids_per_dis(new int64_t[na * nBuckets * k]);
|
|
149
|
+
|
|
150
|
+
std::vector<HCounterState<HammingComputer>> cs;
|
|
151
|
+
for (size_t i = 0; i < na; ++i) {
|
|
152
|
+
cs.push_back(
|
|
153
|
+
HCounterState<HammingComputer>(
|
|
154
|
+
all_counters.data() + i * nBuckets,
|
|
155
|
+
all_ids_per_dis.get() + i * nBuckets * k,
|
|
156
|
+
a + i * bytes_per_code,
|
|
157
|
+
8 * bytes_per_code,
|
|
158
|
+
k));
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
const size_t block_size = hamming_batch_size;
|
|
162
|
+
for (size_t j0 = 0; j0 < nb; j0 += block_size) {
|
|
163
|
+
const size_t j1 = std::min(j0 + block_size, nb);
|
|
164
|
+
#pragma omp parallel for
|
|
165
|
+
for (int64_t i = 0; i < static_cast<int64_t>(na); ++i) {
|
|
166
|
+
for (size_t j = j0; j < j1; ++j) {
|
|
167
|
+
if (!sel || sel->is_member(j)) {
|
|
168
|
+
cs[i].update_counter(b + j * bytes_per_code, j);
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
for (size_t i = 0; i < na; ++i) {
|
|
175
|
+
HCounterState<HammingComputer>& csi = cs[i];
|
|
176
|
+
|
|
177
|
+
size_t nres = 0;
|
|
178
|
+
for (int b_2 = 0; b_2 < nBuckets && nres < k; b_2++) {
|
|
179
|
+
for (int l = 0; l < csi.counters[b_2] && nres < k; l++) {
|
|
180
|
+
labels[i * k + nres] = csi.ids_per_dis[b_2 * k + l];
|
|
181
|
+
distances[i * k + nres] = b_2;
|
|
182
|
+
nres++;
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
while (nres < k) {
|
|
186
|
+
labels[i * k + nres] = -1;
|
|
187
|
+
distances[i * k + nres] = std::numeric_limits<int32_t>::max();
|
|
188
|
+
++nres;
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
template <class HammingComputer>
|
|
194
|
+
void hamming_range_search_impl(
|
|
195
|
+
const uint8_t* a,
|
|
196
|
+
const uint8_t* b,
|
|
197
|
+
size_t na,
|
|
198
|
+
size_t nb,
|
|
199
|
+
int radius,
|
|
200
|
+
size_t code_size,
|
|
201
|
+
RangeSearchResult* res,
|
|
202
|
+
const faiss::IDSelector* sel) {
|
|
203
|
+
#pragma omp parallel
|
|
204
|
+
{
|
|
205
|
+
RangeSearchPartialResult pres(res);
|
|
206
|
+
|
|
207
|
+
#pragma omp for
|
|
208
|
+
for (int64_t i = 0; i < static_cast<int64_t>(na); i++) {
|
|
209
|
+
HammingComputer hc(a + i * code_size, code_size);
|
|
210
|
+
const uint8_t* yi = b;
|
|
211
|
+
RangeQueryResult& qres = pres.new_result(i);
|
|
212
|
+
|
|
213
|
+
for (size_t j = 0; j < nb; j++) {
|
|
214
|
+
if (!sel || sel->is_member(j)) {
|
|
215
|
+
int dis = hc.hamming(yi);
|
|
216
|
+
if (dis < radius) {
|
|
217
|
+
qres.add(dis, j);
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
yi += code_size;
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
pres.finalize();
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
/******************************************************************
|
|
228
|
+
* Generalized Hamming distances
|
|
229
|
+
******************************************************************/
|
|
230
|
+
|
|
231
|
+
template <class HammingComputer>
|
|
232
|
+
void hamming_dis_inner_loop(
|
|
233
|
+
const uint8_t* __restrict ca,
|
|
234
|
+
const uint8_t* __restrict cb,
|
|
235
|
+
size_t nb,
|
|
236
|
+
size_t code_size,
|
|
237
|
+
int k,
|
|
238
|
+
hamdis_t* __restrict bh_val_,
|
|
239
|
+
int64_t* __restrict bh_ids_) {
|
|
240
|
+
HammingComputer hc(ca, code_size);
|
|
241
|
+
|
|
242
|
+
for (size_t j = 0; j < nb; j++) {
|
|
243
|
+
int ndiff = hc.hamming(cb);
|
|
244
|
+
cb += code_size;
|
|
245
|
+
if (ndiff < bh_val_[0]) {
|
|
246
|
+
maxheap_replace_top<hamdis_t>(k, bh_val_, bh_ids_, ndiff, j);
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
/// k-nearest-neighbour search using the generalized Hamming computers at
/// THE_SIMD_LEVEL. Code sizes 8, 16 and 32 use dedicated computers; all other
/// sizes go through `GenHammingComputerM8_tpl`.
///
/// @param ha         heap array: `ha->nh` heaps of `ha->k` entries
/// @param a          query codes, one per heap
/// @param b          database codes (`nb` of them)
/// @param code_size  size of one code, in bytes
/// @param ordered    when non-zero, the heaps are heapified before and
///                   reordered after the scan
void generalized_hammings_knn_hc_impl(
        int_maxheap_array_t* __restrict ha,
        const uint8_t* __restrict a,
        const uint8_t* __restrict b,
        size_t nb,
        size_t code_size,
        int ordered) {
    // Keep the heap count and heap size in wide types: the per-query offset
    // `i * k` must not be evaluated in 32-bit signed arithmetic, which
    // overflows for large nh * k.
    const int64_t na = static_cast<int64_t>(ha->nh);
    const size_t k = ha->k;

    // NOTE(review): the heaps are only initialised when `ordered` is set;
    // presumably callers passing ordered == 0 supply initialised heaps --
    // confirm.
    if (ordered) {
        ha->heapify();
    }

#pragma omp parallel for
    for (int64_t i = 0; i < na; i++) {
        const uint8_t* __restrict ca = a + i * code_size;
        const uint8_t* __restrict cb = b;

        // Heap slice owned by query i.
        hamdis_t* __restrict bh_val_ = ha->val + i * k;
        int64_t* __restrict bh_ids_ = ha->ids + i * k;

        switch (code_size) {
            case 8:
                hamming_dis_inner_loop<GenHammingComputer8_tpl<THE_SIMD_LEVEL>>(
                        ca, cb, nb, 8, static_cast<int>(k), bh_val_, bh_ids_);
                break;
            case 16:
                hamming_dis_inner_loop<
                        GenHammingComputer16_tpl<THE_SIMD_LEVEL>>(
                        ca, cb, nb, 16, static_cast<int>(k), bh_val_, bh_ids_);
                break;
            case 32:
                hamming_dis_inner_loop<
                        GenHammingComputer32_tpl<THE_SIMD_LEVEL>>(
                        ca, cb, nb, 32, static_cast<int>(k), bh_val_, bh_ids_);
                break;
            default:
                hamming_dis_inner_loop<
                        GenHammingComputerM8_tpl<THE_SIMD_LEVEL>>(
                        ca,
                        cb,
                        nb,
                        code_size,
                        static_cast<int>(k),
                        bh_val_,
                        bh_ids_);
                break;
        }
    }

    if (ordered) {
        ha->reorder();
    }
}
|
|
300
|
+
|
|
301
|
+
} // anonymous namespace
|
|
302
|
+
|
|
303
|
+
/******************************************************************
|
|
304
|
+
* Entry point template specializations at THE_SIMD_LEVEL
|
|
305
|
+
******************************************************************/
|
|
306
|
+
|
|
307
|
+
#define C64(x) ((uint64_t*)x)
|
|
308
|
+
|
|
309
|
+
template <>
|
|
310
|
+
void hammings_knn_hc_fixSL<THE_SIMD_LEVEL>(
|
|
311
|
+
int_maxheap_array_t* ha,
|
|
312
|
+
const uint8_t* a,
|
|
313
|
+
const uint8_t* b,
|
|
314
|
+
size_t nb,
|
|
315
|
+
size_t ncodes,
|
|
316
|
+
int ordered,
|
|
317
|
+
ApproxTopK_mode_t approx_topk_mode,
|
|
318
|
+
const IDSelector* sel) {
|
|
319
|
+
with_HammingComputer<THE_SIMD_LEVEL>(ncodes, [&]<class HammingComputer>() {
|
|
320
|
+
hammings_knn_hc_impl<HammingComputer>(
|
|
321
|
+
ncodes, ha, a, b, nb, ordered, true, approx_topk_mode, sel);
|
|
322
|
+
});
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
template <>
|
|
326
|
+
void hammings_knn_mc_fixSL<THE_SIMD_LEVEL>(
|
|
327
|
+
const uint8_t* a,
|
|
328
|
+
const uint8_t* b,
|
|
329
|
+
size_t na,
|
|
330
|
+
size_t nb,
|
|
331
|
+
size_t k,
|
|
332
|
+
size_t ncodes,
|
|
333
|
+
int32_t* distances,
|
|
334
|
+
int64_t* labels,
|
|
335
|
+
const IDSelector* sel) {
|
|
336
|
+
with_HammingComputer<THE_SIMD_LEVEL>(ncodes, [&]<class HammingComputer>() {
|
|
337
|
+
hammings_knn_mc_impl<HammingComputer>(
|
|
338
|
+
ncodes, a, b, na, nb, k, distances, labels, sel);
|
|
339
|
+
});
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
template <>
|
|
343
|
+
void hamming_range_search_fixSL<THE_SIMD_LEVEL>(
|
|
344
|
+
const uint8_t* a,
|
|
345
|
+
const uint8_t* b,
|
|
346
|
+
size_t na,
|
|
347
|
+
size_t nb,
|
|
348
|
+
int radius,
|
|
349
|
+
size_t code_size,
|
|
350
|
+
RangeSearchResult* result,
|
|
351
|
+
const IDSelector* sel) {
|
|
352
|
+
with_HammingComputer<THE_SIMD_LEVEL>(
|
|
353
|
+
code_size, [&]<class HammingComputer>() {
|
|
354
|
+
hamming_range_search_impl<HammingComputer>(
|
|
355
|
+
a, b, na, nb, radius, code_size, result, sel);
|
|
356
|
+
});
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
template <>
|
|
360
|
+
void hammings_fixSL<THE_SIMD_LEVEL>(
|
|
361
|
+
const uint8_t* a,
|
|
362
|
+
const uint8_t* b,
|
|
363
|
+
size_t na,
|
|
364
|
+
size_t nb,
|
|
365
|
+
size_t ncodes,
|
|
366
|
+
hamdis_t* dis) {
|
|
367
|
+
FAISS_THROW_IF_NOT(ncodes % 8 == 0);
|
|
368
|
+
switch (ncodes) {
|
|
369
|
+
case 8:
|
|
370
|
+
hammings_impl<64>(C64(a), C64(b), na, nb, dis);
|
|
371
|
+
return;
|
|
372
|
+
case 16:
|
|
373
|
+
hammings_impl<128>(C64(a), C64(b), na, nb, dis);
|
|
374
|
+
return;
|
|
375
|
+
case 32:
|
|
376
|
+
hammings_impl<256>(C64(a), C64(b), na, nb, dis);
|
|
377
|
+
return;
|
|
378
|
+
case 64:
|
|
379
|
+
hammings_impl<512>(C64(a), C64(b), na, nb, dis);
|
|
380
|
+
return;
|
|
381
|
+
default:
|
|
382
|
+
hammings_impl_runtime(C64(a), C64(b), na, nb, ncodes * 8, dis);
|
|
383
|
+
return;
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
template <>
|
|
388
|
+
void generalized_hammings_knn_hc_fixSL<THE_SIMD_LEVEL>(
|
|
389
|
+
int_maxheap_array_t* ha,
|
|
390
|
+
const uint8_t* a,
|
|
391
|
+
const uint8_t* b,
|
|
392
|
+
size_t nb,
|
|
393
|
+
size_t code_size,
|
|
394
|
+
int ordered) {
|
|
395
|
+
generalized_hammings_knn_hc_impl(ha, a, b, nb, code_size, ordered);
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
template <>
|
|
399
|
+
void hamming_count_thres_fixSL<THE_SIMD_LEVEL>(
|
|
400
|
+
const uint8_t* bs1,
|
|
401
|
+
const uint8_t* bs2,
|
|
402
|
+
size_t n1,
|
|
403
|
+
size_t n2,
|
|
404
|
+
hamdis_t ht,
|
|
405
|
+
size_t ncodes,
|
|
406
|
+
size_t* nptr) {
|
|
407
|
+
switch (ncodes) {
|
|
408
|
+
case 8:
|
|
409
|
+
hamming_count_thres_impl<64>(C64(bs1), C64(bs2), n1, n2, ht, nptr);
|
|
410
|
+
return;
|
|
411
|
+
case 16:
|
|
412
|
+
hamming_count_thres_impl<128>(C64(bs1), C64(bs2), n1, n2, ht, nptr);
|
|
413
|
+
return;
|
|
414
|
+
case 32:
|
|
415
|
+
hamming_count_thres_impl<256>(C64(bs1), C64(bs2), n1, n2, ht, nptr);
|
|
416
|
+
return;
|
|
417
|
+
case 64:
|
|
418
|
+
hamming_count_thres_impl<512>(C64(bs1), C64(bs2), n1, n2, ht, nptr);
|
|
419
|
+
return;
|
|
420
|
+
default:
|
|
421
|
+
FAISS_THROW_FMT("not implemented for %zu bits", ncodes);
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
template <>
|
|
426
|
+
void crosshamming_count_thres_fixSL<THE_SIMD_LEVEL>(
|
|
427
|
+
const uint8_t* dbs,
|
|
428
|
+
size_t n,
|
|
429
|
+
hamdis_t ht,
|
|
430
|
+
size_t ncodes,
|
|
431
|
+
size_t* nptr) {
|
|
432
|
+
switch (ncodes) {
|
|
433
|
+
case 8:
|
|
434
|
+
crosshamming_count_thres_impl<64>(C64(dbs), n, ht, nptr);
|
|
435
|
+
return;
|
|
436
|
+
case 16:
|
|
437
|
+
crosshamming_count_thres_impl<128>(C64(dbs), n, ht, nptr);
|
|
438
|
+
return;
|
|
439
|
+
case 32:
|
|
440
|
+
crosshamming_count_thres_impl<256>(C64(dbs), n, ht, nptr);
|
|
441
|
+
return;
|
|
442
|
+
case 64:
|
|
443
|
+
crosshamming_count_thres_impl<512>(C64(dbs), n, ht, nptr);
|
|
444
|
+
return;
|
|
445
|
+
default:
|
|
446
|
+
FAISS_THROW_FMT("not implemented for %zu bits", ncodes);
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
template <>
|
|
451
|
+
size_t match_hamming_thres_fixSL<THE_SIMD_LEVEL>(
|
|
452
|
+
const uint8_t* bs1,
|
|
453
|
+
const uint8_t* bs2,
|
|
454
|
+
size_t n1,
|
|
455
|
+
size_t n2,
|
|
456
|
+
hamdis_t ht,
|
|
457
|
+
size_t ncodes,
|
|
458
|
+
int64_t* idx,
|
|
459
|
+
hamdis_t* dis) {
|
|
460
|
+
switch (ncodes) {
|
|
461
|
+
case 8:
|
|
462
|
+
return match_hamming_thres_impl<64>(
|
|
463
|
+
C64(bs1), C64(bs2), n1, n2, ht, idx, dis);
|
|
464
|
+
case 16:
|
|
465
|
+
return match_hamming_thres_impl<128>(
|
|
466
|
+
C64(bs1), C64(bs2), n1, n2, ht, idx, dis);
|
|
467
|
+
case 32:
|
|
468
|
+
return match_hamming_thres_impl<256>(
|
|
469
|
+
C64(bs1), C64(bs2), n1, n2, ht, idx, dis);
|
|
470
|
+
case 64:
|
|
471
|
+
return match_hamming_thres_impl<512>(
|
|
472
|
+
C64(bs1), C64(bs2), n1, n2, ht, idx, dis);
|
|
473
|
+
default:
|
|
474
|
+
FAISS_THROW_FMT("not implemented for %zu bits", ncodes);
|
|
475
|
+
return 0;
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
#undef C64
|
|
480
|
+
|
|
481
|
+
} // namespace faiss
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifdef COMPILE_SIMD_ARM_NEON
|
|
9
|
+
|
|
10
|
+
#define THE_SIMD_LEVEL SIMDLevel::ARM_NEON
|
|
11
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
12
|
+
#include <faiss/utils/hamming_distance/hamming_computer-neon.h>
|
|
13
|
+
#include <faiss/utils/hamming_distance/hamming_impl.h>
|
|
14
|
+
|
|
15
|
+
#endif // COMPILE_SIMD_ARM_NEON
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifdef COMPILE_SIMD_RISCV_RVV
|
|
9
|
+
|
|
10
|
+
#define THE_SIMD_LEVEL SIMDLevel::RISCV_RVV
|
|
11
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
12
|
+
#include <faiss/utils/hamming_distance/hamming_computer-rvv.h>
|
|
13
|
+
#include <faiss/utils/hamming_distance/hamming_impl.h>
|
|
14
|
+
|
|
15
|
+
#endif // COMPILE_SIMD_RISCV_RVV
|