faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -6,8 +6,11 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
#include <faiss/impl/FaissAssert.h>
|
|
9
|
-
#include <faiss/impl/
|
|
10
|
-
#include <faiss/impl/
|
|
9
|
+
#include <faiss/impl/fast_scan/LookupTableScaler.h>
|
|
10
|
+
#include <faiss/impl/fast_scan/decompose_qbs.h>
|
|
11
|
+
#include <faiss/impl/fast_scan/fast_scan.h>
|
|
12
|
+
#include <faiss/impl/fast_scan/simd_result_handlers.h>
|
|
13
|
+
#include <faiss/impl/simd_dispatch.h>
|
|
11
14
|
|
|
12
15
|
#include <array>
|
|
13
16
|
|
|
@@ -350,4 +353,168 @@ int pq4_pack_LUT_qbs_q_map(
|
|
|
350
353
|
return i0;
|
|
351
354
|
}
|
|
352
355
|
|
|
356
|
+
/** Compute the total number of queries encoded in a qbs value.
 *
 * qbs is read as a sequence of 4-bit digits (lowest digit first); each digit
 * is the number of queries in one block and the result is the sum of all
 * digits.
 *
 * @param qbs  base-16 decomposition of the number of queries
 * @return     sum of the 4-bit digits of qbs
 */
int pq4_qbs_to_nq(int qbs) {
    // Walk the digits on an unsigned copy: right-shifting a negative int
    // sign-extends, so a signed loop variable would never reach 0 and the
    // loop would not terminate for qbs < 0.
    unsigned int remaining = static_cast<unsigned int>(qbs);
    int nq_total = 0;
    while (remaining != 0) {
        nq_total += static_cast<int>(remaining & 15u);
        remaining >>= 4;
    }
    return nq_total;
}
|
|
366
|
+
|
|
367
|
+
/** Return the preferred decomposition of n queries into blocks.
 *
 * The result is a base-16 encoded qbs value: each 4-bit digit is the number
 * of queries handled by one block.
 *
 * @param n  number of queries, 0 <= n <= 24
 * @return   qbs value whose digits sum to n
 * @throws   via FAISS_THROW_FMT when n is negative or larger than 24
 */
int pq4_preferred_qbs(int n) {
    // from timings in P141901742, P141902828
    static const int map[12] = {
            0, 1, 2, 3, 0x13, 0x23, 0x33, 0x223, 0x233, 0x333, 0x2233, 0x2333};
    if (n < 0) {
        // a negative n would index map[] out of bounds
        FAISS_THROW_FMT("number of queries %d must be non-negative", n);
    }
    if (n <= 11) {
        return map[n];
    }
    if (n > 24) {
        FAISS_THROW_FMT("number of queries %d too large", n);
    }
    // override qbs: all first stages with 3 steps
    // then 1 stage with the rest
    //
    // The digits are OR-ed in one at a time instead of masking with
    // (1 << nbit) - 1: for n == 24 that mask needs a shift by 32, which is
    // undefined for a 32-bit int. The largest value produced here is
    // 0x33333333, which still fits in a positive int.
    const int n_full = n / 3; // number of digits equal to 3
    int qbs = 0;
    for (int digit = 0; digit < n_full; digit++) {
        qbs |= 3 << (4 * digit);
    }
    const int rest = n % 3;
    if (rest != 0) {
        // rest != 0 implies n <= 23, so the shift count is at most 28
        qbs |= rest << (4 * n_full);
    }
    return qbs;
}
|
|
384
|
+
|
|
385
|
+
} // namespace faiss
|
|
386
|
+
|
|
387
|
+
/***************************************************************
|
|
388
|
+
* FastScanCodeScanner: NONE specialization + dispatch wrapper.
|
|
389
|
+
*
|
|
390
|
+
* The NONE specialization provides the scalar fallback.
|
|
391
|
+
* Per-SIMD specializations (AVX2, AVX512, ARM_NEON) are in
|
|
392
|
+
* impl-avx2.cpp, impl-avx512.cpp, impl-neon.cpp respectively.
|
|
393
|
+
***************************************************************/
|
|
394
|
+
|
|
395
|
+
#define THE_LEVEL_TO_DISPATCH SIMDLevel::NONE
|
|
396
|
+
#include <faiss/impl/fast_scan/dispatching.h> // IWYU pragma: keep
|
|
397
|
+
#include <faiss/impl/fast_scan/rabitq_dispatching.h> // IWYU pragma: keep
|
|
398
|
+
#undef THE_LEVEL_TO_DISPATCH
|
|
399
|
+
|
|
400
|
+
namespace faiss {
|
|
401
|
+
|
|
402
|
+
using namespace simd_result_handlers;
|
|
403
|
+
|
|
404
|
+
/***************************************************************
|
|
405
|
+
* accumulate_to_mem: NONE specialization + runtime dispatch.
|
|
406
|
+
***************************************************************/
|
|
407
|
+
|
|
408
|
+
template <>
|
|
409
|
+
void accumulate_to_mem_impl<SIMDLevel::NONE>(
|
|
410
|
+
int nq,
|
|
411
|
+
size_t ntotal2,
|
|
412
|
+
int nsq,
|
|
413
|
+
const uint8_t* codes,
|
|
414
|
+
const uint8_t* LUT,
|
|
415
|
+
uint16_t* accu) {
|
|
416
|
+
StoreResultHandler<SIMDLevel::NONE> handler(accu, ntotal2);
|
|
417
|
+
DummyScaler<SIMDLevel::NONE> scaler;
|
|
418
|
+
accumulate<SIMDLevel::NONE>(
|
|
419
|
+
nq, ntotal2, nsq, codes, LUT, handler, scaler, 32 * nsq / 2);
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
void accumulate_to_mem(
|
|
423
|
+
int nq,
|
|
424
|
+
size_t ntotal2,
|
|
425
|
+
int nsq,
|
|
426
|
+
const uint8_t* codes,
|
|
427
|
+
const uint8_t* LUT,
|
|
428
|
+
uint16_t* accu) {
|
|
429
|
+
FAISS_THROW_IF_NOT(ntotal2 % 32 == 0);
|
|
430
|
+
with_simd_level([&]<SIMDLevel SL>() {
|
|
431
|
+
accumulate_to_mem_impl<SL>(nq, ntotal2, nsq, codes, LUT, accu);
|
|
432
|
+
});
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
} // namespace faiss
|
|
436
|
+
|
|
437
|
+
namespace faiss {
|
|
438
|
+
|
|
439
|
+
std::unique_ptr<FastScanCodeScanner> make_fast_scan_knn_scanner(
|
|
440
|
+
bool is_max,
|
|
441
|
+
int impl,
|
|
442
|
+
size_t nq,
|
|
443
|
+
size_t ntotal,
|
|
444
|
+
int64_t k,
|
|
445
|
+
float* distances,
|
|
446
|
+
int64_t* ids,
|
|
447
|
+
const IDSelector* sel,
|
|
448
|
+
bool with_id_map) {
|
|
449
|
+
return with_simd_level([&]<SIMDLevel SL>() {
|
|
450
|
+
return make_fast_scan_scanner_impl<SL>(
|
|
451
|
+
is_max, impl, nq, ntotal, k, distances, ids, sel, with_id_map);
|
|
452
|
+
});
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
std::unique_ptr<FastScanCodeScanner> make_range_scanner(
|
|
456
|
+
bool is_max,
|
|
457
|
+
RangeSearchResult& rres,
|
|
458
|
+
float radius,
|
|
459
|
+
size_t ntotal,
|
|
460
|
+
const IDSelector* sel) {
|
|
461
|
+
return with_simd_level([&]<SIMDLevel SL>() {
|
|
462
|
+
return make_range_scanner_impl<SL>(is_max, rres, radius, ntotal, sel);
|
|
463
|
+
});
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
std::unique_ptr<FastScanCodeScanner> make_partial_range_scanner(
|
|
467
|
+
bool is_max,
|
|
468
|
+
RangeSearchPartialResult& pres,
|
|
469
|
+
float radius,
|
|
470
|
+
size_t ntotal,
|
|
471
|
+
size_t q0,
|
|
472
|
+
size_t q1,
|
|
473
|
+
const IDSelector* sel) {
|
|
474
|
+
return with_simd_level([&]<SIMDLevel SL>() {
|
|
475
|
+
return make_partial_range_scanner_impl<SL>(
|
|
476
|
+
is_max, pres, radius, ntotal, q0, q1, sel);
|
|
477
|
+
});
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
std::unique_ptr<FastScanCodeScanner> rabitq_make_knn_scanner(
|
|
481
|
+
const IndexRaBitQFastScan* index,
|
|
482
|
+
bool is_max,
|
|
483
|
+
size_t nq,
|
|
484
|
+
int64_t k,
|
|
485
|
+
float* distances,
|
|
486
|
+
int64_t* ids,
|
|
487
|
+
const IDSelector* sel,
|
|
488
|
+
const FastScanDistancePostProcessing& context,
|
|
489
|
+
bool is_multi_bit) {
|
|
490
|
+
return with_simd_level([&]<SIMDLevel SL>() {
|
|
491
|
+
return rabitq_make_knn_scanner_impl<SL>(
|
|
492
|
+
index,
|
|
493
|
+
is_max,
|
|
494
|
+
nq,
|
|
495
|
+
k,
|
|
496
|
+
distances,
|
|
497
|
+
ids,
|
|
498
|
+
sel,
|
|
499
|
+
context,
|
|
500
|
+
is_multi_bit);
|
|
501
|
+
});
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
std::unique_ptr<FastScanCodeScanner> rabitq_ivf_make_knn_scanner(
|
|
505
|
+
bool is_max,
|
|
506
|
+
const IndexIVFRaBitQFastScan* index,
|
|
507
|
+
size_t nq,
|
|
508
|
+
size_t k,
|
|
509
|
+
float* distances,
|
|
510
|
+
int64_t* ids,
|
|
511
|
+
const IDSelector* sel,
|
|
512
|
+
const FastScanDistancePostProcessing* context,
|
|
513
|
+
bool multi_bit) {
|
|
514
|
+
return with_simd_level([&]<SIMDLevel SL>() {
|
|
515
|
+
return rabitq_ivf_make_knn_scanner_impl<SL>(
|
|
516
|
+
is_max, index, nq, k, distances, ids, sel, context, multi_bit);
|
|
517
|
+
});
|
|
518
|
+
}
|
|
519
|
+
|
|
353
520
|
} // namespace faiss
|
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <cstdint>
|
|
11
|
+
#include <cstdlib>
|
|
12
|
+
#include <memory>
|
|
13
|
+
|
|
14
|
+
#include <faiss/impl/CodePacker.h>
|
|
15
|
+
#include <faiss/utils/simd_levels.h>
|
|
16
|
+
|
|
17
|
+
/** PQ4 SIMD packing and accumulation functions
|
|
18
|
+
*
|
|
19
|
+
* The basic kernel accumulates nq query vectors with bbs = nb * 2 * 16 vectors
|
|
20
|
+
* and produces an output matrix for that. It is interesting for nq * nb <= 4,
|
|
21
|
+
* otherwise register spilling becomes too large.
|
|
22
|
+
*
|
|
23
|
+
* The implementation of these functions is spread over several cpp files to reduce
|
|
24
|
+
* parallel compile times. Templates are instantiated explicitly.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
namespace faiss {
|
|
28
|
+
|
|
29
|
+
struct IDSelector;
|
|
30
|
+
struct RangeSearchResult;
|
|
31
|
+
struct RangeSearchPartialResult;
|
|
32
|
+
struct SIMDResultHandlerToFloat;
|
|
33
|
+
|
|
34
|
+
/** Pack codes for consumption by the SIMD kernels.
|
|
35
|
+
* The unused bytes are set to 0.
|
|
36
|
+
*
|
|
37
|
+
* @param codes input codes, size (ntotal, ceil(M / 2))
|
|
38
|
+
* @param ntotal number of input codes
|
|
39
|
+
* @param nb output number of codes (ntotal rounded up to a multiple of
|
|
40
|
+
* bbs)
|
|
41
|
+
* @param nsq number of sub-quantizers (=M rounded up to a multiple of 2)
|
|
42
|
+
* @param bbs size of database blocks (multiple of 32)
|
|
43
|
+
* @param blocks output array, size nb * nsq / 2.
|
|
44
|
+
* @param code_stride optional stride between consecutive codes (0 = use
|
|
45
|
+
* default (M + 1) / 2)
|
|
46
|
+
*/
|
|
47
|
+
void pq4_pack_codes(
|
|
48
|
+
const uint8_t* codes,
|
|
49
|
+
size_t ntotal,
|
|
50
|
+
size_t M,
|
|
51
|
+
size_t nb,
|
|
52
|
+
size_t bbs,
|
|
53
|
+
size_t nsq,
|
|
54
|
+
uint8_t* blocks,
|
|
55
|
+
size_t code_stride = 0);
|
|
56
|
+
|
|
57
|
+
/** Same as pq4_pack_codes, but writes only a given range of the output,
|
|
58
|
+
* leaving the rest untouched. Assumes allocated entries are 0 on input.
|
|
59
|
+
*
|
|
60
|
+
* @param codes input codes, size (i1 - i0, ceil(M / 2))
|
|
61
|
+
* @param i0 first output code to write
|
|
62
|
+
* @param i1 end of the output range to write (exclusive: i1 - i0 codes are read)
|
|
63
|
+
* @param blocks output array, size at least ceil(i1 / bbs) * bbs * nsq / 2
|
|
64
|
+
* @param code_stride optional stride between consecutive codes (0 = use
|
|
65
|
+
* default (M + 1) / 2)
|
|
66
|
+
* @param block_stride stride in bytes between consecutive blocks.
|
|
67
|
+
*/
|
|
68
|
+
void pq4_pack_codes_range(
|
|
69
|
+
const uint8_t* codes,
|
|
70
|
+
size_t M,
|
|
71
|
+
size_t i0,
|
|
72
|
+
size_t i1,
|
|
73
|
+
size_t bbs,
|
|
74
|
+
size_t nsq,
|
|
75
|
+
uint8_t* blocks,
|
|
76
|
+
size_t code_stride,
|
|
77
|
+
size_t block_stride);
|
|
78
|
+
|
|
79
|
+
/** get a single element from a packed codes table
|
|
80
|
+
*
|
|
81
|
+
* @param vector_id vector id
|
|
82
|
+
* @param sq subquantizer (< nsq)
|
|
83
|
+
*/
|
|
84
|
+
uint8_t pq4_get_packed_element(
|
|
85
|
+
const uint8_t* data,
|
|
86
|
+
size_t bbs,
|
|
87
|
+
size_t nsq,
|
|
88
|
+
size_t vector_id,
|
|
89
|
+
size_t sq);
|
|
90
|
+
|
|
91
|
+
/** set a single element "code" into a packed codes table
|
|
92
|
+
*
|
|
93
|
+
* @param vector_id vector id
|
|
94
|
+
* @param sq subquantizer (< nsq)
|
|
95
|
+
*/
|
|
96
|
+
void pq4_set_packed_element(
|
|
97
|
+
uint8_t* data,
|
|
98
|
+
uint8_t code,
|
|
99
|
+
size_t bbs,
|
|
100
|
+
size_t nsq,
|
|
101
|
+
size_t vector_id,
|
|
102
|
+
size_t sq);
|
|
103
|
+
|
|
104
|
+
/** CodePacker API for the PQ4 fast-scan */
|
|
105
|
+
struct CodePackerPQ4 : CodePacker {
|
|
106
|
+
/// NOTE(review): presumably the number of sub-quantizers, as in the
/// pq4_* functions declared in this header — confirm in the constructor.
size_t nsq;
|
|
107
|
+
|
|
108
|
+
/// Takes nsq and bbs; how they are stored or validated is defined out of
/// line and not visible in this header.
CodePackerPQ4(size_t nsq, size_t bbs);
|
|
109
|
+
|
|
110
|
+
/// `final` member: it overrides a virtual of CodePacker and cannot be
/// overridden again by classes deriving from CodePackerPQ4.
CodePacker* clone() const final;
|
|
111
|
+
|
|
112
|
+
/// NOTE(review): presumably stores one flat code at position `offset` of a
/// packed block — confirm against the CodePacker base class.
void pack_1(const uint8_t* flat_code, size_t offset, uint8_t* block)
|
|
113
|
+
const final;
|
|
114
|
+
/// NOTE(review): presumably the inverse of pack_1 (reads position `offset`
/// of a packed block back into `flat_code`) — confirm against CodePacker.
void unpack_1(const uint8_t* block, size_t offset, uint8_t* flat_code)
|
|
115
|
+
const final;
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
/** Pack Look-up table for consumption by the kernel.
|
|
119
|
+
*
|
|
120
|
+
* @param nq number of queries
|
|
121
|
+
* @param nsq number of sub-quantizers (multiple of 2)
|
|
122
|
+
* @param src input array, size (nq, 16)
|
|
123
|
+
* @param dest output array, size (nq, 16)
|
|
124
|
+
*/
|
|
125
|
+
void pq4_pack_LUT(int nq, int nsq, const uint8_t* src, uint8_t* dest);
|
|
126
|
+
|
|
127
|
+
/* compute the number of queries from a base-16 decomposition */
|
|
128
|
+
int pq4_qbs_to_nq(int qbs);
|
|
129
|
+
|
|
130
|
+
/** Return the preferred decomposition in blocks for a given number of queries. */
|
|
131
|
+
int pq4_preferred_qbs(int nq);
|
|
132
|
+
|
|
133
|
+
/** Pack Look-up table for consumption by the kernel.
|
|
134
|
+
*
|
|
135
|
+
* @param fqbs 4-bit encoded number of query blocks, the total number of
|
|
136
|
+
* queries handled (nq) is deduced from it
|
|
137
|
+
* @param nsq number of sub-quantizers (multiple of 2)
|
|
138
|
+
* @param src input array, size (nq, 16)
|
|
139
|
+
* @param dest output array, size (nq, 16)
|
|
140
|
+
* @return nq
|
|
141
|
+
*/
|
|
142
|
+
int pq4_pack_LUT_qbs(int fqbs, int nsq, const uint8_t* src, uint8_t* dest);
|
|
143
|
+
|
|
144
|
+
/** Same as pq4_pack_LUT_qbs, except the source vectors are remapped with q_map
|
|
145
|
+
*/
|
|
146
|
+
int pq4_pack_LUT_qbs_q_map(
|
|
147
|
+
int qbs,
|
|
148
|
+
int nsq,
|
|
149
|
+
const uint8_t* src,
|
|
150
|
+
const int* q_map,
|
|
151
|
+
uint8_t* dest);
|
|
152
|
+
|
|
153
|
+
/** Wrapper using simple StoreResultHandler
|
|
154
|
+
* and DummyScaler
|
|
155
|
+
*
|
|
156
|
+
* @param nq number of queries
|
|
157
|
+
* @param ntotal2 number of database elements (multiple of 32)
|
|
158
|
+
* @param nsq number of sub-quantizers (multiple of 2)
|
|
159
|
+
* @param codes packed codes array
|
|
160
|
+
* @param LUT packed look-up table
|
|
161
|
+
* @param accu array to store the results
|
|
162
|
+
*/
|
|
163
|
+
void accumulate_to_mem(
|
|
164
|
+
int nq,
|
|
165
|
+
size_t ntotal2,
|
|
166
|
+
int nsq,
|
|
167
|
+
const uint8_t* codes,
|
|
168
|
+
const uint8_t* LUT,
|
|
169
|
+
uint16_t* accu);
|
|
170
|
+
|
|
171
|
+
/// Per-SIMD specialization of accumulate_to_mem (defined in per-SIMD TUs)
|
|
172
|
+
template <SIMDLevel SL>
|
|
173
|
+
void accumulate_to_mem_impl(
|
|
174
|
+
int nq,
|
|
175
|
+
size_t ntotal2,
|
|
176
|
+
int nsq,
|
|
177
|
+
const uint8_t* codes,
|
|
178
|
+
const uint8_t* LUT,
|
|
179
|
+
uint16_t* accu);
|
|
180
|
+
|
|
181
|
+
/***************************************************************
|
|
182
|
+
* FastScanCodeScanner: virtual base that bundles handler + kernel
|
|
183
|
+
* behind the SIMD dispatch boundary. Per-SIMD TUs instantiate this
|
|
184
|
+
* with the correct SIMDLevel so that handler and kernel share the
|
|
185
|
+
* same SIMD types.
|
|
186
|
+
***************************************************************/
|
|
187
|
+
|
|
188
|
+
struct FastScanCodeScanner {
|
|
189
|
+
/// Virtual destructor: scanners are handed out as
/// std::unique_ptr<FastScanCodeScanner> by the factories declared in this
/// header, so they are destroyed through a base-class pointer.
virtual ~FastScanCodeScanner() = default;
|
|
190
|
+
|
|
191
|
+
/// Access the underlying result handler (for begin/end/normalizer calls)
/// Pure virtual: every concrete scanner supplies its own handler.
|
|
192
|
+
virtual SIMDResultHandlerToFloat* handler() = 0;
|
|
193
|
+
|
|
194
|
+
/// Run the search_1 accumulation loop (bbs > 32, multi-BB kernel)
/// Pure virtual. Parameter semantics are defined by the per-SIMD
/// implementations, which are not visible in this header.
/// NOTE(review): block_stride is presumably the byte stride between
/// consecutive code blocks, as for pq4_pack_codes_range — confirm.
|
|
195
|
+
virtual void accumulate_loop(
|
|
196
|
+
int nq,
|
|
197
|
+
size_t nb,
|
|
198
|
+
int bbs,
|
|
199
|
+
int nsq,
|
|
200
|
+
const uint8_t* codes,
|
|
201
|
+
const uint8_t* LUT,
|
|
202
|
+
int pq2x4_scale,
|
|
203
|
+
size_t block_stride) = 0;
|
|
204
|
+
|
|
205
|
+
/// Run the QBS accumulation loop (bbs == 32)
/// Pure virtual. Takes `qbs` in place of accumulate_loop's `nq` and has no
/// `bbs` parameter; the remaining parameters are the same.
|
|
206
|
+
virtual void accumulate_loop_qbs(
|
|
207
|
+
int qbs,
|
|
208
|
+
size_t nb,
|
|
209
|
+
int nsq,
|
|
210
|
+
const uint8_t* codes,
|
|
211
|
+
const uint8_t* LUT,
|
|
212
|
+
int pq2x4_scale,
|
|
213
|
+
size_t block_stride) = 0;
|
|
214
|
+
};
|
|
215
|
+
|
|
216
|
+
/// Per-SIMD factory: explicitly specialized in each per-SIMD TU
|
|
217
|
+
/// (impl-avx2.cpp, impl-avx512.cpp, impl-neon.cpp, fast_scan.cpp for NONE).
|
|
218
|
+
/// Not called directly — use make_fast_scan_knn_scanner() instead.
|
|
219
|
+
template <SIMDLevel SL>
|
|
220
|
+
std::unique_ptr<FastScanCodeScanner> make_fast_scan_scanner_impl(
|
|
221
|
+
bool is_max,
|
|
222
|
+
int impl,
|
|
223
|
+
size_t nq,
|
|
224
|
+
size_t ntotal,
|
|
225
|
+
int64_t k,
|
|
226
|
+
float* distances,
|
|
227
|
+
int64_t* ids,
|
|
228
|
+
const IDSelector* sel,
|
|
229
|
+
bool with_id_map);
|
|
230
|
+
|
|
231
|
+
/// Runtime dispatch wrapper: selects the best available SIMD level
|
|
232
|
+
/// (via with_simd_level) and delegates to the corresponding
|
|
233
|
+
/// make_fast_scan_scanner_impl<SL> specialization.
|
|
234
|
+
std::unique_ptr<FastScanCodeScanner> make_fast_scan_knn_scanner(
|
|
235
|
+
bool is_max,
|
|
236
|
+
int impl,
|
|
237
|
+
size_t nq,
|
|
238
|
+
size_t ntotal,
|
|
239
|
+
int64_t k,
|
|
240
|
+
float* distances,
|
|
241
|
+
int64_t* ids,
|
|
242
|
+
const IDSelector* sel,
|
|
243
|
+
bool with_id_map = false);
|
|
244
|
+
|
|
245
|
+
/// Per-SIMD range scanner factories (defined in per-SIMD TUs via dispatching.h)
|
|
246
|
+
template <SIMDLevel SL>
|
|
247
|
+
std::unique_ptr<FastScanCodeScanner> make_range_scanner_impl(
|
|
248
|
+
bool is_max,
|
|
249
|
+
RangeSearchResult& rres,
|
|
250
|
+
float radius,
|
|
251
|
+
size_t ntotal,
|
|
252
|
+
const IDSelector* sel);
|
|
253
|
+
|
|
254
|
+
template <SIMDLevel SL>
|
|
255
|
+
std::unique_ptr<FastScanCodeScanner> make_partial_range_scanner_impl(
|
|
256
|
+
bool is_max,
|
|
257
|
+
RangeSearchPartialResult& pres,
|
|
258
|
+
float radius,
|
|
259
|
+
size_t ntotal,
|
|
260
|
+
size_t q0,
|
|
261
|
+
size_t q1,
|
|
262
|
+
const IDSelector* sel);
|
|
263
|
+
|
|
264
|
+
/// Runtime dispatch: range search scanner.
|
|
265
|
+
std::unique_ptr<FastScanCodeScanner> make_range_scanner(
|
|
266
|
+
bool is_max,
|
|
267
|
+
RangeSearchResult& rres,
|
|
268
|
+
float radius,
|
|
269
|
+
size_t ntotal,
|
|
270
|
+
const IDSelector* sel);
|
|
271
|
+
|
|
272
|
+
/// Runtime dispatch: partial range search scanner (per-thread).
|
|
273
|
+
std::unique_ptr<FastScanCodeScanner> make_partial_range_scanner(
|
|
274
|
+
bool is_max,
|
|
275
|
+
RangeSearchPartialResult& pres,
|
|
276
|
+
float radius,
|
|
277
|
+
size_t ntotal,
|
|
278
|
+
size_t q0,
|
|
279
|
+
size_t q1,
|
|
280
|
+
const IDSelector* sel);
|
|
281
|
+
|
|
282
|
+
/***************************************************************
|
|
283
|
+
* RaBitQ scanner factory: per-SIMD specializations live in
|
|
284
|
+
* rabitq_dispatching.h, included by each per-SIMD TU.
|
|
285
|
+
***************************************************************/
|
|
286
|
+
|
|
287
|
+
struct IndexRaBitQFastScan;
|
|
288
|
+
struct IndexIVFRaBitQFastScan;
|
|
289
|
+
struct FastScanDistancePostProcessing;
|
|
290
|
+
|
|
291
|
+
/// Per-SIMD factory (primary template; specializations in rabitq_dispatching.h)
|
|
292
|
+
template <SIMDLevel SL>
|
|
293
|
+
std::unique_ptr<FastScanCodeScanner> rabitq_make_knn_scanner_impl(
|
|
294
|
+
const IndexRaBitQFastScan* index,
|
|
295
|
+
bool is_max,
|
|
296
|
+
size_t nq,
|
|
297
|
+
int64_t k,
|
|
298
|
+
float* distances,
|
|
299
|
+
int64_t* ids,
|
|
300
|
+
const IDSelector* sel,
|
|
301
|
+
const FastScanDistancePostProcessing& context,
|
|
302
|
+
bool is_multi_bit);
|
|
303
|
+
|
|
304
|
+
/// Runtime dispatch wrapper for rabitq_make_knn_scanner_impl
|
|
305
|
+
std::unique_ptr<FastScanCodeScanner> rabitq_make_knn_scanner(
|
|
306
|
+
const IndexRaBitQFastScan* index,
|
|
307
|
+
bool is_max,
|
|
308
|
+
size_t nq,
|
|
309
|
+
int64_t k,
|
|
310
|
+
float* distances,
|
|
311
|
+
int64_t* ids,
|
|
312
|
+
const IDSelector* sel,
|
|
313
|
+
const FastScanDistancePostProcessing& context,
|
|
314
|
+
bool is_multi_bit);
|
|
315
|
+
|
|
316
|
+
/// Per-SIMD IVF RaBitQ scanner factory.
|
|
317
|
+
template <SIMDLevel SL>
|
|
318
|
+
std::unique_ptr<FastScanCodeScanner> rabitq_ivf_make_knn_scanner_impl(
|
|
319
|
+
bool is_max,
|
|
320
|
+
const IndexIVFRaBitQFastScan* index,
|
|
321
|
+
size_t nq,
|
|
322
|
+
size_t k,
|
|
323
|
+
float* distances,
|
|
324
|
+
int64_t* ids,
|
|
325
|
+
const IDSelector* sel,
|
|
326
|
+
const FastScanDistancePostProcessing* context,
|
|
327
|
+
bool multi_bit);
|
|
328
|
+
|
|
329
|
+
/// Runtime dispatch wrapper for IVF RaBitQ scanner.
|
|
330
|
+
std::unique_ptr<FastScanCodeScanner> rabitq_ivf_make_knn_scanner(
|
|
331
|
+
bool is_max,
|
|
332
|
+
const IndexIVFRaBitQFastScan* index,
|
|
333
|
+
size_t nq,
|
|
334
|
+
size_t k,
|
|
335
|
+
float* distances,
|
|
336
|
+
int64_t* ids,
|
|
337
|
+
const IDSelector* sel,
|
|
338
|
+
const FastScanDistancePostProcessing* context,
|
|
339
|
+
bool multi_bit);
|
|
340
|
+
|
|
341
|
+
} // namespace faiss
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifdef COMPILE_SIMD_AVX2
|
|
9
|
+
|
|
10
|
+
#define THE_LEVEL_TO_DISPATCH SIMDLevel::AVX2
|
|
11
|
+
#include <faiss/impl/fast_scan/dispatching.h> // IWYU pragma: keep
|
|
12
|
+
#include <faiss/impl/fast_scan/rabitq_dispatching.h> // IWYU pragma: keep
|
|
13
|
+
|
|
14
|
+
#include <faiss/impl/fast_scan/decompose_qbs.h>
|
|
15
|
+
|
|
16
|
+
namespace faiss {
|
|
17
|
+
|
|
18
|
+
using namespace simd_result_handlers;
|
|
19
|
+
|
|
20
|
+
/// AVX2 specialization of accumulate_to_mem_impl: runs accumulate() with a
/// StoreResultHandler built from (accu, ntotal2) and a DummyScaler.
template <>
|
|
21
|
+
void accumulate_to_mem_impl<SIMDLevel::AVX2>(
|
|
22
|
+
int nq,
|
|
23
|
+
size_t ntotal2,
|
|
24
|
+
int nsq,
|
|
25
|
+
const uint8_t* codes,
|
|
26
|
+
const uint8_t* LUT,
|
|
27
|
+
uint16_t* accu) {
|
|
28
|
+
// Handler, scaler and kernel all use the AVX2 SIMD level in this TU.
StoreResultHandler<SIMDLevel::AVX2> handler(accu, ntotal2);
|
|
29
|
+
DummyScaler<SIMDLevel::AVX2> scaler;
|
|
30
|
+
// NOTE(review): the last argument (32 * nsq / 2) is presumably the byte
// stride of one 32-vector block of 4-bit codes — confirm against
// accumulate()'s signature.
accumulate<SIMDLevel::AVX2>(
|
|
31
|
+
nq, ntotal2, nsq, codes, LUT, handler, scaler, 32 * nsq / 2);
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
} // namespace faiss
|
|
35
|
+
|
|
36
|
+
#endif // COMPILE_SIMD_AVX2
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifdef COMPILE_SIMD_AVX512
|
|
9
|
+
|
|
10
|
+
#define THE_LEVEL_TO_DISPATCH SIMDLevel::AVX512
|
|
11
|
+
#include <faiss/impl/fast_scan/dispatching.h> // IWYU pragma: keep
|
|
12
|
+
#include <faiss/impl/fast_scan/rabitq_dispatching.h> // IWYU pragma: keep
|
|
13
|
+
|
|
14
|
+
#include <faiss/impl/fast_scan/decompose_qbs.h>
|
|
15
|
+
|
|
16
|
+
namespace faiss {
|
|
17
|
+
|
|
18
|
+
using namespace simd_result_handlers;
|
|
19
|
+
|
|
20
|
+
/// AVX512 specialization of accumulate_to_mem_impl: runs the AVX512
/// accumulate() with a DummyScaler, but with an AVX2-level
/// StoreResultHandler built from (accu, ntotal2).
template <>
|
|
21
|
+
void accumulate_to_mem_impl<SIMDLevel::AVX512>(
|
|
22
|
+
int nq,
|
|
23
|
+
size_t ntotal2,
|
|
24
|
+
int nsq,
|
|
25
|
+
const uint8_t* codes,
|
|
26
|
+
const uint8_t* LUT,
|
|
27
|
+
uint16_t* accu) {
|
|
28
|
+
// Use AVX2-level handler (256-bit StoreResultHandler) since the 512-bit
|
|
29
|
+
// kernels reduce to AVX2-level simd16uint16 via FixedStorage512.
|
|
30
|
+
StoreResultHandler<SIMDLevel::AVX2> handler(accu, ntotal2);
|
|
31
|
+
DummyScaler<SIMDLevel::AVX512> scaler;
|
|
32
|
+
// kernel_accumulate_block in decompose_qbs.h selects pq4_kernel_qbs_512
|
|
33
|
+
// via #ifdef __AVX512F__ (which is set for this TU).
// NOTE(review): the last argument (32 * nsq / 2) is presumably the byte
// stride of one 32-vector block of 4-bit codes — confirm against
// accumulate()'s signature.
|
|
34
|
+
accumulate<SIMDLevel::AVX512>(
|
|
35
|
+
nq, ntotal2, nsq, codes, LUT, handler, scaler, 32 * nsq / 2);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
} // namespace faiss
|
|
39
|
+
|
|
40
|
+
#endif // COMPILE_SIMD_AVX512
|