faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifdef COMPILE_SIMD_AVX2
|
|
9
|
+
|
|
10
|
+
#define THE_SIMD_LEVEL SIMDLevel::AVX2
|
|
11
|
+
|
|
12
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
13
|
+
#include <faiss/utils/hamming_distance/hamming_computer-avx2.h>
|
|
14
|
+
|
|
15
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
16
|
+
#include <faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h>
|
|
17
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
18
|
+
#include <faiss/impl/binary_hamming/IndexBinaryHash_impl.h>
|
|
19
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
20
|
+
#include <faiss/impl/binary_hamming/IndexBinaryIVF_impl.h>
|
|
21
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
22
|
+
#include <faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h>
|
|
23
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
24
|
+
#include <faiss/impl/binary_hamming/IndexPQ_impl.h>
|
|
25
|
+
|
|
26
|
+
#endif // COMPILE_SIMD_AVX2
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifdef COMPILE_SIMD_AVX512
|
|
9
|
+
|
|
10
|
+
#define THE_SIMD_LEVEL SIMDLevel::AVX512
|
|
11
|
+
|
|
12
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
13
|
+
#include <faiss/utils/hamming_distance/hamming_computer-avx512.h>
|
|
14
|
+
|
|
15
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
16
|
+
#include <faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h>
|
|
17
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
18
|
+
#include <faiss/impl/binary_hamming/IndexBinaryHash_impl.h>
|
|
19
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
20
|
+
#include <faiss/impl/binary_hamming/IndexBinaryIVF_impl.h>
|
|
21
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
22
|
+
#include <faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h>
|
|
23
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
24
|
+
#include <faiss/impl/binary_hamming/IndexPQ_impl.h>
|
|
25
|
+
|
|
26
|
+
#endif // COMPILE_SIMD_AVX512
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <faiss/utils/simd_levels.h>
|
|
11
|
+
|
|
12
|
+
#include <cstddef>
|
|
13
|
+
#include <cstdint>
|
|
14
|
+
|
|
15
|
+
namespace faiss {
|
|
16
|
+
|
|
17
|
+
// Forward declarations
|
|
18
|
+
struct DistanceComputer;
|
|
19
|
+
struct InvertedListScanner;
|
|
20
|
+
struct BinaryInvertedListScanner;
|
|
21
|
+
struct IndexBinaryFlat;
|
|
22
|
+
struct IndexBinaryHash;
|
|
23
|
+
struct IndexBinaryMultiHash;
|
|
24
|
+
struct IndexBinaryIVF;
|
|
25
|
+
struct IndexIVFSpectralHash;
|
|
26
|
+
struct IndexPQ;
|
|
27
|
+
struct SearchParametersIVF;
|
|
28
|
+
using IVFSearchParameters = SearchParametersIVF;
|
|
29
|
+
struct RangeQueryResult;
|
|
30
|
+
using idx_t = int64_t;
|
|
31
|
+
|
|
32
|
+
/** @name IndexBinaryHNSW dispatch
|
|
33
|
+
* @{ */
|
|
34
|
+
template <SIMDLevel SL>
|
|
35
|
+
DistanceComputer* make_binary_hnsw_distance_computer_fixSL(
|
|
36
|
+
int code_size,
|
|
37
|
+
IndexBinaryFlat* flat_storage);
|
|
38
|
+
/** @} */
|
|
39
|
+
|
|
40
|
+
/** @name IndexBinaryIVF dispatch
|
|
41
|
+
* @{ */
|
|
42
|
+
template <SIMDLevel SL>
|
|
43
|
+
BinaryInvertedListScanner* make_binary_ivf_scanner_fixSL(
|
|
44
|
+
size_t code_size,
|
|
45
|
+
bool store_pairs);
|
|
46
|
+
|
|
47
|
+
template <SIMDLevel SL>
|
|
48
|
+
void search_knn_hamming_per_invlist_fixSL(
|
|
49
|
+
int code_size,
|
|
50
|
+
const IndexBinaryIVF* ivf,
|
|
51
|
+
size_t n,
|
|
52
|
+
const uint8_t* x,
|
|
53
|
+
idx_t k,
|
|
54
|
+
const idx_t* keys_in,
|
|
55
|
+
const int32_t* coarse_dis,
|
|
56
|
+
int32_t* distances,
|
|
57
|
+
idx_t* labels,
|
|
58
|
+
bool store_pairs,
|
|
59
|
+
const IVFSearchParameters* params);
|
|
60
|
+
|
|
61
|
+
template <SIMDLevel SL>
|
|
62
|
+
void search_knn_hamming_count_fixSL(
|
|
63
|
+
int code_size,
|
|
64
|
+
bool store_pairs,
|
|
65
|
+
const IndexBinaryIVF* ivf,
|
|
66
|
+
size_t nx,
|
|
67
|
+
const uint8_t* x,
|
|
68
|
+
const idx_t* keys,
|
|
69
|
+
int k,
|
|
70
|
+
int32_t* distances,
|
|
71
|
+
idx_t* labels,
|
|
72
|
+
const IVFSearchParameters* params);
|
|
73
|
+
/** @} */
|
|
74
|
+
|
|
75
|
+
/** @name IndexBinaryHash dispatch
|
|
76
|
+
* @{ */
|
|
77
|
+
template <SIMDLevel SL>
|
|
78
|
+
void binary_hash_knn_search_fixSL(
|
|
79
|
+
const IndexBinaryHash& index,
|
|
80
|
+
const uint8_t* q,
|
|
81
|
+
idx_t k,
|
|
82
|
+
int32_t* heap_sim,
|
|
83
|
+
idx_t* heap_ids,
|
|
84
|
+
size_t& n0,
|
|
85
|
+
size_t& nlist,
|
|
86
|
+
size_t& ndis);
|
|
87
|
+
|
|
88
|
+
template <SIMDLevel SL>
|
|
89
|
+
void binary_hash_range_search_fixSL(
|
|
90
|
+
const IndexBinaryHash& index,
|
|
91
|
+
const uint8_t* q,
|
|
92
|
+
int radius,
|
|
93
|
+
RangeQueryResult& qres,
|
|
94
|
+
size_t& n0,
|
|
95
|
+
size_t& nlist,
|
|
96
|
+
size_t& ndis);
|
|
97
|
+
|
|
98
|
+
template <SIMDLevel SL>
|
|
99
|
+
void binary_multihash_knn_search_fixSL(
|
|
100
|
+
const IndexBinaryMultiHash& index,
|
|
101
|
+
const uint8_t* q,
|
|
102
|
+
idx_t k,
|
|
103
|
+
int32_t* heap_sim,
|
|
104
|
+
idx_t* heap_ids,
|
|
105
|
+
size_t& n0,
|
|
106
|
+
size_t& nlist,
|
|
107
|
+
size_t& ndis);
|
|
108
|
+
|
|
109
|
+
template <SIMDLevel SL>
|
|
110
|
+
void binary_multihash_range_search_fixSL(
|
|
111
|
+
const IndexBinaryMultiHash& index,
|
|
112
|
+
const uint8_t* q,
|
|
113
|
+
int radius,
|
|
114
|
+
RangeQueryResult& qres,
|
|
115
|
+
size_t& n0,
|
|
116
|
+
size_t& nlist,
|
|
117
|
+
size_t& ndis);
|
|
118
|
+
/** @} */
|
|
119
|
+
|
|
120
|
+
/** @name IndexIVFSpectralHash dispatch
|
|
121
|
+
* @{ */
|
|
122
|
+
template <SIMDLevel SL>
|
|
123
|
+
InvertedListScanner* make_spectral_hash_scanner_fixSL(
|
|
124
|
+
int code_size,
|
|
125
|
+
const IndexIVFSpectralHash* index,
|
|
126
|
+
bool store_pairs);
|
|
127
|
+
/** @} */
|
|
128
|
+
|
|
129
|
+
/** @name IndexPQ polysemous dispatch
|
|
130
|
+
* @{ */
|
|
131
|
+
template <SIMDLevel SL>
|
|
132
|
+
size_t polysemous_inner_loop_fixSL(
|
|
133
|
+
int code_size,
|
|
134
|
+
const IndexPQ* index,
|
|
135
|
+
const float* dis_table_qi,
|
|
136
|
+
const uint8_t* q_code,
|
|
137
|
+
size_t k,
|
|
138
|
+
float* heap_dis,
|
|
139
|
+
int64_t* heap_ids,
|
|
140
|
+
int ht);
|
|
141
|
+
/** @} */
|
|
142
|
+
|
|
143
|
+
} // namespace faiss
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifdef COMPILE_SIMD_ARM_NEON
|
|
9
|
+
|
|
10
|
+
#define THE_SIMD_LEVEL SIMDLevel::ARM_NEON
|
|
11
|
+
|
|
12
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
13
|
+
#include <faiss/utils/hamming_distance/hamming_computer-neon.h>
|
|
14
|
+
|
|
15
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
16
|
+
#include <faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h>
|
|
17
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
18
|
+
#include <faiss/impl/binary_hamming/IndexBinaryHash_impl.h>
|
|
19
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
20
|
+
#include <faiss/impl/binary_hamming/IndexBinaryIVF_impl.h>
|
|
21
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
22
|
+
#include <faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h>
|
|
23
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
24
|
+
#include <faiss/impl/binary_hamming/IndexPQ_impl.h>
|
|
25
|
+
|
|
26
|
+
#endif // COMPILE_SIMD_ARM_NEON
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#ifdef COMPILE_SIMD_RISCV_RVV
|
|
9
|
+
|
|
10
|
+
#define THE_SIMD_LEVEL SIMDLevel::RISCV_RVV
|
|
11
|
+
|
|
12
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
13
|
+
#include <faiss/utils/hamming_distance/hamming_computer-rvv.h>
|
|
14
|
+
|
|
15
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
16
|
+
#include <faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h>
|
|
17
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
18
|
+
#include <faiss/impl/binary_hamming/IndexBinaryHash_impl.h>
|
|
19
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
20
|
+
#include <faiss/impl/binary_hamming/IndexBinaryIVF_impl.h>
|
|
21
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
22
|
+
#include <faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h>
|
|
23
|
+
// NOLINTNEXTLINE(facebook-hte-InlineHeader)
|
|
24
|
+
#include <faiss/impl/binary_hamming/IndexPQ_impl.h>
|
|
25
|
+
|
|
26
|
+
#endif // COMPILE_SIMD_RISCV_RVV
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
#include <cstdio>
|
|
11
11
|
|
|
12
12
|
#include <faiss/IndexIVF.h>
|
|
13
|
+
#include <faiss/impl/InvertedListScannerStats.h>
|
|
13
14
|
#include <faiss/impl/ResultHandler.h>
|
|
14
15
|
|
|
15
16
|
/* This is the inner loop of the inverted list scanners. The default version
|
|
@@ -45,12 +46,16 @@ size_t run_scan_codes1(
|
|
|
45
46
|
}
|
|
46
47
|
}
|
|
47
48
|
|
|
49
|
+
// post-IDSelector: distance is about to be computed for this code.
|
|
50
|
+
handler.stats.scan_cnt++;
|
|
48
51
|
float dis = scanner.distance_to_code(codes); // will be inlined if final
|
|
49
52
|
if (C::cmp(threshold, dis)) {
|
|
50
53
|
int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
|
|
51
|
-
handler.add_result(dis, id)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
+
if (handler.add_result(dis, id)) {
|
|
55
|
+
handler.stats.nheap_updates++;
|
|
56
|
+
nup++;
|
|
57
|
+
threshold = handler.threshold;
|
|
58
|
+
}
|
|
54
59
|
}
|
|
55
60
|
codes += code_size;
|
|
56
61
|
}
|
|
@@ -11,9 +11,6 @@
|
|
|
11
11
|
|
|
12
12
|
namespace faiss {
|
|
13
13
|
|
|
14
|
-
// Forward declarations
|
|
15
|
-
struct NormTableScaler;
|
|
16
|
-
|
|
17
14
|
namespace rabitq_utils {
|
|
18
15
|
struct QueryFactorsData;
|
|
19
16
|
}
|
|
@@ -22,8 +19,10 @@ struct QueryFactorsData;
|
|
|
22
19
|
* Simple context object that holds processors for FastScan operations.
|
|
23
20
|
* */
|
|
24
21
|
struct FastScanDistancePostProcessing {
|
|
25
|
-
/// Norm scaling processor for Additive Quantizers
|
|
26
|
-
|
|
22
|
+
/// Norm scaling processor for Additive Quantizers.
|
|
23
|
+
/// The scale is encoded in a 2x4 bit PQ table, then scaled by this int.
|
|
24
|
+
/// Set to 0 if unused.
|
|
25
|
+
int pq2x4_scale = 0;
|
|
27
26
|
|
|
28
27
|
/// Query factors data pointer for RaBitQ (nullptr if not needed)
|
|
29
28
|
/// This pointer should point to the beginning of the relevant
|
|
@@ -36,12 +35,20 @@ struct FastScanDistancePostProcessing {
|
|
|
36
35
|
/// Set to 0 to use index->nprobe as fallback.
|
|
37
36
|
size_t nprobe = 0;
|
|
38
37
|
|
|
38
|
+
/// RaBitQ query quantization bits override.
|
|
39
|
+
/// Set to 0 to use the index default (index->qb).
|
|
40
|
+
uint8_t qb = 0;
|
|
41
|
+
|
|
42
|
+
/// RaBitQ centered scalar quantizer override.
|
|
43
|
+
/// Only used when qb > 0 (i.e., when params are overridden).
|
|
44
|
+
bool centered = false;
|
|
45
|
+
|
|
39
46
|
/// Default constructor - no processing
|
|
40
47
|
FastScanDistancePostProcessing() = default;
|
|
41
48
|
|
|
42
49
|
/// Check if norm scaling is enabled
|
|
43
50
|
bool has_norm_scaling() const {
|
|
44
|
-
return
|
|
51
|
+
return pq2x4_scale != 0;
|
|
45
52
|
}
|
|
46
53
|
|
|
47
54
|
/// Check if query factors processing is enabled
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
#include <cstdint>
|
|
11
11
|
#include <cstdlib>
|
|
12
12
|
|
|
13
|
-
#include <faiss/
|
|
13
|
+
#include <faiss/impl/simdlib/simdlib_dispatch.h>
|
|
14
14
|
|
|
15
15
|
/*******************************************
|
|
16
16
|
* The Scaler objects are used to specialize the handling of the
|
|
@@ -20,8 +20,15 @@
|
|
|
20
20
|
namespace faiss {
|
|
21
21
|
|
|
22
22
|
/// no-op handler
|
|
23
|
+
template <SIMDLevel SL = SINGLE_SIMD_LEVEL>
|
|
23
24
|
struct DummyScaler {
|
|
24
25
|
static constexpr int nscale = 0;
|
|
26
|
+
static constexpr SIMDLevel SL256 = simd256_level_selector<SL>::value;
|
|
27
|
+
static constexpr SIMDLevel SL512 = simd512_level_selector<SL>::value;
|
|
28
|
+
using simd32uint8 = simd32uint8_tpl<SL256>;
|
|
29
|
+
using simd16uint16 = simd16uint16_tpl<SL256>;
|
|
30
|
+
using simd64uint8 = simd64uint8_tpl<SL512>;
|
|
31
|
+
using simd32uint16 = simd32uint16_tpl<SL512>;
|
|
25
32
|
|
|
26
33
|
inline simd32uint8 lookup(const simd32uint8&, const simd32uint8&) const {
|
|
27
34
|
FAISS_THROW_MSG("DummyScaler::lookup should not be called.");
|
|
@@ -38,7 +45,6 @@ struct DummyScaler {
|
|
|
38
45
|
return simd16uint16(0);
|
|
39
46
|
}
|
|
40
47
|
|
|
41
|
-
#ifdef __AVX512F__
|
|
42
48
|
inline simd64uint8 lookup(const simd64uint8&, const simd64uint8&) const {
|
|
43
49
|
FAISS_THROW_MSG("DummyScaler::lookup should not be called.");
|
|
44
50
|
return simd64uint8(0);
|
|
@@ -53,7 +59,6 @@ struct DummyScaler {
|
|
|
53
59
|
FAISS_THROW_MSG("DummyScaler::scale_hi should not be called.");
|
|
54
60
|
return simd32uint16(0);
|
|
55
61
|
}
|
|
56
|
-
#endif
|
|
57
62
|
|
|
58
63
|
template <class dist_t>
|
|
59
64
|
inline dist_t scale_one(const dist_t&) const {
|
|
@@ -64,8 +69,16 @@ struct DummyScaler {
|
|
|
64
69
|
|
|
65
70
|
/// consumes 2x4 bits to encode a norm as a scalar additive quantizer
|
|
66
71
|
/// the norm is scaled because its range is larger than other components
|
|
72
|
+
template <SIMDLevel SL = SINGLE_SIMD_LEVEL>
|
|
67
73
|
struct NormTableScaler {
|
|
68
74
|
static constexpr int nscale = 2;
|
|
75
|
+
static constexpr SIMDLevel SL256 = simd256_level_selector<SL>::value;
|
|
76
|
+
static constexpr SIMDLevel SL512 = simd512_level_selector<SL>::value;
|
|
77
|
+
using simd32uint8 = simd32uint8_tpl<SL256>;
|
|
78
|
+
using simd16uint16 = simd16uint16_tpl<SL256>;
|
|
79
|
+
using simd64uint8 = simd64uint8_tpl<SL512>;
|
|
80
|
+
using simd32uint16 = simd32uint16_tpl<SL512>;
|
|
81
|
+
|
|
69
82
|
int scale_int;
|
|
70
83
|
simd16uint16 scale_simd;
|
|
71
84
|
|
|
@@ -84,7 +97,6 @@ struct NormTableScaler {
|
|
|
84
97
|
return (simd16uint16(res) >> 8) * scale_simd;
|
|
85
98
|
}
|
|
86
99
|
|
|
87
|
-
#ifdef __AVX512F__
|
|
88
100
|
inline simd64uint8 lookup(const simd64uint8& lut, const simd64uint8& c)
|
|
89
101
|
const {
|
|
90
102
|
return lut.lookup_4_lanes(c);
|
|
@@ -99,7 +111,6 @@ struct NormTableScaler {
|
|
|
99
111
|
auto scale_simd_wide = simd32uint16(scale_simd, scale_simd);
|
|
100
112
|
return (simd32uint16(res) >> 8) * scale_simd_wide;
|
|
101
113
|
}
|
|
102
|
-
#endif
|
|
103
114
|
|
|
104
115
|
// for non-SIMD implem 2, 3, 4
|
|
105
116
|
template <class dist_t>
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* @file accumulate_loops.h
|
|
12
|
+
* @brief Shared accumulation loop helpers for fast-scan search paths.
|
|
13
|
+
*
|
|
14
|
+
* Contains:
|
|
15
|
+
* - accumulate_fixed_blocks / pq4_accumulate_loop_fixed_scaler
|
|
16
|
+
* (search_1 multi-BB path, bbs > 32)
|
|
17
|
+
* - accumulate_q_4step_256 / pq4_accumulate_loop_qbs_fixed_scaler_256
|
|
18
|
+
* (QBS path, bbs == 32, 256-bit kernel only)
|
|
19
|
+
*
|
|
20
|
+
* The QBS helpers here use pq4_kernel_qbs_256 exclusively (not
|
|
21
|
+
* decompose_qbs.h) because decompose_qbs.h includes kernels_simd512.h
|
|
22
|
+
* whose 512-bit types need explicit SIMD levels. The 512-bit QBS path
|
|
23
|
+
* lives in accumulate_loops_512.h, used by the AVX512 per-ISA TU.
|
|
24
|
+
*
|
|
25
|
+
* All functions live in `namespace faiss` (not anonymous) so they can be
|
|
26
|
+
* shared by both the per-SIMD TU dispatcher (dispatching.h) and the old
|
|
27
|
+
* free-function search paths (pq4_fast_scan_search_1.cpp).
|
|
28
|
+
*
|
|
29
|
+
* The QBS helpers here always use pq4_kernel_qbs_256 (never 512-bit).
|
|
30
|
+
* This is required for the per-SIMD DD TUs where SINGLE_SIMD_LEVEL=NONE
|
|
31
|
+
* leaves 512-bit types empty. The old pq4_fast_scan_search_qbs.cpp
|
|
32
|
+
* continues to use decompose_qbs.h which includes both 256 and 512 paths.
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
#include <cassert>
|
|
36
|
+
|
|
37
|
+
#include <faiss/impl/FaissAssert.h>
|
|
38
|
+
#include <faiss/impl/fast_scan/LookupTableScaler.h>
|
|
39
|
+
#include <faiss/impl/fast_scan/kernels_simd256.h>
|
|
40
|
+
#include <faiss/impl/fast_scan/simd_result_handlers.h>
|
|
41
|
+
|
|
42
|
+
namespace faiss {
|
|
43
|
+
|
|
44
|
+
using namespace simd_result_handlers;
|
|
45
|
+
|
|
46
|
+
/***************************************************************
|
|
47
|
+
* Search_1 path helpers (multi-BB kernel, bbs > 32)
|
|
48
|
+
***************************************************************/
|
|
49
|
+
|
|
50
|
+
template <
|
|
51
|
+
int NQ,
|
|
52
|
+
int BB,
|
|
53
|
+
SIMDLevel KernelSL = SINGLE_SIMD_LEVEL,
|
|
54
|
+
class ResultHandler,
|
|
55
|
+
class Scaler>
|
|
56
|
+
void accumulate_fixed_blocks(
|
|
57
|
+
size_t nb,
|
|
58
|
+
int nsq,
|
|
59
|
+
const uint8_t* codes,
|
|
60
|
+
const uint8_t* LUT,
|
|
61
|
+
ResultHandler& res,
|
|
62
|
+
const Scaler& scaler,
|
|
63
|
+
size_t block_stride) {
|
|
64
|
+
constexpr int bbs = 32 * BB;
|
|
65
|
+
for_each_block<bbs>(nb, codes, block_stride, res, [&](size_t) {
|
|
66
|
+
FixedStorageHandler<NQ, 2 * BB, KernelSL> res2;
|
|
67
|
+
kernel_accumulate_block<NQ, BB, KernelSL>(
|
|
68
|
+
nsq, codes, LUT, res2, scaler);
|
|
69
|
+
res2.to_other_handler(res);
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
template <
|
|
74
|
+
SIMDLevel KernelSL = SINGLE_SIMD_LEVEL,
|
|
75
|
+
class ResultHandler,
|
|
76
|
+
class Scaler>
|
|
77
|
+
void pq4_accumulate_loop_fixed_scaler(
|
|
78
|
+
int nq,
|
|
79
|
+
size_t nb,
|
|
80
|
+
int bbs,
|
|
81
|
+
int nsq,
|
|
82
|
+
const uint8_t* codes,
|
|
83
|
+
const uint8_t* LUT,
|
|
84
|
+
ResultHandler& res,
|
|
85
|
+
const Scaler& scaler,
|
|
86
|
+
size_t block_stride) {
|
|
87
|
+
FAISS_THROW_IF_NOT(is_aligned_pointer(codes));
|
|
88
|
+
FAISS_THROW_IF_NOT(is_aligned_pointer(LUT));
|
|
89
|
+
FAISS_THROW_IF_NOT(bbs % 32 == 0);
|
|
90
|
+
FAISS_THROW_IF_NOT(nb % bbs == 0);
|
|
91
|
+
|
|
92
|
+
#define FAISS_ACCLOOP_DISPATCH(NQ, BB) \
|
|
93
|
+
case NQ * 1000 + BB: \
|
|
94
|
+
accumulate_fixed_blocks<NQ, BB, KernelSL>( \
|
|
95
|
+
nb, nsq, codes, LUT, res, scaler, block_stride); \
|
|
96
|
+
break
|
|
97
|
+
|
|
98
|
+
switch (nq * 1000 + bbs / 32) {
|
|
99
|
+
FAISS_ACCLOOP_DISPATCH(1, 1);
|
|
100
|
+
FAISS_ACCLOOP_DISPATCH(1, 2);
|
|
101
|
+
FAISS_ACCLOOP_DISPATCH(1, 3);
|
|
102
|
+
FAISS_ACCLOOP_DISPATCH(1, 4);
|
|
103
|
+
FAISS_ACCLOOP_DISPATCH(1, 5);
|
|
104
|
+
FAISS_ACCLOOP_DISPATCH(2, 1);
|
|
105
|
+
FAISS_ACCLOOP_DISPATCH(2, 2);
|
|
106
|
+
FAISS_ACCLOOP_DISPATCH(3, 1);
|
|
107
|
+
FAISS_ACCLOOP_DISPATCH(4, 1);
|
|
108
|
+
default:
|
|
109
|
+
FAISS_THROW_FMT("nq=%d bbs=%d not instantiated", nq, bbs);
|
|
110
|
+
}
|
|
111
|
+
#undef FAISS_ACCLOOP_DISPATCH
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
/***************************************************************
|
|
115
|
+
* QBS path helpers (bbs == 32, 256-bit kernel only)
|
|
116
|
+
***************************************************************/
|
|
117
|
+
|
|
118
|
+
template <
|
|
119
|
+
int QBS,
|
|
120
|
+
SIMDLevel KernelSL = SINGLE_SIMD_LEVEL,
|
|
121
|
+
class ResultHandler,
|
|
122
|
+
class Scaler>
|
|
123
|
+
void accumulate_q_4step_256(
|
|
124
|
+
size_t ntotal2,
|
|
125
|
+
int nsq,
|
|
126
|
+
const uint8_t* codes,
|
|
127
|
+
const uint8_t* LUT0,
|
|
128
|
+
ResultHandler& res,
|
|
129
|
+
const Scaler& scaler,
|
|
130
|
+
size_t block_stride) {
|
|
131
|
+
constexpr int Q1 = QBS & 15;
|
|
132
|
+
constexpr int Q2 = (QBS >> 4) & 15;
|
|
133
|
+
constexpr int Q3 = (QBS >> 8) & 15;
|
|
134
|
+
constexpr int Q4 = (QBS >> 12) & 15;
|
|
135
|
+
constexpr int SQ = Q1 + Q2 + Q3 + Q4;
|
|
136
|
+
|
|
137
|
+
for_each_block<32>(ntotal2, codes, block_stride, res, [&](size_t) {
|
|
138
|
+
FixedStorageHandler<SQ, 2, KernelSL> res2;
|
|
139
|
+
const uint8_t* LUT = LUT0;
|
|
140
|
+
pq4_kernel_qbs_256<Q1, KernelSL>(nsq, codes, LUT, res2, scaler);
|
|
141
|
+
LUT += Q1 * nsq * 16;
|
|
142
|
+
if (Q2 > 0) {
|
|
143
|
+
res2.set_block_origin(Q1, 0);
|
|
144
|
+
pq4_kernel_qbs_256<Q2, KernelSL>(nsq, codes, LUT, res2, scaler);
|
|
145
|
+
LUT += Q2 * nsq * 16;
|
|
146
|
+
}
|
|
147
|
+
if (Q3 > 0) {
|
|
148
|
+
res2.set_block_origin(Q1 + Q2, 0);
|
|
149
|
+
pq4_kernel_qbs_256<Q3, KernelSL>(nsq, codes, LUT, res2, scaler);
|
|
150
|
+
LUT += Q3 * nsq * 16;
|
|
151
|
+
}
|
|
152
|
+
if (Q4 > 0) {
|
|
153
|
+
res2.set_block_origin(Q1 + Q2 + Q3, 0);
|
|
154
|
+
pq4_kernel_qbs_256<Q4, KernelSL>(nsq, codes, LUT, res2, scaler);
|
|
155
|
+
}
|
|
156
|
+
res2.to_other_handler(res);
|
|
157
|
+
});
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
template <
|
|
161
|
+
SIMDLevel KernelSL = SINGLE_SIMD_LEVEL,
|
|
162
|
+
class ResultHandler,
|
|
163
|
+
class Scaler>
|
|
164
|
+
void pq4_accumulate_loop_qbs_fixed_scaler_256(
|
|
165
|
+
int qbs,
|
|
166
|
+
size_t ntotal2,
|
|
167
|
+
int nsq,
|
|
168
|
+
const uint8_t* codes,
|
|
169
|
+
const uint8_t* LUT0,
|
|
170
|
+
ResultHandler& res,
|
|
171
|
+
const Scaler& scaler,
|
|
172
|
+
size_t block_stride) {
|
|
173
|
+
assert(nsq % 2 == 0);
|
|
174
|
+
assert(is_aligned_pointer(codes));
|
|
175
|
+
assert(is_aligned_pointer(LUT0));
|
|
176
|
+
|
|
177
|
+
switch (qbs) {
|
|
178
|
+
#define FAISS_QBS256_DISPATCH(QBS) \
|
|
179
|
+
case QBS: \
|
|
180
|
+
accumulate_q_4step_256<QBS, KernelSL>( \
|
|
181
|
+
ntotal2, nsq, codes, LUT0, res, scaler, block_stride); \
|
|
182
|
+
return;
|
|
183
|
+
FAISS_QBS256_DISPATCH(0x3333); // 12
|
|
184
|
+
FAISS_QBS256_DISPATCH(0x2333); // 11
|
|
185
|
+
FAISS_QBS256_DISPATCH(0x2233); // 10
|
|
186
|
+
FAISS_QBS256_DISPATCH(0x333); // 9
|
|
187
|
+
FAISS_QBS256_DISPATCH(0x2223); // 9
|
|
188
|
+
FAISS_QBS256_DISPATCH(0x233); // 8
|
|
189
|
+
FAISS_QBS256_DISPATCH(0x1223); // 8
|
|
190
|
+
FAISS_QBS256_DISPATCH(0x223); // 7
|
|
191
|
+
FAISS_QBS256_DISPATCH(0x34); // 7
|
|
192
|
+
FAISS_QBS256_DISPATCH(0x133); // 7
|
|
193
|
+
FAISS_QBS256_DISPATCH(0x6); // 6
|
|
194
|
+
FAISS_QBS256_DISPATCH(0x33); // 6
|
|
195
|
+
FAISS_QBS256_DISPATCH(0x123); // 6
|
|
196
|
+
FAISS_QBS256_DISPATCH(0x222); // 6
|
|
197
|
+
FAISS_QBS256_DISPATCH(0x23); // 5
|
|
198
|
+
FAISS_QBS256_DISPATCH(0x5); // 5
|
|
199
|
+
FAISS_QBS256_DISPATCH(0x13); // 4
|
|
200
|
+
FAISS_QBS256_DISPATCH(0x22); // 4
|
|
201
|
+
FAISS_QBS256_DISPATCH(0x4); // 4
|
|
202
|
+
FAISS_QBS256_DISPATCH(0x3); // 3
|
|
203
|
+
FAISS_QBS256_DISPATCH(0x21); // 3
|
|
204
|
+
FAISS_QBS256_DISPATCH(0x2); // 2
|
|
205
|
+
FAISS_QBS256_DISPATCH(0x1); // 1
|
|
206
|
+
#undef FAISS_QBS256_DISPATCH
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// Default: qbs not known at compile time
|
|
210
|
+
for_each_block<32>(ntotal2, codes, block_stride, res, [&](size_t j0) {
|
|
211
|
+
const uint8_t* LUT = LUT0;
|
|
212
|
+
int qi = qbs;
|
|
213
|
+
int i0 = 0;
|
|
214
|
+
while (qi) {
|
|
215
|
+
int nq = qi & 15;
|
|
216
|
+
qi >>= 4;
|
|
217
|
+
res.set_block_origin(i0, j0);
|
|
218
|
+
#define FAISS_NQ256_DISPATCH(NQ) \
|
|
219
|
+
case NQ: \
|
|
220
|
+
pq4_kernel_qbs_256<NQ, KernelSL>(nsq, codes, LUT, res, scaler); \
|
|
221
|
+
break
|
|
222
|
+
switch (nq) {
|
|
223
|
+
FAISS_NQ256_DISPATCH(1);
|
|
224
|
+
FAISS_NQ256_DISPATCH(2);
|
|
225
|
+
FAISS_NQ256_DISPATCH(3);
|
|
226
|
+
FAISS_NQ256_DISPATCH(4);
|
|
227
|
+
#undef FAISS_NQ256_DISPATCH
|
|
228
|
+
default:
|
|
229
|
+
FAISS_THROW_FMT("accumulate nq=%d not instantiated", nq);
|
|
230
|
+
}
|
|
231
|
+
i0 += nq;
|
|
232
|
+
LUT += nq * nsq * 16;
|
|
233
|
+
}
|
|
234
|
+
});
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
} // namespace faiss
|