faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -13,37 +13,34 @@
|
|
|
13
13
|
|
|
14
14
|
#include <faiss/impl/CodePacker.h>
|
|
15
15
|
#include <faiss/impl/FaissAssert.h>
|
|
16
|
-
#include <faiss/impl/FastScanDistancePostProcessing.h>
|
|
17
16
|
#include <faiss/impl/IDSelector.h>
|
|
18
|
-
#include <faiss/impl/LookupTableScaler.h>
|
|
19
17
|
#include <faiss/impl/RaBitQUtils.h>
|
|
20
|
-
#include <faiss/impl/pq4_fast_scan.h>
|
|
21
|
-
#include <faiss/impl/simd_result_handlers.h>
|
|
18
|
+
#include <faiss/impl/fast_scan/FastScanDistancePostProcessing.h>
|
|
19
|
+
#include <faiss/impl/fast_scan/fast_scan.h>
|
|
20
|
+
#include <faiss/impl/fast_scan/simd_result_handlers.h>
|
|
22
21
|
#include <faiss/utils/hamming.h>
|
|
23
22
|
#include <faiss/utils/quantize_lut.h>
|
|
24
23
|
#include <faiss/utils/utils.h>
|
|
25
24
|
|
|
26
25
|
namespace faiss {
|
|
27
26
|
|
|
28
|
-
using namespace simd_result_handlers;
|
|
29
|
-
|
|
30
27
|
inline size_t roundup(size_t a, size_t b) {
|
|
31
28
|
return (a + b - 1) / b * b;
|
|
32
29
|
}
|
|
33
30
|
|
|
34
31
|
void IndexFastScan::init_fastscan(
|
|
35
|
-
int d,
|
|
32
|
+
int d_,
|
|
36
33
|
size_t M_init,
|
|
37
34
|
size_t nbits_init,
|
|
38
35
|
MetricType metric,
|
|
39
|
-
int bbs) {
|
|
36
|
+
int bbs_) {
|
|
40
37
|
FAISS_THROW_IF_NOT(nbits_init == 4);
|
|
41
|
-
FAISS_THROW_IF_NOT(bbs % 32 == 0);
|
|
42
|
-
this->d = d;
|
|
38
|
+
FAISS_THROW_IF_NOT(bbs_ % 32 == 0);
|
|
39
|
+
this->d = d_;
|
|
43
40
|
this->M = M_init;
|
|
44
41
|
this->nbits = nbits_init;
|
|
45
42
|
this->metric_type = metric;
|
|
46
|
-
this->bbs = bbs;
|
|
43
|
+
this->bbs = bbs_;
|
|
47
44
|
ksub = (1 << nbits_init);
|
|
48
45
|
|
|
49
46
|
code_size = (M_init * nbits_init + 7) / 8;
|
|
@@ -152,7 +149,7 @@ void IndexFastScan::check_compatible_for_merge(const Index& otherIndex) const {
|
|
|
152
149
|
"can only merge indexes of the same type");
|
|
153
150
|
}
|
|
154
151
|
|
|
155
|
-
void IndexFastScan::merge_from(Index& otherIndex, idx_t add_id) {
|
|
152
|
+
void IndexFastScan::merge_from(Index& otherIndex, idx_t /*add_id*/) {
|
|
156
153
|
check_compatible_for_merge(otherIndex);
|
|
157
154
|
IndexFastScan* other = static_cast<IndexFastScan*>(&otherIndex);
|
|
158
155
|
ntotal2 = roundup(ntotal + other->ntotal, bbs);
|
|
@@ -161,7 +158,7 @@ void IndexFastScan::merge_from(Index& otherIndex, idx_t add_id) {
|
|
|
161
158
|
std::unique_ptr<CodePacker> packer(get_CodePacker());
|
|
162
159
|
std::unique_ptr<CodePacker> other_packer(other->get_CodePacker());
|
|
163
160
|
|
|
164
|
-
for (
|
|
161
|
+
for (idx_t i = 0; i < other->ntotal; i++) {
|
|
165
162
|
other_packer->unpack_1(other->codes.data(), i, buffer.data());
|
|
166
163
|
packer->pack_1(buffer.data(), ntotal + i, codes.data());
|
|
167
164
|
}
|
|
@@ -187,18 +184,18 @@ void estimators_from_tables_generic(
|
|
|
187
184
|
BitstringReader bsr(codes + j * index.code_size, index.code_size);
|
|
188
185
|
accu_t dis = 0;
|
|
189
186
|
const dis_t* dt = dis_table;
|
|
190
|
-
int nscale = context.
|
|
187
|
+
int nscale = context.pq2x4_scale ? 2 : 0;
|
|
191
188
|
|
|
192
189
|
for (size_t m = 0; m < index.M - nscale; m++) {
|
|
193
|
-
uint64_t c = bsr.read(index.nbits);
|
|
190
|
+
uint64_t c = bsr.read(static_cast<int>(index.nbits));
|
|
194
191
|
dis += dt[c];
|
|
195
192
|
dt += index.ksub;
|
|
196
193
|
}
|
|
197
194
|
|
|
198
|
-
if (nscale
|
|
195
|
+
if (nscale) {
|
|
199
196
|
for (size_t m = 0; m < nscale; m++) {
|
|
200
|
-
uint64_t c = bsr.read(index.nbits);
|
|
201
|
-
dis +=
|
|
197
|
+
uint64_t c = bsr.read(static_cast<int>(index.nbits));
|
|
198
|
+
dis += dt[c] * context.pq2x4_scale;
|
|
202
199
|
dt += index.ksub;
|
|
203
200
|
}
|
|
204
201
|
}
|
|
@@ -212,43 +209,18 @@ void estimators_from_tables_generic(
|
|
|
212
209
|
|
|
213
210
|
} // anonymous namespace
|
|
214
211
|
|
|
215
|
-
|
|
216
|
-
SIMDResultHandlerToFloat* IndexFastScan::make_knn_handler(
|
|
212
|
+
std::unique_ptr<FastScanCodeScanner> IndexFastScan::make_knn_scanner(
|
|
217
213
|
bool is_max,
|
|
218
|
-
int impl,
|
|
219
214
|
idx_t n,
|
|
220
215
|
idx_t k,
|
|
221
|
-
size_t ntotal,
|
|
216
|
+
size_t ntotal_,
|
|
222
217
|
float* distances,
|
|
223
218
|
idx_t* labels,
|
|
224
219
|
const IDSelector* sel,
|
|
220
|
+
int impl,
|
|
225
221
|
const FastScanDistancePostProcessing&) const {
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
using HeapHC = HeapHandler<CMax<uint16_t, int>, false>;
|
|
229
|
-
using ReservoirHC = ReservoirHandler<CMax<uint16_t, int>, false>;
|
|
230
|
-
using SingleResultHC = SingleResultHandler<CMax<uint16_t, int>, false>;
|
|
231
|
-
|
|
232
|
-
if (k == 1) {
|
|
233
|
-
return new SingleResultHC(n, ntotal, distances, labels, sel);
|
|
234
|
-
} else if (impl % 2 == 0) {
|
|
235
|
-
return new HeapHC(n, ntotal, k, distances, labels, sel);
|
|
236
|
-
} else {
|
|
237
|
-
return new ReservoirHC(n, ntotal, k, 2 * k, distances, labels, sel);
|
|
238
|
-
}
|
|
239
|
-
} else {
|
|
240
|
-
using HeapHC = HeapHandler<CMin<uint16_t, int>, false>;
|
|
241
|
-
using ReservoirHC = ReservoirHandler<CMin<uint16_t, int>, false>;
|
|
242
|
-
using SingleResultHC = SingleResultHandler<CMin<uint16_t, int>, false>;
|
|
243
|
-
|
|
244
|
-
if (k == 1) {
|
|
245
|
-
return new SingleResultHC(n, ntotal, distances, labels, sel);
|
|
246
|
-
} else if (impl % 2 == 0) {
|
|
247
|
-
return new HeapHC(n, ntotal, k, distances, labels, sel);
|
|
248
|
-
} else {
|
|
249
|
-
return new ReservoirHC(n, ntotal, k, 2 * k, distances, labels, sel);
|
|
250
|
-
}
|
|
251
|
-
}
|
|
222
|
+
return make_fast_scan_knn_scanner(
|
|
223
|
+
is_max, impl, n, ntotal_, k, distances, labels, sel);
|
|
252
224
|
}
|
|
253
225
|
|
|
254
226
|
using namespace quantize_lut;
|
|
@@ -263,7 +235,7 @@ void IndexFastScan::compute_quantized_LUT(
|
|
|
263
235
|
std::unique_ptr<float[]> dis_tables(new float[n * dim12]);
|
|
264
236
|
compute_float_LUT(dis_tables.get(), n, x, context);
|
|
265
237
|
|
|
266
|
-
for (
|
|
238
|
+
for (idx_t i = 0; i < n; i++) {
|
|
267
239
|
round_uint8_per_column(
|
|
268
240
|
dis_tables.get() + i * dim12,
|
|
269
241
|
M,
|
|
@@ -272,11 +244,11 @@ void IndexFastScan::compute_quantized_LUT(
|
|
|
272
244
|
&normalizers[2 * i + 1]);
|
|
273
245
|
}
|
|
274
246
|
|
|
275
|
-
for (
|
|
247
|
+
for (idx_t i = 0; i < n; i++) {
|
|
276
248
|
const float* t_in = dis_tables.get() + i * dim12;
|
|
277
249
|
uint8_t* t_out = lut + i * M2 * ksub;
|
|
278
250
|
|
|
279
|
-
for (
|
|
251
|
+
for (size_t j = 0; j < dim12; j++) {
|
|
280
252
|
t_out[j] = int(t_in[j]);
|
|
281
253
|
}
|
|
282
254
|
memset(t_out + dim12, 0, (M2 - M) * ksub);
|
|
@@ -420,7 +392,7 @@ void IndexFastScan::search_implem_234(
|
|
|
420
392
|
if (implem == 2) {
|
|
421
393
|
// default float
|
|
422
394
|
} else if (implem == 3 || implem == 4) {
|
|
423
|
-
for (
|
|
395
|
+
for (idx_t i = 0; i < n; i++) {
|
|
424
396
|
round_uint8_per_column(
|
|
425
397
|
dis_tables.get() + i * dim12,
|
|
426
398
|
M,
|
|
@@ -453,7 +425,7 @@ void IndexFastScan::search_implem_234(
|
|
|
453
425
|
float a = normalizers[2 * i];
|
|
454
426
|
float b = normalizers[2 * i + 1];
|
|
455
427
|
|
|
456
|
-
for (
|
|
428
|
+
for (idx_t j = 0; j < k; j++) {
|
|
457
429
|
heap_dis[j] = heap_dis[j] / a + b;
|
|
458
430
|
}
|
|
459
431
|
}
|
|
@@ -469,7 +441,6 @@ void IndexFastScan::search_implem_12(
|
|
|
469
441
|
idx_t* labels,
|
|
470
442
|
int impl,
|
|
471
443
|
const FastScanDistancePostProcessing& context) const {
|
|
472
|
-
using RH = ResultHandlerCompare<C, false>;
|
|
473
444
|
FAISS_THROW_IF_NOT(bbs == 32);
|
|
474
445
|
|
|
475
446
|
// handle qbs2 blocking by recursive call
|
|
@@ -510,46 +481,37 @@ void IndexFastScan::search_implem_12(
|
|
|
510
481
|
// block sizes are encoded in qbs, 4 bits at a time
|
|
511
482
|
|
|
512
483
|
// caution: we override an object field
|
|
513
|
-
int qbs = this->qbs;
|
|
484
|
+
int qbs_ = this->qbs;
|
|
514
485
|
|
|
515
|
-
if (n != pq4_qbs_to_nq(qbs)) {
|
|
516
|
-
|
|
486
|
+
if (n != pq4_qbs_to_nq(qbs_)) {
|
|
487
|
+
qbs_ = pq4_preferred_qbs(static_cast<int>(n));
|
|
517
488
|
}
|
|
518
489
|
|
|
519
|
-
int LUT_nq =
|
|
520
|
-
|
|
490
|
+
int LUT_nq = pq4_pack_LUT_qbs(
|
|
491
|
+
qbs_, static_cast<int>(M2), quantized_dis_tables.get(), LUT.get());
|
|
521
492
|
FAISS_THROW_IF_NOT(LUT_nq == n);
|
|
522
493
|
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
494
|
+
auto scanner = make_knn_scanner(
|
|
495
|
+
C::is_max, n, k, ntotal, distances, labels, nullptr, impl, context);
|
|
496
|
+
auto* rh = scanner->handler();
|
|
497
|
+
rh->normalizers = normalizers.get();
|
|
498
|
+
// Note: skip & 2 previously set handler->disable (run kernel,
|
|
499
|
+
// discard results). Through the scanner path, skip & 2 now skips
|
|
500
|
+
// the kernel entirely (same as skip & 4), since disable is not
|
|
501
|
+
// accessible through the SIMDResultHandlerToFloat* interface.
|
|
502
|
+
if (!(skip & (2 | 4))) {
|
|
503
|
+
scanner->accumulate_loop_qbs(
|
|
504
|
+
qbs_,
|
|
534
505
|
|
|
535
|
-
handler->disable = bool(skip & 2);
|
|
536
|
-
handler->normalizers = normalizers.get();
|
|
537
|
-
|
|
538
|
-
if (skip & 4) {
|
|
539
|
-
// pass
|
|
540
|
-
} else {
|
|
541
|
-
pq4_accumulate_loop_qbs(
|
|
542
|
-
qbs,
|
|
543
506
|
ntotal2,
|
|
544
|
-
M2,
|
|
507
|
+
static_cast<int>(M2),
|
|
545
508
|
codes.get(),
|
|
546
509
|
LUT.get(),
|
|
547
|
-
|
|
548
|
-
context.norm_scaler,
|
|
510
|
+
context.pq2x4_scale,
|
|
549
511
|
get_block_stride());
|
|
550
512
|
}
|
|
551
513
|
if (!(skip & 8)) {
|
|
552
|
-
|
|
514
|
+
rh->end();
|
|
553
515
|
}
|
|
554
516
|
}
|
|
555
517
|
|
|
@@ -564,10 +526,16 @@ void IndexFastScan::search_implem_14(
|
|
|
564
526
|
idx_t* labels,
|
|
565
527
|
int impl,
|
|
566
528
|
const FastScanDistancePostProcessing& context) const {
|
|
567
|
-
using RH = ResultHandlerCompare<C, false>;
|
|
568
529
|
FAISS_THROW_IF_NOT(bbs % 32 == 0);
|
|
569
530
|
|
|
570
|
-
|
|
531
|
+
// The accumulate loop dispatch table only instantiates certain
|
|
532
|
+
// (nq, BB) pairs where BB = bbs/32. Cap the query batch size to
|
|
533
|
+
// the maximum nq instantiated for the current BB so the caller
|
|
534
|
+
// doesn't have to know about internal template constraints.
|
|
535
|
+
// BB=1 → nq up to 4, BB=2 → nq up to 2, BB>=3 → nq=1
|
|
536
|
+
int BB = bbs / 32;
|
|
537
|
+
int max_qbs = BB <= 1 ? 4 : BB == 2 ? 2 : 1;
|
|
538
|
+
int qbs2 = std::min(qbs == 0 ? 4 : qbs, max_qbs);
|
|
571
539
|
|
|
572
540
|
// handle qbs2 blocking by recursive call
|
|
573
541
|
if (n > qbs2) {
|
|
@@ -602,38 +570,33 @@ void IndexFastScan::search_implem_14(
|
|
|
602
570
|
}
|
|
603
571
|
|
|
604
572
|
AlignedTable<uint8_t> LUT(n * dim12);
|
|
605
|
-
pq4_pack_LUT(
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
// pass
|
|
623
|
-
} else {
|
|
624
|
-
pq4_accumulate_loop(
|
|
625
|
-
n,
|
|
573
|
+
pq4_pack_LUT(
|
|
574
|
+
static_cast<int>(n),
|
|
575
|
+
static_cast<int>(M2),
|
|
576
|
+
quantized_dis_tables.get(),
|
|
577
|
+
LUT.get());
|
|
578
|
+
|
|
579
|
+
auto scanner = make_knn_scanner(
|
|
580
|
+
C::is_max, n, k, ntotal, distances, labels, nullptr, impl, context);
|
|
581
|
+
auto* rh = scanner->handler();
|
|
582
|
+
rh->normalizers = normalizers.get();
|
|
583
|
+
// Note: skip & 2 previously set handler->disable (run kernel,
|
|
584
|
+
// discard results). Through the scanner path, skip & 2 now skips
|
|
585
|
+
// the kernel entirely (same as skip & 4), since disable is not
|
|
586
|
+
// accessible through the SIMDResultHandlerToFloat* interface.
|
|
587
|
+
if (!(skip & (2 | 4))) {
|
|
588
|
+
scanner->accumulate_loop(
|
|
589
|
+
static_cast<int>(n),
|
|
626
590
|
ntotal2,
|
|
627
591
|
bbs,
|
|
628
|
-
M2,
|
|
592
|
+
static_cast<int>(M2),
|
|
629
593
|
codes.get(),
|
|
630
594
|
LUT.get(),
|
|
631
|
-
|
|
632
|
-
context.norm_scaler,
|
|
595
|
+
context.pq2x4_scale,
|
|
633
596
|
get_block_stride());
|
|
634
597
|
}
|
|
635
598
|
if (!(skip & 8)) {
|
|
636
|
-
|
|
599
|
+
rh->end();
|
|
637
600
|
}
|
|
638
601
|
}
|
|
639
602
|
|
|
@@ -7,16 +7,17 @@
|
|
|
7
7
|
|
|
8
8
|
#pragma once
|
|
9
9
|
|
|
10
|
+
#include <memory>
|
|
11
|
+
|
|
10
12
|
#include <faiss/Index.h>
|
|
11
|
-
#include <faiss/impl/FastScanDistancePostProcessing.h>
|
|
13
|
+
#include <faiss/impl/fast_scan/FastScanDistancePostProcessing.h>
|
|
14
|
+
#include <faiss/impl/fast_scan/fast_scan.h>
|
|
12
15
|
#include <faiss/utils/AlignedTable.h>
|
|
13
16
|
|
|
14
17
|
namespace faiss {
|
|
15
18
|
|
|
16
19
|
struct CodePacker;
|
|
17
|
-
struct NormTableScaler;
|
|
18
20
|
struct IDSelector;
|
|
19
|
-
struct SIMDResultHandlerToFloat;
|
|
20
21
|
|
|
21
22
|
/** Fast scan version of IndexPQ and IndexAQ. Works for 4-bit PQ and AQ for now.
|
|
22
23
|
*
|
|
@@ -43,8 +44,8 @@ struct IndexFastScan : Index {
|
|
|
43
44
|
|
|
44
45
|
// vector quantizer
|
|
45
46
|
size_t M;
|
|
46
|
-
size_t nbits;
|
|
47
|
-
size_t ksub;
|
|
47
|
+
size_t nbits = 0;
|
|
48
|
+
size_t ksub = 0;
|
|
48
49
|
size_t code_size;
|
|
49
50
|
|
|
50
51
|
// packed version of the codes
|
|
@@ -122,33 +123,22 @@ struct IndexFastScan : Index {
|
|
|
122
123
|
const float* x,
|
|
123
124
|
const FastScanDistancePostProcessing& context) const = 0;
|
|
124
125
|
|
|
125
|
-
/** Create a
|
|
126
|
-
*
|
|
127
|
-
* This method can be overridden by derived classes to provide
|
|
128
|
-
* specialized handlers (e.g., RaBitQHeapHandler for RaBitQ indexes).
|
|
129
|
-
* Base implementation creates standard handlers based on k and impl.
|
|
126
|
+
/** Create a SIMD-dispatched scanner for knn search.
|
|
130
127
|
*
|
|
131
|
-
*
|
|
132
|
-
*
|
|
133
|
-
*
|
|
134
|
-
* @param k number of neighbors to find
|
|
135
|
-
* @param ntotal total number of vectors in database
|
|
136
|
-
* @param distances output distances array
|
|
137
|
-
* @param labels output labels array
|
|
138
|
-
* @param sel optional ID selector
|
|
139
|
-
* @param context processing context for distance post-processing
|
|
140
|
-
* @return pointer to created handler (never returns nullptr)
|
|
128
|
+
* Returns a FastScanCodeScanner that bundles handler + accumulation
|
|
129
|
+
* kernel behind the SIMD dispatch boundary.
|
|
130
|
+
* The scanner's accumulate methods dispatch to the optimal SIMD level.
|
|
141
131
|
*/
|
|
142
|
-
virtual
|
|
132
|
+
virtual std::unique_ptr<FastScanCodeScanner> make_knn_scanner(
|
|
143
133
|
bool is_max,
|
|
144
|
-
int impl,
|
|
145
134
|
idx_t n,
|
|
146
135
|
idx_t k,
|
|
147
136
|
size_t ntotal,
|
|
148
137
|
float* distances,
|
|
149
138
|
idx_t* labels,
|
|
150
139
|
const IDSelector* sel,
|
|
151
|
-
|
|
140
|
+
int impl = 0,
|
|
141
|
+
const FastScanDistancePostProcessing& context = {}) const;
|
|
152
142
|
|
|
153
143
|
// called by search function
|
|
154
144
|
void compute_quantized_LUT(
|
|
@@ -246,6 +236,18 @@ struct IndexFastScan : Index {
|
|
|
246
236
|
void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override {
|
|
247
237
|
compute_codes(bytes, n, x);
|
|
248
238
|
}
|
|
239
|
+
|
|
240
|
+
/** Get the size of the code portion packed by pq4_pack_codes.
|
|
241
|
+
*
|
|
242
|
+
* Returns the number of bytes per vector that are interleaved into
|
|
243
|
+
* SIMD blocks by pq4_pack_codes, excluding any embedded metadata
|
|
244
|
+
* (e.g., RaBitQ factors). The meaning of these bytes depends on the
|
|
245
|
+
* quantizer: for PQ/AQ they are 4-bit sub-quantizer nibbles, for
|
|
246
|
+
* RaBitQ they are 1-bit-per-dimension sign bits packed into nibbles.
|
|
247
|
+
*
|
|
248
|
+
* Must be implemented by all derived classes.
|
|
249
|
+
*/
|
|
250
|
+
virtual size_t fast_scan_code_size() const = 0;
|
|
249
251
|
};
|
|
250
252
|
|
|
251
253
|
struct FastScanStats {
|
|
@@ -23,8 +23,8 @@
|
|
|
23
23
|
|
|
24
24
|
namespace faiss {
|
|
25
25
|
|
|
26
|
-
IndexFlat::IndexFlat(idx_t
|
|
27
|
-
: IndexFlatCodes(sizeof(float) *
|
|
26
|
+
IndexFlat::IndexFlat(idx_t d_, MetricType metric)
|
|
27
|
+
: IndexFlatCodes(sizeof(float) * d_, d_, metric) {}
|
|
28
28
|
|
|
29
29
|
void IndexFlat::search(
|
|
30
30
|
idx_t n,
|
|
@@ -65,6 +65,7 @@ void IndexFlat::range_search(
|
|
|
65
65
|
float radius,
|
|
66
66
|
RangeSearchResult* result,
|
|
67
67
|
const SearchParameters* params) const {
|
|
68
|
+
FAISS_THROW_IF_NOT_MSG(result, "RangeSearchResult object must not be null");
|
|
68
69
|
IDSelector* sel = params ? params->sel : nullptr;
|
|
69
70
|
|
|
70
71
|
switch (metric_type) {
|
|
@@ -86,6 +87,7 @@ void IndexFlat::compute_distance_subset(
|
|
|
86
87
|
idx_t k,
|
|
87
88
|
float* distances,
|
|
88
89
|
const idx_t* labels) const {
|
|
90
|
+
FAISS_THROW_IF_NOT(k > 0);
|
|
89
91
|
switch (metric_type) {
|
|
90
92
|
case METRIC_INNER_PRODUCT:
|
|
91
93
|
fvec_inner_products_by_idx(distances, x, get_xb(), labels, d, n, k);
|
|
@@ -126,11 +128,11 @@ struct FlatL2Dis : FlatCodesDistanceComputer {
|
|
|
126
128
|
return fvec_L2sqr<SL>(b + j * d, b + i * d, d);
|
|
127
129
|
}
|
|
128
130
|
|
|
129
|
-
explicit FlatL2Dis(const IndexFlat& storage, const float*
|
|
131
|
+
explicit FlatL2Dis(const IndexFlat& storage, const float* q_ = nullptr)
|
|
130
132
|
: FlatCodesDistanceComputer(
|
|
131
133
|
storage.codes.data(),
|
|
132
134
|
storage.code_size,
|
|
133
|
-
|
|
135
|
+
q_),
|
|
134
136
|
d(storage.d),
|
|
135
137
|
nb(storage.ntotal),
|
|
136
138
|
b(storage.get_xb()),
|
|
@@ -236,13 +238,13 @@ struct FlatIPDis : FlatCodesDistanceComputer {
|
|
|
236
238
|
return fvec_inner_product<SL>(q, (const float*)code, d);
|
|
237
239
|
}
|
|
238
240
|
|
|
239
|
-
explicit FlatIPDis(const IndexFlat& storage, const float*
|
|
241
|
+
explicit FlatIPDis(const IndexFlat& storage, const float* q_in = nullptr)
|
|
240
242
|
: FlatCodesDistanceComputer(
|
|
241
243
|
storage.codes.data(),
|
|
242
244
|
storage.code_size),
|
|
243
245
|
d(storage.d),
|
|
244
246
|
nb(storage.ntotal),
|
|
245
|
-
q(
|
|
247
|
+
q(q_in),
|
|
246
248
|
b(storage.get_xb()),
|
|
247
249
|
ndis(0) {}
|
|
248
250
|
|
|
@@ -361,13 +363,13 @@ struct FlatL2WithNormsDis : FlatCodesDistanceComputer {
|
|
|
361
363
|
|
|
362
364
|
explicit FlatL2WithNormsDis(
|
|
363
365
|
const IndexFlatL2& storage,
|
|
364
|
-
const float*
|
|
366
|
+
const float* q_in = nullptr)
|
|
365
367
|
: FlatCodesDistanceComputer(
|
|
366
368
|
storage.codes.data(),
|
|
367
369
|
storage.code_size),
|
|
368
370
|
d(storage.d),
|
|
369
371
|
nb(storage.ntotal),
|
|
370
|
-
q(
|
|
372
|
+
q(q_in),
|
|
371
373
|
b(storage.get_xb()),
|
|
372
374
|
ndis(0),
|
|
373
375
|
l2norms(storage.cached_l2norms.data()),
|
|
@@ -452,8 +454,8 @@ FlatCodesDistanceComputer* IndexFlatL2::get_FlatCodesDistanceComputer() const {
|
|
|
452
454
|
* IndexFlat1D
|
|
453
455
|
***************************************************/
|
|
454
456
|
|
|
455
|
-
IndexFlat1D::IndexFlat1D(bool
|
|
456
|
-
: IndexFlatL2(1), continuous_update(
|
|
457
|
+
IndexFlat1D::IndexFlat1D(bool continuous_update_in)
|
|
458
|
+
: IndexFlatL2(1), continuous_update(continuous_update_in) {}
|
|
457
459
|
|
|
458
460
|
/// if not continuous_update, call this between the last add and
|
|
459
461
|
/// the first search
|
|
@@ -489,7 +491,8 @@ void IndexFlat1D::search(
|
|
|
489
491
|
!params, "search params not supported for this index");
|
|
490
492
|
FAISS_THROW_IF_NOT(k > 0);
|
|
491
493
|
FAISS_THROW_IF_NOT_MSG(
|
|
492
|
-
perm.size() == ntotal,
|
|
494
|
+
perm.size() == static_cast<size_t>(ntotal),
|
|
495
|
+
"Call update_permutation before search");
|
|
493
496
|
const float* xb = get_xb();
|
|
494
497
|
|
|
495
498
|
#pragma omp parallel for if (n > 10000)
|
|
@@ -624,9 +627,11 @@ inline void flat_pano_search_core(
|
|
|
624
627
|
{
|
|
625
628
|
SingleResultHandler res(handler);
|
|
626
629
|
|
|
627
|
-
std::vector<float> query_cum_norms(index.n_levels + 1);
|
|
628
|
-
std::vector<float> exact_distances(index.batch_size);
|
|
630
|
+
std::vector<float> query_cum_norms(index.pano.n_levels + 1);
|
|
629
631
|
std::vector<uint32_t> active_indices(index.batch_size);
|
|
632
|
+
std::vector<uint8_t> active_byteset(index.batch_size);
|
|
633
|
+
std::vector<float> exact_distances(index.batch_size);
|
|
634
|
+
std::vector<float> dot_buffer(index.batch_size);
|
|
630
635
|
|
|
631
636
|
#pragma omp for
|
|
632
637
|
for (int64_t i = 0; i < n; i++) {
|
|
@@ -661,7 +666,9 @@ inline void flat_pano_search_core(
|
|
|
661
666
|
nullptr,
|
|
662
667
|
use_sel,
|
|
663
668
|
active_indices,
|
|
669
|
+
active_byteset,
|
|
664
670
|
exact_distances,
|
|
671
|
+
dot_buffer,
|
|
665
672
|
threshold,
|
|
666
673
|
local_stats);
|
|
667
674
|
});
|
|
@@ -691,7 +698,7 @@ void IndexFlatPanorama::add(idx_t n, const float* x) {
|
|
|
691
698
|
size_t num_batches = (ntotal + batch_size - 1) / batch_size;
|
|
692
699
|
|
|
693
700
|
codes.resize(num_batches * batch_size * code_size);
|
|
694
|
-
cum_sums.resize(num_batches * batch_size * (n_levels + 1));
|
|
701
|
+
cum_sums.resize(num_batches * batch_size * (pano.n_levels + 1));
|
|
695
702
|
|
|
696
703
|
const uint8_t* code = reinterpret_cast<const uint8_t*>(x);
|
|
697
704
|
pano.copy_codes_to_level_layout(codes.data(), offset, n, code);
|
|
@@ -706,7 +713,7 @@ void IndexFlatPanorama::search(
|
|
|
706
713
|
idx_t* labels,
|
|
707
714
|
const SearchParameters* params) const {
|
|
708
715
|
FAISS_THROW_IF_NOT(k > 0);
|
|
709
|
-
FAISS_THROW_IF_NOT(batch_size >= k);
|
|
716
|
+
FAISS_THROW_IF_NOT(batch_size >= static_cast<size_t>(k));
|
|
710
717
|
|
|
711
718
|
dispatch_metric_compare(metric_type, [&]<typename C>() {
|
|
712
719
|
HeapBlockResultHandler<C, false> handler(
|
|
@@ -764,7 +771,7 @@ size_t IndexFlatPanorama::remove_ids(const IDSelector& sel) {
|
|
|
764
771
|
ntotal = j;
|
|
765
772
|
size_t num_batches = (ntotal + batch_size - 1) / batch_size;
|
|
766
773
|
codes.resize(num_batches * batch_size * code_size);
|
|
767
|
-
cum_sums.resize(num_batches * batch_size * (n_levels + 1));
|
|
774
|
+
cum_sums.resize(num_batches * batch_size * (pano.n_levels + 1));
|
|
768
775
|
}
|
|
769
776
|
return nremove;
|
|
770
777
|
}
|
|
@@ -836,7 +843,7 @@ void IndexFlatPanorama::search_subset(
|
|
|
836
843
|
{
|
|
837
844
|
SingleResultHandler res(handler);
|
|
838
845
|
|
|
839
|
-
std::vector<float> query_cum_norms(n_levels + 1);
|
|
846
|
+
std::vector<float> query_cum_norms(pano.n_levels + 1);
|
|
840
847
|
|
|
841
848
|
// Panorama's optimized point-wise refinement (Algorithm 2):
|
|
842
849
|
// Batch-wise Panorama, as implemented in Panorama.h, incurs
|
|
@@ -867,14 +874,14 @@ void IndexFlatPanorama::search_subset(
|
|
|
867
874
|
|
|
868
875
|
res.begin(i);
|
|
869
876
|
|
|
870
|
-
for (
|
|
877
|
+
for (idx_t j = 0; j < k_base; j++) {
|
|
871
878
|
idx_t idx = idsi[j];
|
|
872
879
|
|
|
873
880
|
if (idx < 0) {
|
|
874
881
|
continue;
|
|
875
882
|
}
|
|
876
883
|
|
|
877
|
-
size_t cum_sum_offset = (n_levels + 1) * idx;
|
|
884
|
+
size_t cum_sum_offset = (pano.n_levels + 1) * idx;
|
|
878
885
|
float cum_sum = cum_sums[cum_sum_offset];
|
|
879
886
|
float exact_distance = 0.0f;
|
|
880
887
|
if constexpr (!is_sim) {
|
|
@@ -890,7 +897,7 @@ void IndexFlatPanorama::search_subset(
|
|
|
890
897
|
local_stats.total_dims += d;
|
|
891
898
|
|
|
892
899
|
bool pruned = false;
|
|
893
|
-
for (size_t level = 0; level < n_levels; level++) {
|
|
900
|
+
for (size_t level = 0; level < pano.n_levels; level++) {
|
|
894
901
|
local_stats.total_dims_scanned +=
|
|
895
902
|
pano.level_width_floats;
|
|
896
903
|
|
|
@@ -74,7 +74,7 @@ struct IndexFlat : IndexFlatCodes {
|
|
|
74
74
|
};
|
|
75
75
|
|
|
76
76
|
struct IndexFlatIP : IndexFlat {
|
|
77
|
-
explicit IndexFlatIP(idx_t
|
|
77
|
+
explicit IndexFlatIP(idx_t d_in) : IndexFlat(d_in, METRIC_INNER_PRODUCT) {}
|
|
78
78
|
IndexFlatIP() {}
|
|
79
79
|
};
|
|
80
80
|
|
|
@@ -88,7 +88,7 @@ struct IndexFlatL2 : IndexFlat {
|
|
|
88
88
|
/**
|
|
89
89
|
* @param d dimensionality of the input vectors
|
|
90
90
|
*/
|
|
91
|
-
explicit IndexFlatL2(idx_t
|
|
91
|
+
explicit IndexFlatL2(idx_t d_in) : IndexFlat(d_in, METRIC_L2) {}
|
|
92
92
|
IndexFlatL2() {}
|
|
93
93
|
|
|
94
94
|
// override for l2 norms cache.
|
|
@@ -113,14 +113,14 @@ struct IndexFlatPanorama : IndexFlat {
|
|
|
113
113
|
* @param batch_size batch size for Panorama storage
|
|
114
114
|
*/
|
|
115
115
|
explicit IndexFlatPanorama(
|
|
116
|
-
idx_t
|
|
116
|
+
idx_t d_in,
|
|
117
117
|
MetricType metric,
|
|
118
|
-
size_t
|
|
119
|
-
size_t
|
|
120
|
-
: IndexFlat(
|
|
121
|
-
batch_size(
|
|
122
|
-
n_levels(
|
|
123
|
-
pano(code_size,
|
|
118
|
+
size_t n_levels_in,
|
|
119
|
+
size_t batch_size_in)
|
|
120
|
+
: IndexFlat(d_in, metric),
|
|
121
|
+
batch_size(batch_size_in),
|
|
122
|
+
n_levels(n_levels_in),
|
|
123
|
+
pano(code_size, n_levels_in, batch_size_in) {
|
|
124
124
|
FAISS_THROW_IF_NOT(
|
|
125
125
|
metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT);
|
|
126
126
|
}
|
|
@@ -174,10 +174,10 @@ struct IndexFlatL2Panorama : IndexFlatPanorama {
|
|
|
174
174
|
* @param batch_size batch size for Panorama storage
|
|
175
175
|
*/
|
|
176
176
|
explicit IndexFlatL2Panorama(
|
|
177
|
-
idx_t
|
|
178
|
-
size_t
|
|
179
|
-
size_t
|
|
180
|
-
: IndexFlatPanorama(
|
|
177
|
+
idx_t d_in,
|
|
178
|
+
size_t n_levels_in,
|
|
179
|
+
size_t batch_size_in = 512)
|
|
180
|
+
: IndexFlatPanorama(d_in, METRIC_L2, n_levels_in, batch_size_in) {}
|
|
181
181
|
};
|
|
182
182
|
|
|
183
183
|
struct IndexFlatIPPanorama : IndexFlatPanorama {
|
|
@@ -187,11 +187,14 @@ struct IndexFlatIPPanorama : IndexFlatPanorama {
|
|
|
187
187
|
* @param batch_size batch size for Panorama storage
|
|
188
188
|
*/
|
|
189
189
|
explicit IndexFlatIPPanorama(
|
|
190
|
-
idx_t
|
|
191
|
-
size_t
|
|
192
|
-
size_t
|
|
193
|
-
: IndexFlatPanorama(
|
|
194
|
-
|
|
190
|
+
idx_t d_in,
|
|
191
|
+
size_t n_levels_in,
|
|
192
|
+
size_t batch_size_in = 512)
|
|
193
|
+
: IndexFlatPanorama(
|
|
194
|
+
d_in,
|
|
195
|
+
METRIC_INNER_PRODUCT,
|
|
196
|
+
n_levels_in,
|
|
197
|
+
batch_size_in) {}
|
|
195
198
|
};
|
|
196
199
|
|
|
197
200
|
/// optimized version for 1D "vectors".
|