faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -70,23 +70,26 @@ ResidualQuantizer::ResidualQuantizer() {
|
|
|
70
70
|
}
|
|
71
71
|
|
|
72
72
|
ResidualQuantizer::ResidualQuantizer(
|
|
73
|
-
size_t
|
|
74
|
-
const std::vector<size_t>&
|
|
75
|
-
Search_type_t
|
|
73
|
+
size_t d_in,
|
|
74
|
+
const std::vector<size_t>& nbits_in,
|
|
75
|
+
Search_type_t search_type_in)
|
|
76
76
|
: ResidualQuantizer() {
|
|
77
|
-
this->search_type =
|
|
78
|
-
this->d =
|
|
79
|
-
M =
|
|
80
|
-
this->nbits =
|
|
77
|
+
this->search_type = search_type_in;
|
|
78
|
+
this->d = d_in;
|
|
79
|
+
M = nbits_in.size();
|
|
80
|
+
this->nbits = nbits_in;
|
|
81
81
|
set_derived_values();
|
|
82
82
|
}
|
|
83
83
|
|
|
84
84
|
ResidualQuantizer::ResidualQuantizer(
|
|
85
|
-
size_t
|
|
86
|
-
size_t
|
|
87
|
-
size_t
|
|
88
|
-
Search_type_t
|
|
89
|
-
: ResidualQuantizer(
|
|
85
|
+
size_t d_in,
|
|
86
|
+
size_t M_in,
|
|
87
|
+
size_t nbits_in,
|
|
88
|
+
Search_type_t search_type_in)
|
|
89
|
+
: ResidualQuantizer(
|
|
90
|
+
d_in,
|
|
91
|
+
std::vector<size_t>(M_in, nbits_in),
|
|
92
|
+
search_type_in) {}
|
|
90
93
|
|
|
91
94
|
void ResidualQuantizer::initialize_from(
|
|
92
95
|
const ResidualQuantizer& other,
|
|
@@ -142,7 +145,7 @@ void ResidualQuantizer::train(size_t n, const float* x) {
|
|
|
142
145
|
double t0 = getmillisecs();
|
|
143
146
|
double clustering_time = 0;
|
|
144
147
|
|
|
145
|
-
for (
|
|
148
|
+
for (size_t m = 0; m < M; m++) {
|
|
146
149
|
int K = 1 << nbits[m];
|
|
147
150
|
|
|
148
151
|
// on which residuals to train
|
|
@@ -157,7 +160,7 @@ void ResidualQuantizer::train(size_t n, const float* x) {
|
|
|
157
160
|
}
|
|
158
161
|
train_residuals = residuals1;
|
|
159
162
|
}
|
|
160
|
-
std::vector<float>
|
|
163
|
+
std::vector<float> cur_codebooks;
|
|
161
164
|
float obj = 0;
|
|
162
165
|
|
|
163
166
|
std::unique_ptr<Index> assign_index;
|
|
@@ -175,7 +178,7 @@ void ResidualQuantizer::train(size_t n, const float* x) {
|
|
|
175
178
|
train_residuals.size() / d,
|
|
176
179
|
train_residuals.data(),
|
|
177
180
|
*assign_index.get());
|
|
178
|
-
|
|
181
|
+
cur_codebooks.swap(clus.centroids);
|
|
179
182
|
assign_index->reset();
|
|
180
183
|
obj = clus.iteration_stats.back().obj;
|
|
181
184
|
} else { // progressive dim clustering
|
|
@@ -185,14 +188,14 @@ void ResidualQuantizer::train(size_t n, const float* x) {
|
|
|
185
188
|
train_residuals.size() / d,
|
|
186
189
|
train_residuals.data(),
|
|
187
190
|
assign_index_factory ? *assign_index_factory : default_fac);
|
|
188
|
-
|
|
191
|
+
cur_codebooks.swap(clus.centroids);
|
|
189
192
|
obj = clus.iteration_stats.back().obj;
|
|
190
193
|
}
|
|
191
194
|
clustering_time += (getmillisecs() - t1) / 1000;
|
|
192
195
|
|
|
193
196
|
memcpy(this->codebooks.data() + codebook_offsets[m] * d,
|
|
194
|
-
|
|
195
|
-
|
|
197
|
+
cur_codebooks.data(),
|
|
198
|
+
cur_codebooks.size() * sizeof(cur_codebooks[0]));
|
|
196
199
|
|
|
197
200
|
// quantize using the new codebooks
|
|
198
201
|
|
|
@@ -221,7 +224,7 @@ void ResidualQuantizer::train(size_t n, const float* x) {
|
|
|
221
224
|
beam_search_encode_step(
|
|
222
225
|
d,
|
|
223
226
|
K,
|
|
224
|
-
|
|
227
|
+
cur_codebooks.data(),
|
|
225
228
|
i1 - i0,
|
|
226
229
|
cur_beam_size,
|
|
227
230
|
residuals.data() + i0 * cur_beam_size * d,
|
|
@@ -239,16 +242,16 @@ void ResidualQuantizer::train(size_t n, const float* x) {
|
|
|
239
242
|
distances.swap(new_distances);
|
|
240
243
|
|
|
241
244
|
float sum_distances = 0;
|
|
242
|
-
for (
|
|
245
|
+
for (size_t j = 0; j < distances.size(); j++) {
|
|
243
246
|
sum_distances += distances[j];
|
|
244
247
|
}
|
|
245
248
|
|
|
246
249
|
if (verbose) {
|
|
247
|
-
printf("[%.3f s, %.3f s clustering] train stage %
|
|
250
|
+
printf("[%.3f s, %.3f s clustering] train stage %zd, %d bits, kmeans objective %g, "
|
|
248
251
|
"total distance %g, beam_size %d->%d (batch size %zd)\n",
|
|
249
252
|
(getmillisecs() - t0) / 1000,
|
|
250
253
|
clustering_time,
|
|
251
|
-
m,
|
|
254
|
+
size_t(m),
|
|
252
255
|
int(nbits[m]),
|
|
253
256
|
obj,
|
|
254
257
|
sum_distances,
|
|
@@ -314,7 +317,7 @@ float ResidualQuantizer::retrain_AQ_codebook(size_t n, const float* x) {
|
|
|
314
317
|
std::vector<float> C(n * total_codebook_size);
|
|
315
318
|
for (size_t i = 0; i < n; i++) {
|
|
316
319
|
BitstringReader bsr(codes.data() + i * code_size, code_size);
|
|
317
|
-
for (
|
|
320
|
+
for (size_t m = 0; m < M; m++) {
|
|
318
321
|
int idx = bsr.read(nbits[m]);
|
|
319
322
|
C[i + (codebook_offsets[m] + idx) * n] = 1;
|
|
320
323
|
}
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
#include <faiss/impl/FaissAssert.h>
|
|
16
16
|
#include <faiss/impl/FaissException.h>
|
|
17
17
|
#include <faiss/impl/IDSelector.h>
|
|
18
|
+
#include <faiss/impl/InvertedListScannerStats.h>
|
|
18
19
|
#include <faiss/utils/Heap.h>
|
|
19
20
|
#include <faiss/utils/partitioning.h>
|
|
20
21
|
#include <algorithm>
|
|
@@ -29,6 +30,9 @@ struct ResultHandlerUnordered {
|
|
|
29
30
|
// if not better than threshold, then not necessary to call add_result
|
|
30
31
|
T threshold{};
|
|
31
32
|
|
|
33
|
+
// per-list scan statistics populated by inverted-list scanners
|
|
34
|
+
InvertedListScannerStats stats;
|
|
35
|
+
|
|
32
36
|
// return whether threshold was updated
|
|
33
37
|
virtual bool add_result(T dis, TI idx) = 0;
|
|
34
38
|
|
|
@@ -66,8 +70,8 @@ struct BlockResultHandler {
|
|
|
66
70
|
size_t nq; // number of queries for which we search
|
|
67
71
|
const IDSelector* sel;
|
|
68
72
|
|
|
69
|
-
explicit BlockResultHandler(size_t
|
|
70
|
-
: nq(
|
|
73
|
+
explicit BlockResultHandler(size_t nq_, const IDSelector* sel_ = nullptr)
|
|
74
|
+
: nq(nq_), sel(sel_) {
|
|
71
75
|
assert(!use_sel || sel);
|
|
72
76
|
}
|
|
73
77
|
|
|
@@ -108,10 +112,10 @@ struct SingleQueryBlockResultHandler : BlockResultHandler<C, use_sel> {
|
|
|
108
112
|
ResultHandler& the_handler;
|
|
109
113
|
|
|
110
114
|
explicit SingleQueryBlockResultHandler(
|
|
111
|
-
ResultHandler&
|
|
112
|
-
const IDSelector*
|
|
113
|
-
: BlockResultHandler<C, use_sel>(1,
|
|
114
|
-
the_handler(
|
|
115
|
+
ResultHandler& the_handler_in,
|
|
116
|
+
const IDSelector* sel_in = nullptr)
|
|
117
|
+
: BlockResultHandler<C, use_sel>(1, sel_in),
|
|
118
|
+
the_handler(the_handler_in) {}
|
|
115
119
|
|
|
116
120
|
struct SingleResultHandler : ResultHandlerT<C> {
|
|
117
121
|
ResultHandler& the_handler;
|
|
@@ -121,9 +125,7 @@ struct SingleQueryBlockResultHandler : BlockResultHandler<C, use_sel> {
|
|
|
121
125
|
: the_handler(hr.the_handler) {}
|
|
122
126
|
|
|
123
127
|
/// begin results for query # i
|
|
124
|
-
void begin(const size_t qid) {
|
|
125
|
-
assert(qid == 0);
|
|
126
|
-
}
|
|
128
|
+
void begin(const size_t /* qid */) {}
|
|
127
129
|
|
|
128
130
|
/// add one result for query i
|
|
129
131
|
bool add_result(T dis, TI idx) final {
|
|
@@ -149,15 +151,15 @@ struct TopkBlockResultHandler : BlockResultHandler<C, use_sel> {
|
|
|
149
151
|
int64_t k; // number of results to keep
|
|
150
152
|
|
|
151
153
|
TopkBlockResultHandler(
|
|
152
|
-
size_t
|
|
153
|
-
T*
|
|
154
|
-
TI*
|
|
155
|
-
size_t
|
|
156
|
-
const IDSelector*
|
|
157
|
-
: BlockResultHandler<C, use_sel>(
|
|
158
|
-
dis_tab(
|
|
159
|
-
ids_tab(
|
|
160
|
-
k(
|
|
154
|
+
size_t nq_,
|
|
155
|
+
T* dis_tab_,
|
|
156
|
+
TI* ids_tab_,
|
|
157
|
+
size_t k_,
|
|
158
|
+
const IDSelector* sel_ = nullptr)
|
|
159
|
+
: BlockResultHandler<C, use_sel>(nq_, sel_),
|
|
160
|
+
dis_tab(dis_tab_),
|
|
161
|
+
ids_tab(ids_tab_),
|
|
162
|
+
k(k_) {}
|
|
161
163
|
|
|
162
164
|
~TopkBlockResultHandler() {}
|
|
163
165
|
};
|
|
@@ -176,12 +178,16 @@ struct Top1BlockResultHandler : TopkBlockResultHandler<C, use_sel> {
|
|
|
176
178
|
using BlockResultHandler<C, use_sel>::i1;
|
|
177
179
|
|
|
178
180
|
Top1BlockResultHandler(
|
|
179
|
-
size_t
|
|
180
|
-
T*
|
|
181
|
-
TI*
|
|
182
|
-
const IDSelector*
|
|
183
|
-
: TopkBlockResultHandler<C, use_sel>(
|
|
184
|
-
|
|
181
|
+
size_t nq_,
|
|
182
|
+
T* dis_tab_,
|
|
183
|
+
TI* ids_tab_,
|
|
184
|
+
const IDSelector* sel_ = nullptr)
|
|
185
|
+
: TopkBlockResultHandler<C, use_sel>(
|
|
186
|
+
nq_,
|
|
187
|
+
dis_tab_,
|
|
188
|
+
ids_tab_,
|
|
189
|
+
1,
|
|
190
|
+
sel_) {}
|
|
185
191
|
|
|
186
192
|
struct SingleResultHandler : ResultHandlerT<C> {
|
|
187
193
|
Top1BlockResultHandler& hr;
|
|
@@ -190,7 +196,7 @@ struct Top1BlockResultHandler : TopkBlockResultHandler<C, use_sel> {
|
|
|
190
196
|
TI min_idx;
|
|
191
197
|
size_t current_idx = 0;
|
|
192
198
|
|
|
193
|
-
explicit SingleResultHandler(Top1BlockResultHandler&
|
|
199
|
+
explicit SingleResultHandler(Top1BlockResultHandler& hr_) : hr(hr_) {}
|
|
194
200
|
|
|
195
201
|
/// begin results for query # i
|
|
196
202
|
void begin(const size_t current_idx_2) {
|
|
@@ -217,9 +223,9 @@ struct Top1BlockResultHandler : TopkBlockResultHandler<C, use_sel> {
|
|
|
217
223
|
};
|
|
218
224
|
|
|
219
225
|
/// begin
|
|
220
|
-
void begin_multiple(size_t
|
|
221
|
-
this->i0 =
|
|
222
|
-
this->i1 =
|
|
226
|
+
void begin_multiple(size_t i0_in, size_t i1_in) final {
|
|
227
|
+
this->i0 = i0_in;
|
|
228
|
+
this->i1 = i1_in;
|
|
223
229
|
|
|
224
230
|
for (size_t i = i0; i < i1; i++) {
|
|
225
231
|
this->dis_tab[i] = C::neutral();
|
|
@@ -228,7 +234,7 @@ struct Top1BlockResultHandler : TopkBlockResultHandler<C, use_sel> {
|
|
|
228
234
|
|
|
229
235
|
/// add results for query i0..i1 and j0..j1
|
|
230
236
|
void add_results(size_t j0, size_t j1, const T* dis_tab_2) final {
|
|
231
|
-
for (
|
|
237
|
+
for (size_t i = i0; i < i1; i++) {
|
|
232
238
|
const T* dis_tab_i = dis_tab_2 + (j1 - j0) * (i - i0) - j0;
|
|
233
239
|
|
|
234
240
|
auto& min_distance = this->dis_tab[i];
|
|
@@ -270,8 +276,8 @@ struct HeapResultHandler : ResultHandlerT<C> {
|
|
|
270
276
|
T* heap_dis;
|
|
271
277
|
TI* heap_ids;
|
|
272
278
|
|
|
273
|
-
HeapResultHandler(size_t
|
|
274
|
-
: k(
|
|
279
|
+
HeapResultHandler(size_t k_, T* heap_dis_, TI* heap_ids_)
|
|
280
|
+
: k(k_), heap_dis(heap_dis_), heap_ids(heap_ids_) {
|
|
275
281
|
if (heap_dis) {
|
|
276
282
|
this->threshold = heap_dis[0];
|
|
277
283
|
}
|
|
@@ -297,13 +303,17 @@ struct HeapBlockResultHandler : TopkBlockResultHandler<C, use_sel> {
|
|
|
297
303
|
using TopkBlockResultHandler<C, use_sel>::k;
|
|
298
304
|
|
|
299
305
|
HeapBlockResultHandler(
|
|
300
|
-
size_t
|
|
301
|
-
T*
|
|
302
|
-
TI*
|
|
303
|
-
size_t
|
|
304
|
-
const IDSelector*
|
|
305
|
-
: TopkBlockResultHandler<C, use_sel>(
|
|
306
|
-
|
|
306
|
+
size_t nq_,
|
|
307
|
+
T* dis_tab_,
|
|
308
|
+
TI* ids_tab_,
|
|
309
|
+
size_t k_,
|
|
310
|
+
const IDSelector* sel_ = nullptr)
|
|
311
|
+
: TopkBlockResultHandler<C, use_sel>(
|
|
312
|
+
nq_,
|
|
313
|
+
dis_tab_,
|
|
314
|
+
ids_tab_,
|
|
315
|
+
k_,
|
|
316
|
+
sel_) {}
|
|
307
317
|
|
|
308
318
|
/******************************************************
|
|
309
319
|
* API for 1 result at a time (each SingleResultHandler is
|
|
@@ -313,9 +323,9 @@ struct HeapBlockResultHandler : TopkBlockResultHandler<C, use_sel> {
|
|
|
313
323
|
struct SingleResultHandler : HeapResultHandler<C, use_sel> {
|
|
314
324
|
HeapBlockResultHandler& hr;
|
|
315
325
|
|
|
316
|
-
explicit SingleResultHandler(HeapBlockResultHandler&
|
|
317
|
-
: HeapResultHandler<C, use_sel>(
|
|
318
|
-
hr(
|
|
326
|
+
explicit SingleResultHandler(HeapBlockResultHandler& hr_)
|
|
327
|
+
: HeapResultHandler<C, use_sel>(hr_.k, nullptr, nullptr),
|
|
328
|
+
hr(hr_) {}
|
|
319
329
|
|
|
320
330
|
/// begin results for query # i
|
|
321
331
|
void begin(size_t i) {
|
|
@@ -346,12 +356,14 @@ struct HeapBlockResultHandler : TopkBlockResultHandler<C, use_sel> {
|
|
|
346
356
|
}
|
|
347
357
|
|
|
348
358
|
/// add results for query i0..i1 and j0..j1
|
|
349
|
-
void add_results(size_t j0, size_t j1, const T*
|
|
359
|
+
void add_results(size_t j0, size_t j1, const T* dis_in) final {
|
|
350
360
|
#pragma omp parallel for
|
|
351
|
-
for (int64_t i = i0; i < i1;
|
|
361
|
+
for (int64_t i = static_cast<int64_t>(i0); i < static_cast<int64_t>(i1);
|
|
362
|
+
i++) {
|
|
352
363
|
T* heap_dis = this->dis_tab + i * k;
|
|
353
364
|
TI* heap_ids = this->ids_tab + i * k;
|
|
354
|
-
const T* dis_tab_i =
|
|
365
|
+
const T* dis_tab_i =
|
|
366
|
+
dis_in + (j1 - j0) * (i - static_cast<int64_t>(i0)) - j0;
|
|
355
367
|
T thresh = heap_dis[0];
|
|
356
368
|
for (size_t j = j0; j < j1; j++) {
|
|
357
369
|
T dis = dis_tab_i[j];
|
|
@@ -365,8 +377,9 @@ struct HeapBlockResultHandler : TopkBlockResultHandler<C, use_sel> {
|
|
|
365
377
|
|
|
366
378
|
/// series of results for queries i0..i1 is done
|
|
367
379
|
void end_multiple() final {
|
|
368
|
-
|
|
369
|
-
for (
|
|
380
|
+
#pragma omp parallel for schedule(static) if ((i1 - i0) * k >= 1024)
|
|
381
|
+
for (int64_t i = static_cast<int64_t>(i0); i < static_cast<int64_t>(i1);
|
|
382
|
+
i++) {
|
|
370
383
|
heap_reorder<C>(k, this->dis_tab + i * k, this->ids_tab + i * k);
|
|
371
384
|
}
|
|
372
385
|
}
|
|
@@ -397,8 +410,8 @@ struct ReservoirTopN : ResultHandlerT<C> {
|
|
|
397
410
|
|
|
398
411
|
ReservoirTopN() {}
|
|
399
412
|
|
|
400
|
-
ReservoirTopN(size_t
|
|
401
|
-
: vals(
|
|
413
|
+
ReservoirTopN(size_t n_, size_t capacity_, T* vals_, TI* ids_)
|
|
414
|
+
: vals(vals_), ids(ids_), i(0), n(n_), capacity(capacity_) {
|
|
402
415
|
assert(n < capacity);
|
|
403
416
|
threshold = C::neutral();
|
|
404
417
|
}
|
|
@@ -436,7 +449,7 @@ struct ReservoirTopN : ResultHandlerT<C> {
|
|
|
436
449
|
}
|
|
437
450
|
|
|
438
451
|
void to_result(T* heap_dis, TI* heap_ids) const {
|
|
439
|
-
for (
|
|
452
|
+
for (size_t j = 0; j < std::min(i, n); j++) {
|
|
440
453
|
heap_push<C>(j + 1, heap_dis, heap_ids, vals[j], ids[j]);
|
|
441
454
|
}
|
|
442
455
|
|
|
@@ -458,16 +471,22 @@ struct ReservoirBlockResultHandler : TopkBlockResultHandler<C, use_sel> {
|
|
|
458
471
|
using TI = typename C::TI;
|
|
459
472
|
using BlockResultHandler<C, use_sel>::i0;
|
|
460
473
|
using BlockResultHandler<C, use_sel>::i1;
|
|
474
|
+
using TopkBlockResultHandler<C, use_sel>::k;
|
|
461
475
|
|
|
462
476
|
size_t capacity; // capacity of the reservoirs
|
|
463
477
|
|
|
464
478
|
ReservoirBlockResultHandler(
|
|
465
|
-
size_t
|
|
466
|
-
T*
|
|
467
|
-
TI*
|
|
468
|
-
size_t
|
|
469
|
-
const IDSelector*
|
|
470
|
-
: TopkBlockResultHandler<C, use_sel>(
|
|
479
|
+
size_t nq_,
|
|
480
|
+
T* dis_tab_,
|
|
481
|
+
TI* ids_tab_,
|
|
482
|
+
size_t k_,
|
|
483
|
+
const IDSelector* sel_ = nullptr)
|
|
484
|
+
: TopkBlockResultHandler<C, use_sel>(
|
|
485
|
+
nq_,
|
|
486
|
+
dis_tab_,
|
|
487
|
+
ids_tab_,
|
|
488
|
+
k_,
|
|
489
|
+
sel_) {
|
|
471
490
|
// double then round up to multiple of 16 (for SIMD alignment)
|
|
472
491
|
capacity = (2 * k + 15) & ~15;
|
|
473
492
|
}
|
|
@@ -483,9 +502,9 @@ struct ReservoirBlockResultHandler : TopkBlockResultHandler<C, use_sel> {
|
|
|
483
502
|
std::vector<T> reservoir_dis;
|
|
484
503
|
std::vector<TI> reservoir_ids;
|
|
485
504
|
|
|
486
|
-
explicit SingleResultHandler(ReservoirBlockResultHandler&
|
|
487
|
-
: ReservoirTopN<C>(
|
|
488
|
-
hr(
|
|
505
|
+
explicit SingleResultHandler(ReservoirBlockResultHandler& hr_)
|
|
506
|
+
: ReservoirTopN<C>(hr_.k, hr_.capacity, nullptr, nullptr),
|
|
507
|
+
hr(hr_) {}
|
|
489
508
|
|
|
490
509
|
size_t qno;
|
|
491
510
|
|
|
@@ -533,11 +552,14 @@ struct ReservoirBlockResultHandler : TopkBlockResultHandler<C, use_sel> {
|
|
|
533
552
|
}
|
|
534
553
|
|
|
535
554
|
/// add results for query i0..i1 and j0..j1
|
|
536
|
-
void add_results(size_t j0, size_t j1, const T*
|
|
555
|
+
void add_results(size_t j0, size_t j1, const T* dis_in) {
|
|
537
556
|
#pragma omp parallel for
|
|
538
|
-
for (int64_t i = i0; i < i1;
|
|
539
|
-
|
|
540
|
-
|
|
557
|
+
for (int64_t i = static_cast<int64_t>(i0); i < static_cast<int64_t>(i1);
|
|
558
|
+
i++) {
|
|
559
|
+
ReservoirTopN<C>& reservoir =
|
|
560
|
+
reservoirs[i - static_cast<int64_t>(i0)];
|
|
561
|
+
const T* dis_tab_i =
|
|
562
|
+
dis_in + (j1 - j0) * (i - static_cast<int64_t>(i0)) - j0;
|
|
541
563
|
for (size_t j = j0; j < j1; j++) {
|
|
542
564
|
T dis = dis_tab_i[j];
|
|
543
565
|
reservoir.add_result(dis, j);
|
|
@@ -547,9 +569,10 @@ struct ReservoirBlockResultHandler : TopkBlockResultHandler<C, use_sel> {
|
|
|
547
569
|
|
|
548
570
|
/// series of results for queries i0..i1 is done
|
|
549
571
|
void end_multiple() final {
|
|
550
|
-
|
|
551
|
-
for (
|
|
552
|
-
|
|
572
|
+
#pragma omp parallel for schedule(static) if ((i1 - i0) * this->k >= 1024)
|
|
573
|
+
for (int64_t i = static_cast<int64_t>(i0); i < static_cast<int64_t>(i1);
|
|
574
|
+
i++) {
|
|
575
|
+
reservoirs[i - static_cast<int64_t>(i0)].to_result(
|
|
553
576
|
this->dis_tab + i * this->k, this->ids_tab + i * this->k);
|
|
554
577
|
}
|
|
555
578
|
}
|
|
@@ -567,14 +590,15 @@ struct RangeResultHandler : ResultHandlerT<C> {
|
|
|
567
590
|
|
|
568
591
|
RangeQueryResult* qr = nullptr;
|
|
569
592
|
|
|
570
|
-
RangeResultHandler(RangeQueryResult*
|
|
571
|
-
this->threshold =
|
|
593
|
+
RangeResultHandler(RangeQueryResult* qr_, T threshold_) : qr(qr_) {
|
|
594
|
+
this->threshold = threshold_;
|
|
572
595
|
}
|
|
573
596
|
|
|
574
597
|
/// add one result for query i
|
|
575
598
|
bool add_result(T dis, TI idx) final {
|
|
576
599
|
if (C::cmp(threshold, dis)) {
|
|
577
600
|
qr->add(dis, idx);
|
|
601
|
+
return true;
|
|
578
602
|
}
|
|
579
603
|
return false;
|
|
580
604
|
}
|
|
@@ -591,12 +615,12 @@ struct RangeSearchBlockResultHandler : BlockResultHandler<C, use_sel> {
|
|
|
591
615
|
T radius;
|
|
592
616
|
|
|
593
617
|
RangeSearchBlockResultHandler(
|
|
594
|
-
RangeSearchResult*
|
|
595
|
-
float
|
|
596
|
-
const IDSelector*
|
|
597
|
-
: BlockResultHandler<C, use_sel>(
|
|
598
|
-
res(
|
|
599
|
-
radius(
|
|
618
|
+
RangeSearchResult* res_,
|
|
619
|
+
float radius_,
|
|
620
|
+
const IDSelector* sel_ = nullptr)
|
|
621
|
+
: BlockResultHandler<C, use_sel>(res_->nq, sel_),
|
|
622
|
+
res(res_),
|
|
623
|
+
radius(radius_) {}
|
|
600
624
|
|
|
601
625
|
/******************************************************
|
|
602
626
|
* API for 1 result at a time (each SingleResultHandler is
|
|
@@ -656,7 +680,7 @@ struct RangeSearchBlockResultHandler : BlockResultHandler<C, use_sel> {
|
|
|
656
680
|
// it is a bit tricky to find the proper PartialResult structure
|
|
657
681
|
// because the inner loop is on db not on queries.
|
|
658
682
|
|
|
659
|
-
if (pr < j0s.size() && j0 == j0s[pr]) {
|
|
683
|
+
if (static_cast<size_t>(pr) < j0s.size() && j0 == j0s[pr]) {
|
|
660
684
|
pres = partial_results[pr];
|
|
661
685
|
pr++;
|
|
662
686
|
} else if (j0 == 0 && j0s.size() > 0) {
|
|
@@ -718,7 +742,7 @@ typename Consumer::T dispatch_knn_ResultHandler(
|
|
|
718
742
|
if (k == 1) { \
|
|
719
743
|
Top1BlockResultHandler<C, use_sel> res(nx, vals, ids, sel); \
|
|
720
744
|
return consumer.template f<>(res, args...); \
|
|
721
|
-
} else if (k < distance_compute_min_k_reservoir) {
|
|
745
|
+
} else if (k < static_cast<size_t>(distance_compute_min_k_reservoir)) { \
|
|
722
746
|
HeapBlockResultHandler<C, use_sel> res(nx, vals, ids, k, sel); \
|
|
723
747
|
return consumer.template f<>(res, args...); \
|
|
724
748
|
} else { \
|
|
@@ -29,8 +29,8 @@ namespace faiss {
|
|
|
29
29
|
* ScalarQuantizer implementation
|
|
30
30
|
********************************************************************/
|
|
31
31
|
|
|
32
|
-
ScalarQuantizer::ScalarQuantizer(size_t
|
|
33
|
-
: Quantizer(
|
|
32
|
+
ScalarQuantizer::ScalarQuantizer(size_t d_in, QuantizerType qtype_in)
|
|
33
|
+
: Quantizer(d_in), qtype(qtype_in) {
|
|
34
34
|
set_derived_sizes();
|
|
35
35
|
}
|
|
36
36
|
|
|
@@ -38,15 +38,29 @@ ScalarQuantizer::ScalarQuantizer() {}
|
|
|
38
38
|
|
|
39
39
|
void ScalarQuantizer::set_derived_sizes() {
|
|
40
40
|
switch (qtype) {
|
|
41
|
+
case QT_1bit_tqmse:
|
|
42
|
+
code_size = (d + 7) / 8;
|
|
43
|
+
bits = 1;
|
|
44
|
+
break;
|
|
45
|
+
case QT_2bit_tqmse:
|
|
46
|
+
code_size = (d * 2 + 7) / 8;
|
|
47
|
+
bits = 2;
|
|
48
|
+
break;
|
|
49
|
+
case QT_3bit_tqmse:
|
|
50
|
+
code_size = (d * 3 + 7) / 8;
|
|
51
|
+
bits = 3;
|
|
52
|
+
break;
|
|
41
53
|
case QT_8bit:
|
|
42
54
|
case QT_8bit_uniform:
|
|
43
55
|
case QT_8bit_direct:
|
|
44
56
|
case QT_8bit_direct_signed:
|
|
57
|
+
case QT_8bit_tqmse:
|
|
45
58
|
code_size = d;
|
|
46
59
|
bits = 8;
|
|
47
60
|
break;
|
|
48
61
|
case QT_4bit:
|
|
49
62
|
case QT_4bit_uniform:
|
|
63
|
+
case QT_4bit_tqmse:
|
|
50
64
|
code_size = (d + 1) / 2;
|
|
51
65
|
bits = 4;
|
|
52
66
|
break;
|
|
@@ -62,6 +76,10 @@ void ScalarQuantizer::set_derived_sizes() {
|
|
|
62
76
|
code_size = d * 2;
|
|
63
77
|
bits = 16;
|
|
64
78
|
break;
|
|
79
|
+
case QT_0bit:
|
|
80
|
+
code_size = 0;
|
|
81
|
+
bits = 0;
|
|
82
|
+
break;
|
|
65
83
|
default:
|
|
66
84
|
break;
|
|
67
85
|
}
|
|
@@ -71,6 +89,10 @@ void ScalarQuantizer::train(size_t n, const float* x) {
|
|
|
71
89
|
using scalar_quantizer::train_NonUniform;
|
|
72
90
|
using scalar_quantizer::train_Uniform;
|
|
73
91
|
|
|
92
|
+
if (qtype == QT_0bit) {
|
|
93
|
+
return; // nothing to train for centroid-only mode
|
|
94
|
+
}
|
|
95
|
+
|
|
74
96
|
int bit_per_dim = qtype == QT_4bit_uniform ? 4
|
|
75
97
|
: qtype == QT_4bit ? 4
|
|
76
98
|
: qtype == QT_6bit ? 6
|
|
@@ -81,6 +103,8 @@ void ScalarQuantizer::train(size_t n, const float* x) {
|
|
|
81
103
|
switch (qtype) {
|
|
82
104
|
case QT_4bit_uniform:
|
|
83
105
|
case QT_8bit_uniform:
|
|
106
|
+
FAISS_THROW_IF_NOT(n > 0);
|
|
107
|
+
FAISS_THROW_IF_NOT(x != nullptr);
|
|
84
108
|
train_Uniform(
|
|
85
109
|
rangestat,
|
|
86
110
|
rangestat_arg,
|
|
@@ -92,6 +116,8 @@ void ScalarQuantizer::train(size_t n, const float* x) {
|
|
|
92
116
|
case QT_4bit:
|
|
93
117
|
case QT_8bit:
|
|
94
118
|
case QT_6bit:
|
|
119
|
+
FAISS_THROW_IF_NOT(n > 0);
|
|
120
|
+
FAISS_THROW_IF_NOT(x != nullptr);
|
|
95
121
|
train_NonUniform(
|
|
96
122
|
rangestat,
|
|
97
123
|
rangestat_arg,
|
|
@@ -107,6 +133,21 @@ void ScalarQuantizer::train(size_t n, const float* x) {
|
|
|
107
133
|
case QT_8bit_direct_signed:
|
|
108
134
|
// no training necessary
|
|
109
135
|
break;
|
|
136
|
+
case QT_1bit_tqmse:
|
|
137
|
+
scalar_quantizer::train_TurboQuantMSE(d, 1, trained);
|
|
138
|
+
break;
|
|
139
|
+
case QT_2bit_tqmse:
|
|
140
|
+
scalar_quantizer::train_TurboQuantMSE(d, 2, trained);
|
|
141
|
+
break;
|
|
142
|
+
case QT_3bit_tqmse:
|
|
143
|
+
scalar_quantizer::train_TurboQuantMSE(d, 3, trained);
|
|
144
|
+
break;
|
|
145
|
+
case QT_4bit_tqmse:
|
|
146
|
+
scalar_quantizer::train_TurboQuantMSE(d, 4, trained);
|
|
147
|
+
break;
|
|
148
|
+
case QT_8bit_tqmse:
|
|
149
|
+
scalar_quantizer::train_TurboQuantMSE(d, 8, trained);
|
|
150
|
+
break;
|
|
110
151
|
default:
|
|
111
152
|
break;
|
|
112
153
|
}
|
|
@@ -128,20 +169,27 @@ ScalarQuantizer::SQuantizer* ScalarQuantizer::select_quantizer() const {
|
|
|
128
169
|
|
|
129
170
|
void ScalarQuantizer::compute_codes(const float* x, uint8_t* codes, size_t n)
|
|
130
171
|
const {
|
|
172
|
+
if (code_size == 0) {
|
|
173
|
+
return; // QT_0bit: nothing to encode
|
|
174
|
+
}
|
|
131
175
|
std::unique_ptr<SQuantizer> squant(select_quantizer());
|
|
132
176
|
|
|
133
177
|
memset(codes, 0, code_size * n);
|
|
134
178
|
#pragma omp parallel for
|
|
135
|
-
for (int64_t i = 0; i < n; i++) {
|
|
179
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
136
180
|
squant->encode_vector(x + i * d, codes + i * code_size);
|
|
137
181
|
}
|
|
138
182
|
}
|
|
139
183
|
|
|
140
184
|
void ScalarQuantizer::decode(const uint8_t* codes, float* x, size_t n) const {
|
|
185
|
+
if (code_size == 0) {
|
|
186
|
+
memset(x, 0, sizeof(float) * d * n);
|
|
187
|
+
return; // QT_0bit: no per-vector data, zero-fill
|
|
188
|
+
}
|
|
141
189
|
std::unique_ptr<SQuantizer> squant(select_quantizer());
|
|
142
190
|
|
|
143
191
|
#pragma omp parallel for
|
|
144
|
-
for (int64_t i = 0; i < n; i++) {
|
|
192
|
+
for (int64_t i = 0; i < static_cast<int64_t>(n); i++) {
|
|
145
193
|
squant->decode_vector(codes + i * code_size, x + i * d);
|
|
146
194
|
}
|
|
147
195
|
}
|
|
@@ -33,6 +33,13 @@ struct ScalarQuantizer : Quantizer {
|
|
|
33
33
|
QT_bf16,
|
|
34
34
|
QT_8bit_direct_signed, ///< fast indexing of signed int8s ranging from
|
|
35
35
|
///< [-128 to 127]
|
|
36
|
+
QT_0bit, ///< 0 bits per component, centroid-only distance (for IVF)
|
|
37
|
+
QT_1bit_tqmse, ///< TurboQuant MSE-optimized, 1 bit per component
|
|
38
|
+
QT_2bit_tqmse, ///< TurboQuant MSE-optimized, 2 bits per component
|
|
39
|
+
QT_3bit_tqmse, ///< TurboQuant MSE-optimized, 3 bits per component
|
|
40
|
+
QT_4bit_tqmse, ///< TurboQuant MSE-optimized, 4 bits per component
|
|
41
|
+
QT_8bit_tqmse, ///< TurboQuant MSE-optimized, 8 bits per component
|
|
42
|
+
QT_count
|
|
36
43
|
};
|
|
37
44
|
|
|
38
45
|
QuantizerType qtype = QT_8bit;
|
|
@@ -58,7 +65,7 @@ struct ScalarQuantizer : Quantizer {
|
|
|
58
65
|
/// trained values (including the range)
|
|
59
66
|
std::vector<float> trained;
|
|
60
67
|
|
|
61
|
-
ScalarQuantizer(size_t
|
|
68
|
+
ScalarQuantizer(size_t d_in, QuantizerType qtype_in);
|
|
62
69
|
ScalarQuantizer();
|
|
63
70
|
|
|
64
71
|
/// updates internal values based on qtype and d
|
|
@@ -100,6 +107,25 @@ struct ScalarQuantizer : Quantizer {
|
|
|
100
107
|
|
|
101
108
|
virtual float query_to_code(const uint8_t* code) const = 0;
|
|
102
109
|
|
|
110
|
+
/// Compute four query-to-code distances in one call. Default loops
|
|
111
|
+
/// query_to_code four times; per-SIMD specializations may batch the
|
|
112
|
+
/// inner dim loop across the four codes to amortize query state and
|
|
113
|
+
/// expose ILP across independent accumulators.
|
|
114
|
+
virtual void query_to_codes_batch_4(
|
|
115
|
+
const uint8_t* code_0,
|
|
116
|
+
const uint8_t* code_1,
|
|
117
|
+
const uint8_t* code_2,
|
|
118
|
+
const uint8_t* code_3,
|
|
119
|
+
float& dis0,
|
|
120
|
+
float& dis1,
|
|
121
|
+
float& dis2,
|
|
122
|
+
float& dis3) const {
|
|
123
|
+
dis0 = query_to_code(code_0);
|
|
124
|
+
dis1 = query_to_code(code_1);
|
|
125
|
+
dis2 = query_to_code(code_2);
|
|
126
|
+
dis3 = query_to_code(code_3);
|
|
127
|
+
}
|
|
128
|
+
|
|
103
129
|
float distance_to_code(const uint8_t* code) final {
|
|
104
130
|
return query_to_code(code);
|
|
105
131
|
}
|