faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -18,10 +18,190 @@
|
|
|
18
18
|
#include <algorithm>
|
|
19
19
|
#include <cstddef>
|
|
20
20
|
#include <cstdint>
|
|
21
|
+
#include <cstring>
|
|
21
22
|
#include <vector>
|
|
22
23
|
|
|
24
|
+
#if defined(COMPILE_SIMD_AVX2) && defined(__AVX2__) && defined(__BMI2__)
|
|
25
|
+
#include <immintrin.h>
|
|
26
|
+
#endif
|
|
27
|
+
|
|
23
28
|
namespace faiss {
|
|
24
29
|
|
|
30
|
+
#ifndef SWIG
|
|
31
|
+
|
|
32
|
+
/// Compute dot products between query_level and active vectors.
|
|
33
|
+
///
|
|
34
|
+
/// @tparam AllActive If true, vectors are at sequential positions 0..N-1
|
|
35
|
+
/// (first level, full batch). If false, positions come
|
|
36
|
+
/// from active_indices (subsequent levels after pruning).
|
|
37
|
+
/// @tparam LevelWidth Compile-time level width in floats (0 = use runtime
|
|
38
|
+
/// level_width_dims). Enables full loop unrolling.
|
|
39
|
+
FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
|
|
40
|
+
template <bool AllActive = false, size_t LevelWidth = 0>
|
|
41
|
+
static inline void compute_level_dot_kernel(
|
|
42
|
+
const float* FAISS_RESTRICT query_level,
|
|
43
|
+
const float* FAISS_RESTRICT level_storage,
|
|
44
|
+
const uint32_t* active_indices,
|
|
45
|
+
const size_t num_active,
|
|
46
|
+
const size_t level_width_dims,
|
|
47
|
+
float* FAISS_RESTRICT dot_products) {
|
|
48
|
+
const size_t width = LevelWidth > 0 ? LevelWidth : level_width_dims;
|
|
49
|
+
size_t i = 0;
|
|
50
|
+
for (; i + 4 <= num_active; i += 4) {
|
|
51
|
+
const float* y0 = level_storage +
|
|
52
|
+
(AllActive ? (i + 0) : active_indices[i + 0]) * width;
|
|
53
|
+
const float* y1 = level_storage +
|
|
54
|
+
(AllActive ? (i + 1) : active_indices[i + 1]) * width;
|
|
55
|
+
const float* y2 = level_storage +
|
|
56
|
+
(AllActive ? (i + 2) : active_indices[i + 2]) * width;
|
|
57
|
+
const float* y3 = level_storage +
|
|
58
|
+
(AllActive ? (i + 3) : active_indices[i + 3]) * width;
|
|
59
|
+
|
|
60
|
+
float dp0 = 0, dp1 = 0, dp2 = 0, dp3 = 0;
|
|
61
|
+
FAISS_PRAGMA_IMPRECISE_LOOP
|
|
62
|
+
for (size_t j = 0; j < width; j++) {
|
|
63
|
+
float q = query_level[j];
|
|
64
|
+
dp0 += q * y0[j];
|
|
65
|
+
dp1 += q * y1[j];
|
|
66
|
+
dp2 += q * y2[j];
|
|
67
|
+
dp3 += q * y3[j];
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
dot_products[i + 0] = dp0;
|
|
71
|
+
dot_products[i + 1] = dp1;
|
|
72
|
+
dot_products[i + 2] = dp2;
|
|
73
|
+
dot_products[i + 3] = dp3;
|
|
74
|
+
}
|
|
75
|
+
for (; i < num_active; i++) {
|
|
76
|
+
const float* yj =
|
|
77
|
+
level_storage + (AllActive ? i : active_indices[i]) * width;
|
|
78
|
+
float dp = 0;
|
|
79
|
+
FAISS_PRAGMA_IMPRECISE_LOOP
|
|
80
|
+
for (size_t j = 0; j < width; j++) {
|
|
81
|
+
dp += query_level[j] * yj[j];
|
|
82
|
+
}
|
|
83
|
+
dot_products[i] = dp;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
FAISS_PRAGMA_IMPRECISE_FUNCTION_END
|
|
87
|
+
|
|
88
|
+
/// Update exact distances with the current level's dot products, then apply
|
|
89
|
+
/// Panorama pruning: for each active vector, compute a lower bound on
|
|
90
|
+
/// the final distance and mark it for removal if it cannot beat the current
|
|
91
|
+
/// threshold. Writes 0/1 into active_byteset for subsequent compaction.
|
|
92
|
+
///
|
|
93
|
+
/// Uses `if constexpr` on C::is_max rather than C::cmp() to ensure the
|
|
94
|
+
/// comparison autovectorizes (C::cmp generates scalar function calls).
|
|
95
|
+
FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
|
|
96
|
+
template <bool AllActive, typename C, MetricType M>
|
|
97
|
+
static inline void prune_kernel(
|
|
98
|
+
float* FAISS_RESTRICT exact_distances,
|
|
99
|
+
const float* FAISS_RESTRICT dot_buffer,
|
|
100
|
+
const float* FAISS_RESTRICT level_cum_sums,
|
|
101
|
+
uint8_t* FAISS_RESTRICT active_byteset,
|
|
102
|
+
const uint32_t* FAISS_RESTRICT active_indices,
|
|
103
|
+
const uint32_t num_active,
|
|
104
|
+
const float query_cum_norm,
|
|
105
|
+
const float threshold) {
|
|
106
|
+
FAISS_PRAGMA_IMPRECISE_LOOP
|
|
107
|
+
for (uint32_t i = 0; i < num_active; i++) {
|
|
108
|
+
uint32_t idx = AllActive ? i : active_indices[i];
|
|
109
|
+
if constexpr (M == METRIC_INNER_PRODUCT) {
|
|
110
|
+
exact_distances[idx] += dot_buffer[i];
|
|
111
|
+
} else {
|
|
112
|
+
exact_distances[idx] -= 2.0f * dot_buffer[i];
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
float cum_sum = level_cum_sums[idx];
|
|
116
|
+
float cauchy_schwarz_bound;
|
|
117
|
+
if constexpr (M == METRIC_INNER_PRODUCT) {
|
|
118
|
+
cauchy_schwarz_bound = -cum_sum * query_cum_norm;
|
|
119
|
+
} else {
|
|
120
|
+
cauchy_schwarz_bound = 2.0f * cum_sum * query_cum_norm;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
float lower_bound = exact_distances[idx] - cauchy_schwarz_bound;
|
|
124
|
+
if constexpr (C::is_max) {
|
|
125
|
+
active_byteset[i] = (threshold > lower_bound) ? 1 : 0;
|
|
126
|
+
} else {
|
|
127
|
+
active_byteset[i] = (threshold < lower_bound) ? 1 : 0;
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
FAISS_PRAGMA_IMPRECISE_FUNCTION_END
|
|
132
|
+
|
|
133
|
+
/// Compact active_indices in-place, removing entries where active_byteset[i]
|
|
134
|
+
/// is zero. Returns the new count of active elements. Uses a branchless BMI2 +
|
|
135
|
+
/// AVX2 fast path (8 elements/iteration via _pext_u64 permutation) with a
|
|
136
|
+
/// scalar fallback for the tail and non-x86 platforms.
|
|
137
|
+
inline size_t compact_active_kernel(
|
|
138
|
+
uint32_t* active_indices,
|
|
139
|
+
const uint8_t* FAISS_RESTRICT active_byteset,
|
|
140
|
+
const size_t num_active) {
|
|
141
|
+
size_t next_active = 0;
|
|
142
|
+
size_t i = 0;
|
|
143
|
+
|
|
144
|
+
#if defined(COMPILE_SIMD_AVX2) && defined(__AVX2__) && defined(__BMI2__)
|
|
145
|
+
for (; i + 8 <= num_active; i += 8) {
|
|
146
|
+
uint64_t bytes;
|
|
147
|
+
memcpy(&bytes, &active_byteset[i], 8);
|
|
148
|
+
|
|
149
|
+
uint64_t expanded = bytes * 0xFFULL;
|
|
150
|
+
uint64_t packed = _pext_u64(0x0706050403020100ULL, expanded);
|
|
151
|
+
|
|
152
|
+
__m256i perm = _mm256_cvtepu8_epi32(_mm_cvtsi64_si128((int64_t)packed));
|
|
153
|
+
__m256i data = _mm256_loadu_si256((const __m256i*)&active_indices[i]);
|
|
154
|
+
__m256i compacted = _mm256_permutevar8x32_epi32(data, perm);
|
|
155
|
+
_mm256_storeu_si256((__m256i*)&active_indices[next_active], compacted);
|
|
156
|
+
|
|
157
|
+
next_active += __builtin_popcountll(bytes);
|
|
158
|
+
}
|
|
159
|
+
#endif
|
|
160
|
+
|
|
161
|
+
for (; i < num_active; i++) {
|
|
162
|
+
active_indices[next_active] = active_indices[i];
|
|
163
|
+
next_active += active_byteset[i] ? 1 : 0;
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
return next_active;
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
/// Compile-time dispatch: turns the runtime value `width` into a template
/// argument by expanding into an if-else chain over the candidates
/// Lo, Lo + Step, ... up to Hi. The first candidate equal to `width` is
/// passed to the callable as fn.template operator()<candidate>(). If no
/// candidate matches, the callable is invoked with 0, which selects the
/// runtime (LevelWidth = 0) path. This allows common level widths to get
/// their own specialization.
namespace detail {
template <size_t Lo, size_t Hi, size_t Step, typename Lambda>
inline auto dispatch_width(size_t width, Lambda&& fn) {
    if constexpr (Lo <= Hi) {
        // Lo is still a valid candidate: take it on a match, otherwise
        // continue with the next candidate.
        if (width == Lo) {
            return fn.template operator()<Lo>();
        }
        return dispatch_width<Lo + Step, Hi, Step>(
                width, std::forward<Lambda>(fn));
    } else {
        // Candidates exhausted: fall back to the non-specialized path.
        return fn.template operator()<0>();
    }
}
} // namespace detail
|
|
187
|
+
|
|
188
|
+
/// Specialize for common float level widths (multiples of 8 up to 128).
|
|
189
|
+
template <typename LambdaType>
|
|
190
|
+
inline auto with_level_width(size_t width, LambdaType&& action) {
|
|
191
|
+
return detail::dispatch_width<8, 128, 8>(
|
|
192
|
+
width, std::forward<LambdaType>(action));
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
/// Lift a runtime bool into a template argument: calls
/// fn.template operator()<true>() when `value` is set and
/// fn.template operator()<false>() otherwise, returning the result.
template <typename Lambda>
inline auto with_bool(bool value, Lambda&& fn) {
    if (!value) {
        return fn.template operator()<false>();
    }
    return fn.template operator()<true>();
}
|
|
203
|
+
#endif // SWIG
|
|
204
|
+
|
|
25
205
|
/**
|
|
26
206
|
* Implements the core logic of Panorama-based refinement.
|
|
27
207
|
* arXiv: https://arxiv.org/abs/2510.00566
|
|
@@ -42,6 +222,8 @@ namespace faiss {
|
|
|
42
222
|
* accelerating the refinement stage.
|
|
43
223
|
*/
|
|
44
224
|
struct Panorama {
|
|
225
|
+
static constexpr size_t kDefaultBatchSize = 128;
|
|
226
|
+
|
|
45
227
|
size_t d = 0;
|
|
46
228
|
size_t code_size = 0;
|
|
47
229
|
size_t n_levels = 0;
|
|
@@ -98,6 +280,7 @@ struct Panorama {
|
|
|
98
280
|
/// 4. After all levels, survivors are exact distances; update heap.
|
|
99
281
|
/// This achieves early termination while maintaining SIMD-friendly
|
|
100
282
|
/// sequential access patterns in the level-oriented storage layout.
|
|
283
|
+
#ifndef SWIG
|
|
101
284
|
template <typename C, MetricType M>
|
|
102
285
|
size_t progressive_filter_batch(
|
|
103
286
|
const uint8_t* codes_base,
|
|
@@ -110,111 +293,99 @@ struct Panorama {
|
|
|
110
293
|
const idx_t* ids,
|
|
111
294
|
bool use_sel,
|
|
112
295
|
std::vector<uint32_t>& active_indices,
|
|
296
|
+
std::vector<uint8_t>& active_byteset,
|
|
113
297
|
std::vector<float>& exact_distances,
|
|
298
|
+
std::vector<float>& dot_buffer,
|
|
114
299
|
float threshold,
|
|
115
|
-
PanoramaStats& local_stats) const
|
|
300
|
+
PanoramaStats& local_stats) const {
|
|
301
|
+
size_t batch_start = batch_no * batch_size;
|
|
302
|
+
size_t curr_batch_size = std::min(list_size - batch_start, batch_size);
|
|
116
303
|
|
|
117
|
-
|
|
118
|
-
|
|
304
|
+
size_t cumsum_batch_offset = batch_no * batch_size * (n_levels + 1);
|
|
305
|
+
const float* batch_cum_sums = cum_sums + cumsum_batch_offset;
|
|
306
|
+
const float* level_cum_sums = batch_cum_sums + batch_size;
|
|
307
|
+
float q_norm = query_cum_sums[0] * query_cum_sums[0];
|
|
119
308
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
const uint8_t* codes_base,
|
|
123
|
-
const float* cum_sums,
|
|
124
|
-
const float* query,
|
|
125
|
-
const float* query_cum_sums,
|
|
126
|
-
size_t batch_no,
|
|
127
|
-
size_t list_size,
|
|
128
|
-
const IDSelector* sel,
|
|
129
|
-
const idx_t* ids,
|
|
130
|
-
bool use_sel,
|
|
131
|
-
std::vector<uint32_t>& active_indices,
|
|
132
|
-
std::vector<float>& exact_distances,
|
|
133
|
-
float threshold,
|
|
134
|
-
PanoramaStats& local_stats) const {
|
|
135
|
-
size_t batch_start = batch_no * batch_size;
|
|
136
|
-
size_t curr_batch_size = std::min(list_size - batch_start, batch_size);
|
|
137
|
-
|
|
138
|
-
size_t cumsum_batch_offset = batch_no * batch_size * (n_levels + 1);
|
|
139
|
-
const float* batch_cum_sums = cum_sums + cumsum_batch_offset;
|
|
140
|
-
const float* level_cum_sums = batch_cum_sums + batch_size;
|
|
141
|
-
float q_norm = query_cum_sums[0] * query_cum_sums[0];
|
|
142
|
-
|
|
143
|
-
size_t batch_offset = batch_no * batch_size * code_size;
|
|
144
|
-
const uint8_t* storage_base = codes_base + batch_offset;
|
|
145
|
-
|
|
146
|
-
// Initialize active set with ID-filtered vectors.
|
|
147
|
-
size_t num_active = 0;
|
|
148
|
-
for (size_t i = 0; i < curr_batch_size; i++) {
|
|
149
|
-
size_t global_idx = batch_start + i;
|
|
150
|
-
idx_t id = (ids == nullptr) ? global_idx : ids[global_idx];
|
|
151
|
-
bool include = !use_sel || sel->is_member(id);
|
|
152
|
-
|
|
153
|
-
active_indices[num_active] = i;
|
|
154
|
-
float cum_sum = batch_cum_sums[i];
|
|
309
|
+
size_t batch_offset = batch_no * batch_size * code_size;
|
|
310
|
+
const uint8_t* storage_base = codes_base + batch_offset;
|
|
155
311
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
312
|
+
// Initialize active set with ID-filtered vectors.
|
|
313
|
+
size_t num_active = 0;
|
|
314
|
+
for (size_t i = 0; i < curr_batch_size; i++) {
|
|
315
|
+
size_t global_idx = batch_start + i;
|
|
316
|
+
idx_t id = (ids == nullptr) ? global_idx : ids[global_idx];
|
|
317
|
+
bool include = !use_sel || sel->is_member(id);
|
|
161
318
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
if (num_active == 0) {
|
|
166
|
-
return 0;
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
size_t total_active = num_active;
|
|
170
|
-
for (size_t level = 0; level < n_levels; level++) {
|
|
171
|
-
local_stats.total_dims_scanned += num_active;
|
|
172
|
-
local_stats.total_dims += total_active;
|
|
319
|
+
active_indices[num_active] = i;
|
|
320
|
+
float cum_sum = batch_cum_sums[i];
|
|
173
321
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
322
|
+
if constexpr (M == METRIC_INNER_PRODUCT) {
|
|
323
|
+
exact_distances[i] = 0.0f;
|
|
324
|
+
} else {
|
|
325
|
+
exact_distances[i] = cum_sum * cum_sum + q_norm;
|
|
326
|
+
}
|
|
179
327
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
uint32_t idx = active_indices[i];
|
|
183
|
-
size_t actual_level_width = std::min(
|
|
184
|
-
level_width_floats, d - level * level_width_floats);
|
|
328
|
+
num_active += include;
|
|
329
|
+
}
|
|
185
330
|
|
|
186
|
-
|
|
187
|
-
|
|
331
|
+
size_t total_active = num_active;
|
|
332
|
+
const bool first_level_full = (num_active == curr_batch_size);
|
|
188
333
|
|
|
189
|
-
|
|
190
|
-
fvec_inner_product(query_level, yj, actual_level_width);
|
|
334
|
+
local_stats.total_dims += total_active * n_levels;
|
|
191
335
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
exact_distances[idx] -= 2.0f * dot_product;
|
|
196
|
-
}
|
|
336
|
+
for (size_t level = 0; (level < n_levels) && (num_active > 0);
|
|
337
|
+
level++) {
|
|
338
|
+
local_stats.total_dims_scanned += num_active;
|
|
197
339
|
|
|
198
|
-
float
|
|
199
|
-
float cauchy_schwarz_bound;
|
|
200
|
-
if constexpr (M == METRIC_INNER_PRODUCT) {
|
|
201
|
-
cauchy_schwarz_bound = -cum_sum * query_cum_norm;
|
|
202
|
-
} else {
|
|
203
|
-
cauchy_schwarz_bound = 2.0f * cum_sum * query_cum_norm;
|
|
204
|
-
}
|
|
340
|
+
float query_cum_norm = query_cum_sums[level + 1];
|
|
205
341
|
|
|
206
|
-
|
|
342
|
+
size_t level_offset = level * level_width * batch_size;
|
|
343
|
+
const float* level_storage =
|
|
344
|
+
(const float*)(storage_base + level_offset);
|
|
345
|
+
const float* query_level = query + level * level_width_floats;
|
|
346
|
+
size_t actual_level_width = std::min(
|
|
347
|
+
level_width_floats, d - level * level_width_floats);
|
|
207
348
|
|
|
208
|
-
|
|
209
|
-
|
|
349
|
+
num_active = with_bool(
|
|
350
|
+
level == 0 && first_level_full, [&]<bool AllActive>() {
|
|
351
|
+
with_level_width(
|
|
352
|
+
actual_level_width, [&]<size_t LevelWidth>() {
|
|
353
|
+
compute_level_dot_kernel<
|
|
354
|
+
AllActive,
|
|
355
|
+
LevelWidth>(
|
|
356
|
+
query_level,
|
|
357
|
+
level_storage,
|
|
358
|
+
active_indices.data(),
|
|
359
|
+
num_active,
|
|
360
|
+
actual_level_width,
|
|
361
|
+
dot_buffer.data());
|
|
362
|
+
});
|
|
363
|
+
|
|
364
|
+
prune_kernel<AllActive, C, M>(
|
|
365
|
+
exact_distances.data(),
|
|
366
|
+
dot_buffer.data(),
|
|
367
|
+
level_cum_sums,
|
|
368
|
+
active_byteset.data(),
|
|
369
|
+
active_indices.data(),
|
|
370
|
+
(uint32_t)num_active,
|
|
371
|
+
query_cum_norm,
|
|
372
|
+
threshold);
|
|
373
|
+
|
|
374
|
+
return compact_active_kernel(
|
|
375
|
+
active_indices.data(),
|
|
376
|
+
active_byteset.data(),
|
|
377
|
+
num_active);
|
|
378
|
+
});
|
|
379
|
+
|
|
380
|
+
level_cum_sums += batch_size;
|
|
210
381
|
}
|
|
211
382
|
|
|
212
|
-
num_active
|
|
213
|
-
level_cum_sums += batch_size;
|
|
383
|
+
return num_active;
|
|
214
384
|
}
|
|
385
|
+
#endif // SWIG
|
|
215
386
|
|
|
216
|
-
|
|
217
|
-
}
|
|
387
|
+
void reconstruct(idx_t key, float* recons, const uint8_t* codes_base) const;
|
|
388
|
+
};
|
|
218
389
|
} // namespace faiss
|
|
219
390
|
|
|
220
391
|
#endif
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#include <faiss/impl/PdxLayout.h>
|
|
9
|
+
|
|
10
|
+
#include <cstddef>
|
|
11
|
+
#include <cstring>
|
|
12
|
+
|
|
13
|
+
namespace faiss {
|
|
14
|
+
namespace detail {
|
|
15
|
+
|
|
16
|
+
/** Repack the row-major (k, d_trail) matrix `Y` into `Y_pdx`, one block of
 * dimensions at a time.
 *
 * The d_trail dimensions are cut into consecutive blocks of `pdx_block_size`
 * dims, followed by one narrower block of `d_trail % pdx_block_size` dims
 * when the division is not exact. For every block, the matching slice of
 * row 0, row 1, ..., row k-1 is appended to `Y_pdx` in that order.
 *
 * @param Y               source matrix, read as k rows of d_trail floats
 * @param k               number of rows
 * @param d_trail         number of dims per row
 * @param pdx_block_size  width of a full block, in dims
 * @param Y_pdx           destination buffer, written sequentially
 */
void pdxify(
        const float* Y,
        int k,
        int d_trail,
        int pdx_block_size,
        float* Y_pdx) {
    const int full_blocks = d_trail / pdx_block_size;
    const int remainder = d_trail % pdx_block_size;

    // Write cursor into Y_pdx, counted in floats; only ever moves forward.
    size_t write_pos = 0;

    // Append the [first_dim, first_dim + width) slice of every row of Y.
    auto gather_block = [&](size_t first_dim, int width) {
        for (int row = 0; row < k; ++row) {
            const float* src =
                    Y + static_cast<size_t>(row) * d_trail + first_dim;
            std::memcpy(Y_pdx + write_pos, src, width * sizeof(float));
            write_pos += width;
        }
    };

    for (int blk = 0; blk < full_blocks; ++blk) {
        gather_block(static_cast<size_t>(blk) * pdx_block_size, pdx_block_size);
    }

    // Leftover dims that do not fill a whole block are packed the same way.
    if (remainder > 0) {
        gather_block(
                static_cast<size_t>(full_blocks) * pdx_block_size, remainder);
    }
}
|
|
47
|
+
|
|
48
|
+
/** Scatter a block-packed buffer `Y_pdx` back into the row-major
 * (k, d_trail) matrix `Y`.
 *
 * `Y_pdx` is consumed sequentially. For each block of `pdx_block_size` dims
 * (and then for the narrower trailing block of `d_trail % pdx_block_size`
 * dims, if any) the next k slices are written to the matching columns of
 * row 0, row 1, ..., row k-1 of `Y`.
 *
 * @param Y_pdx           packed source buffer, read sequentially
 * @param k               number of rows
 * @param d_trail         number of dims per row
 * @param pdx_block_size  width of a full block, in dims
 * @param Y               destination matrix, written as k rows of d_trail
 */
void de_pdxify(
        const float* Y_pdx,
        int k,
        int d_trail,
        int pdx_block_size,
        float* Y) {
    const int full_blocks = d_trail / pdx_block_size;
    const int remainder = d_trail % pdx_block_size;

    // Read cursor into Y_pdx, counted in floats; only ever moves forward.
    size_t read_pos = 0;

    // Fill the [first_dim, first_dim + width) slice of every row of Y.
    auto scatter_block = [&](size_t first_dim, int width) {
        for (int row = 0; row < k; ++row) {
            float* dst = Y + static_cast<size_t>(row) * d_trail + first_dim;
            std::memcpy(dst, Y_pdx + read_pos, width * sizeof(float));
            read_pos += width;
        }
    };

    for (int blk = 0; blk < full_blocks; ++blk) {
        scatter_block(
                static_cast<size_t>(blk) * pdx_block_size, pdx_block_size);
    }

    // Leftover dims that do not fill a whole block are unpacked the same way.
    if (remainder > 0) {
        scatter_block(
                static_cast<size_t>(full_blocks) * pdx_block_size, remainder);
    }
}
|
|
79
|
+
|
|
80
|
+
/** For each of the n rows of the row-major matrix `X` (row stride d), store
 * in `norms[row]` the sum of squares of that row's first p entries.
 *
 * Rows are independent, so the outer loop carries an OpenMP
 * `parallel for`; each row's sum is accumulated in ascending dim order.
 *
 * @param X      input matrix, read as n rows spaced d floats apart
 * @param n      number of rows
 * @param d      row stride, in floats
 * @param p      number of leading entries of each row to accumulate
 * @param norms  output array, one value written per row
 */
void compute_partial_norms(const float* X, int n, int d, int p, float* norms) {
#pragma omp parallel for
    for (int r = 0; r < n; ++r) {
        const float* vec = X + static_cast<size_t>(r) * d;
        float acc = 0.0f;
        int c = 0;
        while (c < p) {
            acc += vec[c] * vec[c];
            ++c;
        }
        norms[r] = acc;
    }
}
|
|
91
|
+
|
|
92
|
+
} // namespace detail
|
|
93
|
+
} // namespace faiss
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
namespace faiss {
|
|
11
|
+
namespace detail {
|
|
12
|
+
|
|
13
|
+
/** Reorder a row-major (k, d_trail) matrix into PDX block-column-major
|
|
14
|
+
* layout. Dims are split into blocks of `pdx_block_size`; inside each block
|
|
15
|
+
* the k centroids are stored back to back, each as its contiguous slice of
|
|
16
|
+
* that block's dims — the access pattern that makes progressive pruning
|
|
17
|
+
* cache-friendly. Trailing block (size `d_trail % pdx_block_size`) uses
|
|
18
|
+
* the same convention. `Y_pdx` must already be sized to `k * d_trail`. */
|
|
19
|
+
void pdxify(
|
|
20
|
+
const float* Y,
|
|
21
|
+
int k,
|
|
22
|
+
int d_trail,
|
|
23
|
+
int pdx_block_size,
|
|
24
|
+
float* Y_pdx);
|
|
25
|
+
|
|
26
|
+
/** Inverse of pdxify (used in tests for the bit-identical round-trip
|
|
27
|
+
* check). */
|
|
28
|
+
void de_pdxify(
|
|
29
|
+
const float* Y_pdx,
|
|
30
|
+
int k,
|
|
31
|
+
int d_trail,
|
|
32
|
+
int pdx_block_size,
|
|
33
|
+
float* Y);
|
|
34
|
+
|
|
35
|
+
/** norms[i] = sum_{m<p} X[i, m]^2 for row-major X of shape (n, d).
|
|
36
|
+
* Parallel over rows. Used by SuperKMeans to keep partial-norm caches
|
|
37
|
+
* in sync with the current d_prime. */
|
|
38
|
+
void compute_partial_norms(const float* X, int n, int d, int p, float* norms);
|
|
39
|
+
|
|
40
|
+
} // namespace detail
|
|
41
|
+
} // namespace faiss
|