faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* Objective-C++ header (uses Metal types).
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
#pragma once
|
|
12
|
+
|
|
13
|
+
#import <Metal/Metal.h>
|
|
14
|
+
|
|
15
|
+
#include <faiss/Index.h>
|
|
16
|
+
#include <faiss/gpu_metal/MetalIndex.h>
|
|
17
|
+
|
|
18
|
+
namespace faiss {
|
|
19
|
+
struct IndexFlat;
|
|
20
|
+
}
|
|
21
|
+
#include <memory>
|
|
22
|
+
|
|
23
|
+
namespace faiss {
|
|
24
|
+
namespace gpu_metal {
|
|
25
|
+
|
|
26
|
+
/// Flat index that stores vectors in an MTLBuffer. Supports L2 and inner
|
|
27
|
+
/// product. Search runs on GPU via Metal compute (distance + top-k kernels).
|
|
28
|
+
class MetalIndexFlat : public MetalIndex {
|
|
29
|
+
public:
|
|
30
|
+
MetalIndexFlat(
|
|
31
|
+
std::shared_ptr<MetalResources> resources,
|
|
32
|
+
int dims,
|
|
33
|
+
faiss::MetricType metric,
|
|
34
|
+
float metricArg = 0.0f,
|
|
35
|
+
MetalIndexConfig config = MetalIndexConfig());
|
|
36
|
+
|
|
37
|
+
~MetalIndexFlat() override;
|
|
38
|
+
|
|
39
|
+
void add(idx_t n, const float* x) override;
|
|
40
|
+
void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
|
|
41
|
+
void reset() override;
|
|
42
|
+
void search(
|
|
43
|
+
idx_t n,
|
|
44
|
+
const float* x,
|
|
45
|
+
idx_t k,
|
|
46
|
+
float* distances,
|
|
47
|
+
idx_t* labels,
|
|
48
|
+
const SearchParameters* params = nullptr) const override;
|
|
49
|
+
|
|
50
|
+
/// Copy vectors to a CPU IndexFlat (e.g. for index_metal_gpu_to_cpu).
|
|
51
|
+
void copyTo(::faiss::IndexFlat* index) const;
|
|
52
|
+
|
|
53
|
+
private:
|
|
54
|
+
/// Ensures vector buffer can hold at least \p newNtotal vectors; grows
|
|
55
|
+
/// buffer if necessary.
|
|
56
|
+
void ensureCapacity(idx_t newNtotal);
|
|
57
|
+
|
|
58
|
+
/// Vector storage (row-major, ntotal * d floats). Nil when empty.
|
|
59
|
+
id<MTLBuffer> vectorsBuffer_;
|
|
60
|
+
/// Capacity of vectorsBuffer_ in number of vectors (0 if buffer is nil).
|
|
61
|
+
size_t capacityVecs_;
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
} // namespace gpu_metal
|
|
65
|
+
} // namespace faiss
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* MetalKernels: typed wrapper around Metal compute kernels.
|
|
9
|
+
* Owns library compilation, pipeline caching, and dispatch encoding.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#pragma once
|
|
13
|
+
|
|
14
|
+
#import <Metal/Metal.h>
|
|
15
|
+
#include <faiss/MetricType.h>
|
|
16
|
+
#include <string>
|
|
17
|
+
#include <unordered_map>
|
|
18
|
+
|
|
19
|
+
namespace faiss {
|
|
20
|
+
namespace gpu_metal {
|
|
21
|
+
|
|
22
|
+
class MetalKernels {
|
|
23
|
+
public:
|
|
24
|
+
explicit MetalKernels(id<MTLDevice> device);
|
|
25
|
+
~MetalKernels();
|
|
26
|
+
|
|
27
|
+
bool isValid() const;
|
|
28
|
+
static constexpr int kMaxK = 256;
|
|
29
|
+
|
|
30
|
+
void encodeDistanceMatrix(
|
|
31
|
+
id<MTLComputeCommandEncoder> enc,
|
|
32
|
+
id<MTLBuffer> queries,
|
|
33
|
+
id<MTLBuffer> vectors,
|
|
34
|
+
id<MTLBuffer> distances,
|
|
35
|
+
int nq,
|
|
36
|
+
int nb,
|
|
37
|
+
int d,
|
|
38
|
+
MetricType metric);
|
|
39
|
+
|
|
40
|
+
void encodeTopKThreadgroup(
|
|
41
|
+
id<MTLComputeCommandEncoder> enc,
|
|
42
|
+
id<MTLBuffer> distances,
|
|
43
|
+
id<MTLBuffer> outDist,
|
|
44
|
+
id<MTLBuffer> outIdx,
|
|
45
|
+
int nq,
|
|
46
|
+
int nb,
|
|
47
|
+
int k,
|
|
48
|
+
bool wantMin);
|
|
49
|
+
|
|
50
|
+
static int selectTopKVariantIndex(int k);
|
|
51
|
+
|
|
52
|
+
private:
|
|
53
|
+
id<MTLComputePipelineState> pipeline(const char* name);
|
|
54
|
+
|
|
55
|
+
id<MTLDevice> device_;
|
|
56
|
+
id<MTLLibrary> library_;
|
|
57
|
+
std::unordered_map<std::string, id<MTLComputePipelineState>> cache_;
|
|
58
|
+
|
|
59
|
+
static constexpr int kTopKVariantSizes[] = {32, 64, 128, 256};
|
|
60
|
+
static constexpr int kNumTopKVariants = 4;
|
|
61
|
+
};
|
|
62
|
+
|
|
63
|
+
/// Returns a reference to a MetalKernels object for \p device.
/// NOTE(review): ownership and lifetime of the referenced object (e.g. a
/// per-device cached instance) are not visible from this header -- confirm
/// in the implementation before holding on to the reference.
MetalKernels& getMetalKernels(id<MTLDevice> device);
|
|
64
|
+
|
|
65
|
+
} // namespace gpu_metal
|
|
66
|
+
} // namespace faiss
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* This header uses Objective-C types (Metal framework: id, nil, MTLDevice,
|
|
9
|
+
* etc.). For correct IDE/linter behavior, associate this file with
|
|
10
|
+
* "Objective-C++".
|
|
11
|
+
*
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
#pragma once
|
|
15
|
+
|
|
16
|
+
#import <Foundation/Foundation.h>
|
|
17
|
+
#import <Metal/Metal.h>
|
|
18
|
+
|
|
19
|
+
#include <cstddef>
|
|
20
|
+
|
|
21
|
+
namespace faiss {
|
|
22
|
+
namespace gpu_metal {
|
|
23
|
+
|
|
24
|
+
/// Allocation type for Metal buffers (mirrors faiss::gpu::AllocType roles).
/// Taken as the `type` argument of MetalResources::allocBuffer and
/// MetalResources::deallocBuffer.
/// This is an unscoped enum, so the enumerators are visible directly in
/// faiss::gpu_metal. Values are assigned explicitly; 5-9 are unused.
|
|
25
|
+
enum MetalAllocType {
|
|
26
|
+
Other = 0,
|
|
27
|
+
FlatData = 1,
|
|
28
|
+
IVFLists = 2,
|
|
29
|
+
Quantizer = 3,
|
|
30
|
+
QuantizerPrecomputedCodes = 4,
|
|
31
|
+
TemporaryMemoryBuffer = 10,
|
|
32
|
+
TemporaryMemoryOverflow = 11,
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
/// Owns Metal device, command queue, and provides buffer allocation.
|
|
36
|
+
/// Mirrors the roles of faiss::gpu::GpuResources for the Metal backend.
|
|
37
|
+
class MetalResources {
|
|
38
|
+
public:
|
|
39
|
+
MetalResources();
|
|
40
|
+
~MetalResources();
|
|
41
|
+
|
|
42
|
+
MetalResources(const MetalResources&) = delete;
|
|
43
|
+
MetalResources& operator=(const MetalResources&) = delete;
|
|
44
|
+
|
|
45
|
+
/// Returns the Metal device (nil if no Metal-capable device is available).
|
|
46
|
+
id<MTLDevice> getDevice() const {
|
|
47
|
+
return device_;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/// Returns the command queue for the device (nil if device is nil).
|
|
51
|
+
id<MTLCommandQueue> getCommandQueue() const {
|
|
52
|
+
return commandQueue_;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
/// Allocates a buffer of the given size (bytes). Caller owns the returned
|
|
56
|
+
/// buffer and must call deallocBuffer when done, or the buffer will leak.
|
|
57
|
+
/// Returns nil on failure (e.g. device nil or allocation failure).
|
|
58
|
+
id<MTLBuffer> allocBuffer(size_t size, MetalAllocType type);
|
|
59
|
+
|
|
60
|
+
/// Releases a buffer previously returned by allocBuffer. The caller must
|
|
61
|
+
/// not use the buffer after this call.
|
|
62
|
+
void deallocBuffer(id<MTLBuffer> buffer, MetalAllocType type);
|
|
63
|
+
|
|
64
|
+
/// Blocks until all work submitted to the default command queue has
|
|
65
|
+
/// completed.
|
|
66
|
+
void synchronize();
|
|
67
|
+
|
|
68
|
+
/// Returns true if the Metal device and queue are available.
|
|
69
|
+
bool isAvailable() const {
|
|
70
|
+
return device_ != nil && commandQueue_ != nil;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
private:
|
|
74
|
+
id<MTLDevice> device_;
|
|
75
|
+
id<MTLCommandQueue> commandQueue_;
|
|
76
|
+
};
|
|
77
|
+
|
|
78
|
+
} // namespace gpu_metal
|
|
79
|
+
} // namespace faiss
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* Mirrors the role of StandardGpuResources for the Metal backend.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
#pragma once
|
|
12
|
+
|
|
13
|
+
#include <faiss/gpu_metal/MetalResources.h>
|
|
14
|
+
#include <memory>
|
|
15
|
+
|
|
16
|
+
namespace faiss {
|
|
17
|
+
namespace gpu_metal {
|
|
18
|
+
|
|
19
|
+
/// Default Metal resources (single device). Use with index_cpu_to_metal_gpu.
|
|
20
|
+
class StandardMetalResources {
|
|
21
|
+
public:
|
|
22
|
+
StandardMetalResources();
|
|
23
|
+
std::shared_ptr<MetalResources> getResources() const {
|
|
24
|
+
return res_;
|
|
25
|
+
}
|
|
26
|
+
bool isAvailable() const {
|
|
27
|
+
return res_ && res_->isAvailable();
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
private:
|
|
31
|
+
std::shared_ptr<MetalResources> res_;
|
|
32
|
+
};
|
|
33
|
+
|
|
34
|
+
} // namespace gpu_metal
|
|
35
|
+
} // namespace faiss
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#include <faiss/impl/AdSampling.h>
|
|
9
|
+
|
|
10
|
+
#include <cmath>
|
|
11
|
+
|
|
12
|
+
#include <faiss/impl/FaissAssert.h>
|
|
13
|
+
|
|
14
|
+
namespace faiss {
|
|
15
|
+
namespace detail {
|
|
16
|
+
|
|
17
|
+
/// Approximate quantile function (inverse CDF) of the standard normal
/// distribution, evaluated with a three-branch rational polynomial
/// (lower tail, central region, upper tail; breakpoint p_low = 0.02425).
///
/// NOTE(review): the coefficient tables appear to be those of the widely
/// used Acklam approximation -- confirm provenance and error bound.
///
/// @param p  probability, expected in [0, 1]
/// @return   z such that Phi(z) ~= p. Returns -infinity for p == 0 and
///           +infinity for p == 1 (the mathematical limits; the plain
///           formula would yield inf/inf = NaN there). Inputs outside
///           [0, 1] and NaN inputs produce NaN through std::log.
double normal_quantile(double p) {
    // Central-region numerator / denominator coefficients.
    static constexpr double a[] = {
            -3.969683028665376e+01,
            2.209460984245205e+02,
            -2.759285104469687e+02,
            1.383577518672690e+02,
            -3.066479806614716e+01,
            2.506628277459239e+00,
    };
    static constexpr double b[] = {
            -5.447609879822406e+01,
            1.615858368580409e+02,
            -1.556989798598866e+02,
            6.680131188771972e+01,
            -1.328068155288572e+01,
    };
    // Tail numerator / denominator coefficients.
    static constexpr double c[] = {
            -7.784894002430293e-03,
            -3.223964580411365e-01,
            -2.400758277161838e+00,
            -2.549732539343734e+00,
            4.374664141464968e+00,
            2.938163982698783e+00,
    };
    static constexpr double d[] = {
            7.784695709041462e-03,
            3.224671290700398e-01,
            2.445134137142996e+00,
            3.754408661907416e+00,
    };
    constexpr double p_low = 0.02425;
    constexpr double p_high = 1.0 - p_low;

    // Exact boundaries: log(0) makes both tail polynomials infinite, so the
    // ratio would be NaN. Return the limiting values instead.
    if (p == 0.0) {
        return -HUGE_VAL;
    }
    if (p == 1.0) {
        return HUGE_VAL;
    }

    // Shared tail rational in q = sqrt(-2 log(tail probability)); the
    // evaluation order matches the original expression term for term.
    const auto tail_ratio = [](double q) {
        const double num =
                ((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q +
                c[5];
        const double den =
                (((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1.0;
        return num / den;
    };

    if (p < p_low) {
        // Lower tail.
        return tail_ratio(std::sqrt(-2.0 * std::log(p)));
    }
    if (p <= p_high) {
        // Central region: odd rational function of q = p - 0.5.
        const double q = p - 0.5;
        const double r = q * q;
        return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r +
                a[5]) *
                q /
                (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r +
                 1.0);
    }
    // Upper tail (also reached for NaN input): mirror of the lower tail.
    return -tail_ratio(std::sqrt(-2.0 * std::log(1.0 - p)));
}
|
|
70
|
+
|
|
71
|
+
double chi2_quantile_wh(int p, double alpha) {
|
|
72
|
+
FAISS_THROW_IF_NOT(p > 0);
|
|
73
|
+
// Wilson-Hilferty cube-root approximation:
|
|
74
|
+
// ((X/p)^(1/3) - (1 - 2/(9p))) / sqrt(2/(9p)) ~ N(0,1)
|
|
75
|
+
// inverted into a quantile formula.
|
|
76
|
+
//
|
|
77
|
+
// Domain constraint: for very small alpha (< ~0.001) and small p
|
|
78
|
+
// (< 4), t can go negative, producing a negative chi-squared quantile
|
|
79
|
+
// (physically impossible). In practice this cannot happen here:
|
|
80
|
+
// precompute_ad_thresholds calls with alpha = 1 - epsilon where
|
|
81
|
+
// epsilon = ad_epsilon_factor / d, and d_prime_min >= 16, so
|
|
82
|
+
// p >= 16 and alpha >= 1 - 1/16 = 0.9375 — well inside the accurate
|
|
83
|
+
// region of the approximation.
|
|
84
|
+
const double z = normal_quantile(alpha);
|
|
85
|
+
const double t = 1.0 - 2.0 / (9.0 * p) + z * std::sqrt(2.0 / (9.0 * p));
|
|
86
|
+
return p * t * t * t;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
std::vector<float> precompute_ad_thresholds(int d, double epsilon) {
|
|
90
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
91
|
+
epsilon > 0.0 && epsilon < 1.0,
|
|
92
|
+
"precompute_ad_thresholds: epsilon must be in (0, 1)");
|
|
93
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
94
|
+
d > 0, "precompute_ad_thresholds: d must be positive");
|
|
95
|
+
std::vector<float> coeff(d + 1);
|
|
96
|
+
for (int p = 1; p <= d; p++) {
|
|
97
|
+
coeff[p] = static_cast<float>(chi2_quantile_wh(p, 1.0 - epsilon) / d);
|
|
98
|
+
}
|
|
99
|
+
return coeff;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
} // namespace detail
|
|
103
|
+
} // namespace faiss
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <vector>
|
|
11
|
+
|
|
12
|
+
namespace faiss {
|
|
13
|
+
namespace detail {
|
|
14
|
+
|
|
15
|
+
/** Inverse standard normal CDF. Three-branch rational polynomial,
|
|
16
|
+
* relative error < 1.15e-9 over `p in (0, 1)`. Behavior at the boundaries
|
|
17
|
+
* (p <= 0 or p >= 1) is unspecified — returns NaN or +/-inf. */
|
|
18
|
+
double normal_quantile(double p);
|
|
19
|
+
|
|
20
|
+
/** Chi-squared quantile via cube-root approximation. Validated to within
|
|
21
|
+
* 2% of scipy for `p in [16, d]` and `alpha <= 1 - 1e-6`. Accuracy
|
|
22
|
+
* degrades for smaller `p` or for `alpha` near 1. */
|
|
23
|
+
double chi2_quantile_wh(int p, double alpha);
|
|
24
|
+
|
|
25
|
+
/** Build ADSampling threshold table of size `d + 1`:
|
|
26
|
+
* coeff[p] = chi2_quantile_wh(p, 1 - epsilon) / d.
|
|
27
|
+
*
|
|
28
|
+
* Indexing: coeff[0] is reserved (left at 0.0f). coeff[1..15] are
|
|
29
|
+
* computed but NOT accuracy-bounded — callers requiring the 2% scipy
|
|
30
|
+
* tolerance must consume only coeff[16..d]. SuperKMeans enforces
|
|
31
|
+
* this via its `d_prime_min = 16` parameter. */
|
|
32
|
+
std::vector<float> precompute_ad_thresholds(int d, double epsilon);
|
|
33
|
+
|
|
34
|
+
} // namespace detail
|
|
35
|
+
} // namespace faiss
|
|
@@ -48,13 +48,13 @@ int sgemm_(
|
|
|
48
48
|
namespace faiss {
|
|
49
49
|
|
|
50
50
|
AdditiveQuantizer::AdditiveQuantizer(
|
|
51
|
-
size_t
|
|
52
|
-
const std::vector<size_t>&
|
|
53
|
-
Search_type_t
|
|
54
|
-
: Quantizer(
|
|
55
|
-
M(
|
|
56
|
-
nbits(
|
|
57
|
-
search_type(
|
|
51
|
+
size_t d_in,
|
|
52
|
+
const std::vector<size_t>& nbits_in,
|
|
53
|
+
Search_type_t search_type_in)
|
|
54
|
+
: Quantizer(d_in),
|
|
55
|
+
M(nbits_in.size()),
|
|
56
|
+
nbits(nbits_in),
|
|
57
|
+
search_type(search_type_in) {
|
|
58
58
|
set_derived_values();
|
|
59
59
|
}
|
|
60
60
|
|
|
@@ -65,7 +65,7 @@ void AdditiveQuantizer::set_derived_values() {
|
|
|
65
65
|
tot_bits = 0;
|
|
66
66
|
only_8bit = true;
|
|
67
67
|
codebook_offsets.resize(M + 1, 0);
|
|
68
|
-
for (
|
|
68
|
+
for (size_t i = 0; i < M; i++) {
|
|
69
69
|
int nbit = nbits[i];
|
|
70
70
|
FAISS_CHECK_RANGE(nbit, 0, 31);
|
|
71
71
|
size_t k = (size_t)1 << nbit;
|
|
@@ -107,7 +107,7 @@ void AdditiveQuantizer::set_derived_values() {
|
|
|
107
107
|
void AdditiveQuantizer::train_norm(size_t n, const float* norms) {
|
|
108
108
|
norm_min = HUGE_VALF;
|
|
109
109
|
norm_max = -HUGE_VALF;
|
|
110
|
-
for (
|
|
110
|
+
for (size_t i = 0; i < n; i++) {
|
|
111
111
|
if (norms[i] < norm_min) {
|
|
112
112
|
norm_min = norms[i];
|
|
113
113
|
}
|
|
@@ -167,7 +167,7 @@ void AdditiveQuantizer::compute_codebook_tables() {
|
|
|
167
167
|
fvec_norms_L2sqr(
|
|
168
168
|
centroid_norms.data(), codebooks.data(), d, total_codebook_size);
|
|
169
169
|
size_t cross_table_size = 0;
|
|
170
|
-
for (
|
|
170
|
+
for (size_t m = 0; m < M; m++) {
|
|
171
171
|
FAISS_CHECK_RANGE(nbits[m], 0, 31);
|
|
172
172
|
size_t K = (size_t)1 << nbits[m];
|
|
173
173
|
size_t product =
|
|
@@ -177,7 +177,7 @@ void AdditiveQuantizer::compute_codebook_tables() {
|
|
|
177
177
|
}
|
|
178
178
|
codebook_cross_products.resize(cross_table_size);
|
|
179
179
|
size_t ofs = 0;
|
|
180
|
-
for (
|
|
180
|
+
for (size_t m = 1; m < M; m++) {
|
|
181
181
|
FINTEGER ki = (size_t)1 << nbits[m];
|
|
182
182
|
FINTEGER kk = codebook_offsets[m];
|
|
183
183
|
FINTEGER di = d;
|
|
@@ -186,7 +186,7 @@ void AdditiveQuantizer::compute_codebook_tables() {
|
|
|
186
186
|
FAISS_THROW_IF_NOT_FMT(
|
|
187
187
|
add_no_overflow(ofs, step_size, "cross product table offset") <=
|
|
188
188
|
cross_table_size,
|
|
189
|
-
"cross product table overflow at step %
|
|
189
|
+
"cross product table overflow at step %zd: "
|
|
190
190
|
"%zd + %zd > %zd",
|
|
191
191
|
m,
|
|
192
192
|
ofs,
|
|
@@ -300,11 +300,12 @@ void AdditiveQuantizer::pack_codes(
|
|
|
300
300
|
norms = norm_buf.data();
|
|
301
301
|
}
|
|
302
302
|
}
|
|
303
|
+
int64_t n_signed = n;
|
|
303
304
|
#pragma omp parallel for if (n > 1000)
|
|
304
|
-
for (int64_t i = 0; i <
|
|
305
|
+
for (int64_t i = 0; i < n_signed; i++) {
|
|
305
306
|
const int32_t* codes1 = codes + i * ld_codes;
|
|
306
307
|
BitstringWriter bsw(packed_codes + i * code_size, code_size);
|
|
307
|
-
for (
|
|
308
|
+
for (size_t m = 0; m < M; m++) {
|
|
308
309
|
bsw.write(codes1[m], nbits[m]);
|
|
309
310
|
}
|
|
310
311
|
if (norm_bits != 0) {
|
|
@@ -317,12 +318,13 @@ void AdditiveQuantizer::decode(const uint8_t* code, float* x, size_t n) const {
|
|
|
317
318
|
FAISS_THROW_IF_NOT_MSG(
|
|
318
319
|
is_trained, "The additive quantizer is not trained yet.");
|
|
319
320
|
|
|
321
|
+
int64_t n_signed = n;
|
|
320
322
|
// standard additive quantizer decoding
|
|
321
323
|
#pragma omp parallel for if (n > 100)
|
|
322
|
-
for (int64_t i = 0; i <
|
|
324
|
+
for (int64_t i = 0; i < n_signed; i++) {
|
|
323
325
|
BitstringReader bsr(code + i * code_size, code_size);
|
|
324
326
|
float* xi = x + i * d;
|
|
325
|
-
for (
|
|
327
|
+
for (size_t m = 0; m < M; m++) {
|
|
326
328
|
int idx = bsr.read(nbits[m]);
|
|
327
329
|
const float* c = codebooks.data() + d * (codebook_offsets[m] + idx);
|
|
328
330
|
if (m == 0) {
|
|
@@ -346,12 +348,13 @@ void AdditiveQuantizer::decode_unpacked(
|
|
|
346
348
|
ld_codes = M;
|
|
347
349
|
}
|
|
348
350
|
|
|
351
|
+
int64_t n_signed = n;
|
|
349
352
|
// standard additive quantizer decoding
|
|
350
353
|
#pragma omp parallel for if (n > 1000)
|
|
351
|
-
for (int64_t i = 0; i <
|
|
354
|
+
for (int64_t i = 0; i < n_signed; i++) {
|
|
352
355
|
const int32_t* codesi = code + i * ld_codes;
|
|
353
356
|
float* xi = x + i * d;
|
|
354
|
-
for (
|
|
357
|
+
for (size_t m = 0; m < M; m++) {
|
|
355
358
|
int idx = codesi[m];
|
|
356
359
|
const float* c = codebooks.data() + d * (codebook_offsets[m] + idx);
|
|
357
360
|
if (m == 0) {
|
|
@@ -371,13 +374,14 @@ AdditiveQuantizer::~AdditiveQuantizer() {}
|
|
|
371
374
|
|
|
372
375
|
void AdditiveQuantizer::compute_centroid_norms(float* norms) const {
|
|
373
376
|
size_t ntotal = (size_t)1 << tot_bits;
|
|
377
|
+
int64_t ntotal_signed = ntotal;
|
|
374
378
|
// TODO: make tree of partial sums
|
|
375
379
|
with_simd_level([&]<SIMDLevel SL>() {
|
|
376
380
|
#pragma omp parallel
|
|
377
381
|
{
|
|
378
382
|
std::vector<float> tmp(d);
|
|
379
383
|
#pragma omp for
|
|
380
|
-
for (int64_t i = 0; i <
|
|
384
|
+
for (int64_t i = 0; i < ntotal_signed; i++) {
|
|
381
385
|
decode_64bit(i, tmp.data());
|
|
382
386
|
norms[i] = fvec_norm_L2sqr<SL>(tmp.data(), d);
|
|
383
387
|
}
|
|
@@ -386,7 +390,7 @@ void AdditiveQuantizer::compute_centroid_norms(float* norms) const {
|
|
|
386
390
|
}
|
|
387
391
|
|
|
388
392
|
void AdditiveQuantizer::decode_64bit(idx_t bits, float* xi) const {
|
|
389
|
-
for (
|
|
393
|
+
for (size_t m = 0; m < M; m++) {
|
|
390
394
|
idx_t idx = bits & (((size_t)1 << nbits[m]) - 1);
|
|
391
395
|
bits >>= nbits[m];
|
|
392
396
|
const float* c = codebooks.data() + d * (codebook_offsets[m] + idx);
|
|
@@ -436,7 +440,7 @@ void compute_inner_prod_with_LUT(
|
|
|
436
440
|
const float* LUT,
|
|
437
441
|
float* ips) {
|
|
438
442
|
size_t prev_size = 1;
|
|
439
|
-
for (
|
|
443
|
+
for (size_t m = 0; m < aq.M; m++) {
|
|
440
444
|
const float* LUTm = LUT + aq.codebook_offsets[m];
|
|
441
445
|
int nb = aq.nbits[m];
|
|
442
446
|
size_t nc = (size_t)1 << nb;
|
|
@@ -509,7 +513,7 @@ void AdditiveQuantizer::knn_centroids_L2(
|
|
|
509
513
|
// ||x - y||^2 = ||x||^2 + ||y||^2 - 2 * <x,y>
|
|
510
514
|
|
|
511
515
|
maxheap_heapify(k, distances_i, labels_i);
|
|
512
|
-
for (
|
|
516
|
+
for (size_t j = 0; j < ntotal; j++) {
|
|
513
517
|
float disj = q_norms[i] + norms[j] - 2 * dis[j];
|
|
514
518
|
if (disj < distances_i[0]) {
|
|
515
519
|
heap_replace_top<CMax<float, int64_t>>(
|
|
@@ -532,7 +536,7 @@ float accumulate_IPs(
|
|
|
532
536
|
BitstringReader& bs,
|
|
533
537
|
const float* LUT) {
|
|
534
538
|
float accu = 0;
|
|
535
|
-
for (
|
|
539
|
+
for (size_t m = 0; m < aq.M; m++) {
|
|
536
540
|
size_t nbit = aq.nbits[m];
|
|
537
541
|
int idx = bs.read(nbit);
|
|
538
542
|
accu += LUT[idx];
|
|
@@ -545,7 +549,7 @@ float compute_norm_from_LUT(const AdditiveQuantizer& aq, BitstringReader& bs) {
|
|
|
545
549
|
float accu = 0;
|
|
546
550
|
std::vector<int> idx(aq.M);
|
|
547
551
|
const float* c = aq.codebook_cross_products.data();
|
|
548
|
-
for (
|
|
552
|
+
for (size_t m = 0; m < aq.M; m++) {
|
|
549
553
|
size_t nbit = aq.nbits[m];
|
|
550
554
|
int i = bs.read(nbit);
|
|
551
555
|
size_t K = 1 << nbit;
|
|
@@ -553,7 +557,7 @@ float compute_norm_from_LUT(const AdditiveQuantizer& aq, BitstringReader& bs) {
|
|
|
553
557
|
|
|
554
558
|
accu += aq.centroid_norms[aq.codebook_offsets[m] + i];
|
|
555
559
|
|
|
556
|
-
for (
|
|
560
|
+
for (size_t l = 0; l < m; l++) {
|
|
557
561
|
int j = idx[l];
|
|
558
562
|
accu += 2 * c[j * K + i];
|
|
559
563
|
c += (1 << aq.nbits[l]) * K;
|
|
@@ -20,10 +20,11 @@ namespace faiss {
|
|
|
20
20
|
* RangeSearchResult
|
|
21
21
|
***********************************************************************/
|
|
22
22
|
|
|
23
|
-
RangeSearchResult::RangeSearchResult(size_t
|
|
23
|
+
RangeSearchResult::RangeSearchResult(size_t nq_in, bool alloc_lims)
|
|
24
|
+
: nq(nq_in) {
|
|
24
25
|
if (alloc_lims) {
|
|
25
|
-
lims = new size_t[
|
|
26
|
-
memset(lims, 0, sizeof(*lims) * (
|
|
26
|
+
lims = new size_t[nq_in + 1];
|
|
27
|
+
memset(lims, 0, sizeof(*lims) * (nq_in + 1));
|
|
27
28
|
} else {
|
|
28
29
|
lims = nullptr;
|
|
29
30
|
}
|
|
@@ -39,7 +40,7 @@ void RangeSearchResult::do_allocation() {
|
|
|
39
40
|
// simultaneously
|
|
40
41
|
FAISS_THROW_IF_NOT(labels == nullptr && distances == nullptr);
|
|
41
42
|
size_t ofs = 0;
|
|
42
|
-
for (
|
|
43
|
+
for (size_t i = 0; i < nq; i++) {
|
|
43
44
|
size_t n = lims[i];
|
|
44
45
|
lims[i] = ofs;
|
|
45
46
|
ofs += n;
|
|
@@ -59,12 +60,12 @@ RangeSearchResult::~RangeSearchResult() {
|
|
|
59
60
|
* BufferList
|
|
60
61
|
***********************************************************************/
|
|
61
62
|
|
|
62
|
-
BufferList::BufferList(size_t
|
|
63
|
-
wp =
|
|
63
|
+
BufferList::BufferList(size_t buffer_size_in) : buffer_size(buffer_size_in) {
|
|
64
|
+
wp = buffer_size_in;
|
|
64
65
|
}
|
|
65
66
|
|
|
66
67
|
BufferList::~BufferList() {
|
|
67
|
-
for (
|
|
68
|
+
for (size_t i = 0; i < buffers.size(); i++) {
|
|
68
69
|
delete[] buffers[i].ids;
|
|
69
70
|
delete[] buffers[i].dis;
|
|
70
71
|
}
|
|
@@ -140,7 +141,7 @@ void RangeSearchPartialResult::finalize() {
|
|
|
140
141
|
|
|
141
142
|
/// called by range_search before do_allocation
|
|
142
143
|
void RangeSearchPartialResult::set_lims() {
|
|
143
|
-
for (
|
|
144
|
+
for (size_t i = 0; i < queries.size(); i++) {
|
|
144
145
|
RangeQueryResult& qres = queries[i];
|
|
145
146
|
res->lims[qres.qno] = qres.nres;
|
|
146
147
|
}
|
|
@@ -149,7 +150,7 @@ void RangeSearchPartialResult::set_lims() {
|
|
|
149
150
|
/// called by range_search after do_allocation
|
|
150
151
|
void RangeSearchPartialResult::copy_result(bool incremental) {
|
|
151
152
|
size_t ofs = 0;
|
|
152
|
-
for (
|
|
153
|
+
for (size_t i = 0; i < queries.size(); i++) {
|
|
153
154
|
RangeQueryResult& qres = queries[i];
|
|
154
155
|
|
|
155
156
|
copy_range(
|
|
@@ -18,6 +18,8 @@
|
|
|
18
18
|
#include <mutex>
|
|
19
19
|
#include <vector>
|
|
20
20
|
|
|
21
|
+
#include <faiss/impl/InvertedListScannerStats.h>
|
|
22
|
+
|
|
21
23
|
#include <faiss/MetricType.h>
|
|
22
24
|
#include <faiss/impl/platform_macros.h>
|
|
23
25
|
|
|
@@ -92,6 +94,7 @@ struct RangeQueryResult {
|
|
|
92
94
|
idx_t qno; //< id of the query
|
|
93
95
|
size_t nres; //< nb of results for this query
|
|
94
96
|
RangeSearchPartialResult* pres;
|
|
97
|
+
InvertedListScannerStats stats;
|
|
95
98
|
|
|
96
99
|
/// called by search function to report a new result
|
|
97
100
|
void add(float dis, idx_t id);
|