faiss 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +88 -97
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +89 -417
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +374 -206
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +467 -364
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +79 -76
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +39 -69
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +56 -33
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +73 -846
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +23 -20
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +30 -52
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +38 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +150 -20
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
- data/vendor/faiss/faiss/factory_tools.cpp +9 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +15 -16
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +5 -4
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +58 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +111 -0
- data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +639 -507
- data/vendor/faiss/faiss/impl/HNSW.h +61 -44
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +53 -32
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +269 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +55 -25
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +302 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +100 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +318 -7
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +77 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
- data/vendor/faiss/faiss/impl/VisitedTable.h +70 -28
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +270 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +83 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +113 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +150 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +142 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1227 -79
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +96 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +58 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +15 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +23 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +45 -107
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +274 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +10 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +70 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +9 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +419 -19
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +387 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +341 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +425 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +290 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +337 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +157 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +90 -18
- data/vendor/faiss/faiss/index_io.h +40 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +28 -15
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +170 -86
- data/vendor/faiss/faiss/invlists/InvertedLists.h +88 -25
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +142 -21
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +33 -7
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +77 -27
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +10 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/bf16.h +34 -0
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -178
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +16 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +210 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -989
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1031 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +29 -7
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +129 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* Clone CPU <-> Metal GPU. Mirrors GpuCloner roles for Metal backend.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
#pragma once
|
|
12
|
+
|
|
13
|
+
#include <faiss/Index.h>
|
|
14
|
+
|
|
15
|
+
namespace faiss {
|
|
16
|
+
namespace gpu_metal {
|
|
17
|
+
|
|
18
|
+
class StandardMetalResources;
|
|
19
|
+
|
|
20
|
+
/// Returns the number of Metal "devices" (1 if Metal is available, else 0).
|
|
21
|
+
int get_num_gpus();
|
|
22
|
+
|
|
23
|
+
/// Clone a CPU index to Metal GPU. Supports IndexFlat, IndexFlatL2,
|
|
24
|
+
/// IndexFlatIP. The device argument must be 0. Caller owns the returned index.
|
|
25
|
+
faiss::Index* index_cpu_to_metal_gpu(
|
|
26
|
+
StandardMetalResources* res,
|
|
27
|
+
int device,
|
|
28
|
+
const faiss::Index* index);
|
|
29
|
+
|
|
30
|
+
/// Copy a Metal index back to CPU. Supports MetalIndexFlat -> IndexFlat.
|
|
31
|
+
/// Caller owns the returned index.
|
|
32
|
+
faiss::Index* index_metal_gpu_to_cpu(const faiss::Index* index);
|
|
33
|
+
|
|
34
|
+
} // namespace gpu_metal
|
|
35
|
+
} // namespace faiss
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* IVF distance computation and scan dispatch for Metal backend.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
#pragma once
|
|
12
|
+
|
|
13
|
+
#import <Metal/Metal.h>
|
|
14
|
+
|
|
15
|
+
#include <cstddef>
|
|
16
|
+
#include <cstdint>
|
|
17
|
+
#include <memory>
|
|
18
|
+
|
|
19
|
+
namespace faiss {
|
|
20
|
+
namespace gpu_metal {
|
|
21
|
+
|
|
22
|
+
class MetalResources;
|
|
23
|
+
|
|
24
|
+
int getMetalDistanceMaxK();
|
|
25
|
+
|
|
26
|
+
bool runMetalComputeNorms(
|
|
27
|
+
id<MTLDevice> device,
|
|
28
|
+
id<MTLCommandQueue> queue,
|
|
29
|
+
id<MTLBuffer> vectors,
|
|
30
|
+
int nb,
|
|
31
|
+
int d,
|
|
32
|
+
id<MTLBuffer> normsBuf,
|
|
33
|
+
bool waitForCompletion = true);
|
|
34
|
+
|
|
35
|
+
bool runMetalIVFFlatScan(
|
|
36
|
+
id<MTLDevice> device,
|
|
37
|
+
id<MTLCommandQueue> queue,
|
|
38
|
+
id<MTLBuffer> queries,
|
|
39
|
+
id<MTLBuffer> codes,
|
|
40
|
+
id<MTLBuffer> ids,
|
|
41
|
+
id<MTLBuffer> listOffset,
|
|
42
|
+
id<MTLBuffer> listLength,
|
|
43
|
+
id<MTLBuffer> coarseAssign,
|
|
44
|
+
int nq,
|
|
45
|
+
int d,
|
|
46
|
+
int k,
|
|
47
|
+
int nprobe,
|
|
48
|
+
bool isL2,
|
|
49
|
+
id<MTLBuffer> outDistances,
|
|
50
|
+
id<MTLBuffer> outIndices,
|
|
51
|
+
id<MTLBuffer> perListDistBuf,
|
|
52
|
+
id<MTLBuffer> perListIdxBuf,
|
|
53
|
+
id<MTLBuffer> interleavedCodes = nil,
|
|
54
|
+
id<MTLBuffer> interleavedCodesOffset = nil,
|
|
55
|
+
bool waitForCompletion = true);
|
|
56
|
+
|
|
57
|
+
bool runMetalIVFFlatFullSearch(
|
|
58
|
+
id<MTLDevice> device,
|
|
59
|
+
id<MTLCommandQueue> queue,
|
|
60
|
+
id<MTLBuffer> queries,
|
|
61
|
+
int nq,
|
|
62
|
+
int d,
|
|
63
|
+
int k,
|
|
64
|
+
int nprobe,
|
|
65
|
+
bool isL2,
|
|
66
|
+
id<MTLBuffer> centroids,
|
|
67
|
+
int nlist,
|
|
68
|
+
id<MTLBuffer> codes,
|
|
69
|
+
id<MTLBuffer> ids,
|
|
70
|
+
id<MTLBuffer> listOffset,
|
|
71
|
+
id<MTLBuffer> listLength,
|
|
72
|
+
id<MTLBuffer> outDistances,
|
|
73
|
+
id<MTLBuffer> outIndices,
|
|
74
|
+
id<MTLBuffer> perListDistBuf,
|
|
75
|
+
id<MTLBuffer> perListIdxBuf,
|
|
76
|
+
id<MTLBuffer> coarseDistBuf,
|
|
77
|
+
id<MTLBuffer> coarseIdxBuf,
|
|
78
|
+
id<MTLBuffer> distMatrixBuf,
|
|
79
|
+
id<MTLBuffer> centroidNormsBuf = nil,
|
|
80
|
+
int avgListLen = 256,
|
|
81
|
+
id<MTLBuffer> interleavedCodes = nil,
|
|
82
|
+
id<MTLBuffer> interleavedCodesOffset = nil,
|
|
83
|
+
bool centroidsAreFP16 = false,
|
|
84
|
+
bool waitForCompletion = true);
|
|
85
|
+
|
|
86
|
+
} // namespace gpu_metal
|
|
87
|
+
} // namespace faiss
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* Objective-C++ header. Runs L2/IP distance + top-k via Metal compute.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
#pragma once
|
|
12
|
+
|
|
13
|
+
#import <Metal/Metal.h>
|
|
14
|
+
|
|
15
|
+
#include <cstddef>
|
|
16
|
+
|
|
17
|
+
namespace faiss {
|
|
18
|
+
namespace gpu_metal {
|
|
19
|
+
|
|
20
|
+
/// runFlatSearchGPU runs the GPU search: distance matrix (L2 or IP) then top-k. Uses shared buffers
|
|
21
|
+
/// (queries, vectors, outDistances, outIndices). outIndices are int32
|
|
22
|
+
/// (0..nb-1). getMetalFlatSearchMaxK() returns the maximum k supported by the GPU top-k kernel (256).
|
|
23
|
+
int getMetalFlatSearchMaxK();
|
|
24
|
+
|
|
25
|
+
/// Returns true on success; false if pipeline creation failed.
|
|
26
|
+
bool runFlatSearchGPU(
|
|
27
|
+
id<MTLDevice> device,
|
|
28
|
+
id<MTLCommandQueue> queue,
|
|
29
|
+
id<MTLBuffer> queries, // (nq * d) float, row-major
|
|
30
|
+
id<MTLBuffer> vectors, // (nb * d) float, row-major
|
|
31
|
+
int nq,
|
|
32
|
+
int nb,
|
|
33
|
+
int d,
|
|
34
|
+
int k,
|
|
35
|
+
bool isL2, // true = L2 squared, false = inner product
|
|
36
|
+
id<MTLBuffer> outDistances, // (nq * k) float
|
|
37
|
+
id<MTLBuffer> outIndices); // (nq * k) int32
|
|
38
|
+
|
|
39
|
+
} // namespace gpu_metal
|
|
40
|
+
} // namespace faiss
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* Objective-C++ header (uses MetalResources).
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
#pragma once
|
|
12
|
+
|
|
13
|
+
#include <faiss/Index.h>
|
|
14
|
+
#include <faiss/gpu/GpuIndicesOptions.h>
|
|
15
|
+
#include <faiss/gpu_metal/MetalResources.h>
|
|
16
|
+
#include <memory>
|
|
17
|
+
|
|
18
|
+
namespace faiss {
|
|
19
|
+
namespace gpu_metal {
|
|
20
|
+
|
|
21
|
+
/// Configuration for Metal index (mirrors GpuIndexConfig roles).
|
|
22
|
+
struct MetalIndexConfig {
|
|
23
|
+
int device = 0;
|
|
24
|
+
|
|
25
|
+
bool useFloat16CoarseQuantizer = false;
|
|
26
|
+
|
|
27
|
+
faiss::gpu::IndicesOptions indicesOptions = faiss::gpu::INDICES_64_BIT;
|
|
28
|
+
|
|
29
|
+
bool interleavedLayout = true;
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
/// Base class for Metal-backed indexes. Mirrors faiss::gpu::GpuIndex.
|
|
33
|
+
class MetalIndex : public faiss::Index {
|
|
34
|
+
public:
|
|
35
|
+
MetalIndex(
|
|
36
|
+
std::shared_ptr<MetalResources> resources,
|
|
37
|
+
int dims,
|
|
38
|
+
faiss::MetricType metric,
|
|
39
|
+
float metricArg,
|
|
40
|
+
MetalIndexConfig config = MetalIndexConfig());
|
|
41
|
+
|
|
42
|
+
int getDevice() const {
|
|
43
|
+
return config_.device;
|
|
44
|
+
}
|
|
45
|
+
std::shared_ptr<MetalResources> getResources() {
|
|
46
|
+
return resources_;
|
|
47
|
+
}
|
|
48
|
+
std::shared_ptr<const MetalResources> getResources() const {
|
|
49
|
+
return resources_;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
protected:
|
|
53
|
+
std::shared_ptr<MetalResources> resources_;
|
|
54
|
+
MetalIndexConfig config_;
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
} // namespace gpu_metal
|
|
58
|
+
} // namespace faiss
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* Objective-C++ header (uses Metal types).
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
#pragma once
|
|
12
|
+
|
|
13
|
+
#import <Metal/Metal.h>
|
|
14
|
+
|
|
15
|
+
#include <faiss/Index.h>
|
|
16
|
+
#include <faiss/gpu_metal/MetalIndex.h>
|
|
17
|
+
|
|
18
|
+
namespace faiss {
|
|
19
|
+
struct IndexFlat;
|
|
20
|
+
}
|
|
21
|
+
#include <memory>
|
|
22
|
+
|
|
23
|
+
namespace faiss {
|
|
24
|
+
namespace gpu_metal {
|
|
25
|
+
|
|
26
|
+
/// Flat index that stores vectors in an MTLBuffer. Supports L2 and inner
/// product. Search runs on GPU via Metal compute (distance + top-k kernels).
class MetalIndexFlat : public MetalIndex {
   public:
    /// @param resources  Metal resources handle.
    /// @param dims       Vector dimensionality.
    /// @param metric     Metric type.
    /// @param metricArg  Metric argument (defaults to 0).
    /// @param config     Index configuration (default-constructed if omitted).
    MetalIndexFlat(
            std::shared_ptr<MetalResources> resources,
            int dims,
            faiss::MetricType metric,
            float metricArg = 0.0f,
            MetalIndexConfig config = MetalIndexConfig());

    ~MetalIndexFlat() override;

    // faiss::Index interface.
    void add(idx_t n, const float* x) override;
    void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
    void reset() override;
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels,
            const SearchParameters* params = nullptr) const override;

    /// Copy vectors to a CPU IndexFlat (e.g. for index_metal_gpu_to_cpu).
    void copyTo(::faiss::IndexFlat* index) const;

   private:
    /// Ensures vector buffer can hold at least \p newNtotal vectors; grows
    /// buffer if necessary.
    void ensureCapacity(idx_t newNtotal);

    /// Vector storage (row-major, ntotal * d floats). Nil when empty.
    /// Initialized in-class so the "nil when empty" state holds from
    /// construction, matching the buffer members of MetalIndexIVFFlat.
    id<MTLBuffer> vectorsBuffer_ = nil;

    /// Capacity of vectorsBuffer_ in number of vectors (0 if buffer is nil).
    size_t capacityVecs_ = 0;
};
|
|
63
|
+
|
|
64
|
+
} // namespace gpu_metal
|
|
65
|
+
} // namespace faiss
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* Minimal Metal IVFFlat wrapper.
|
|
9
|
+
*
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#pragma once
|
|
13
|
+
|
|
14
|
+
#import <Metal/Metal.h>

#include <faiss/IndexIVFFlat.h>
#include <faiss/gpu/GpuIndicesOptions.h>
#include <faiss/gpu_metal/MetalIndex.h>

#include <memory>
#include <vector>
|
|
21
|
+
|
|
22
|
+
namespace faiss {
|
|
23
|
+
namespace gpu_metal {
|
|
24
|
+
class MetalIVFFlatImpl;
|
|
25
|
+
} // namespace gpu_metal
|
|
26
|
+
} // namespace faiss
|
|
27
|
+
|
|
28
|
+
namespace faiss {
|
|
29
|
+
namespace gpu_metal {
|
|
30
|
+
|
|
31
|
+
/// IVFFlat index wrapper for the Metal backend.
/// For now it delegates to an internal CPU IndexIVFFlat; moving list scanning
/// to the GPU may happen in later phases.
class MetalIndexIVFFlat : public MetalIndex {
   public:
    /// Counters reported by appendDebugStats(); every field starts at zero.
    /// NOTE(review): what each counter measures is decided by the
    /// implementation, which is not visible in this header.
    struct AppendDebugStats {
        size_t relayoutEvents = 0;
        size_t movedLists = 0;
        size_t movedVectors = 0;
        size_t reusedSegmentAllocs = 0;
        size_t tailSegmentAllocs = 0;
        size_t reusedCapacityVecs = 0;
        size_t tailCapacityVecs = 0;
        size_t tailShrinkEvents = 0;
        size_t tailShrunkVecs = 0;
    };

    /// Builds an empty IVFFlat index that has its own CPU quantizer.
    MetalIndexIVFFlat(
            std::shared_ptr<MetalResources> resources,
            int dims,
            idx_t nlist,
            faiss::MetricType metric,
            float metricArg = 0.0f,
            MetalIndexConfig config = MetalIndexConfig());

    /// Builds an empty IVFFlat index around a coarse quantizer supplied by
    /// the caller. When ownFields is true this index takes ownership of
    /// `coarseQuantizer`.
    MetalIndexIVFFlat(
            std::shared_ptr<MetalResources> resources,
            faiss::Index* coarseQuantizer,
            int dims,
            idx_t nlist,
            faiss::MetricType metric,
            float metricArg = 0.0f,
            MetalIndexConfig config = MetalIndexConfig(),
            bool ownFields = false);

    /// Builds the index from an existing CPU IndexIVFFlat (used by cloners
    /// later).
    MetalIndexIVFFlat(
            std::shared_ptr<MetalResources> resources,
            const faiss::IndexIVFFlat* cpuIndex,
            MetalIndexConfig config = MetalIndexConfig());

    ~MetalIndexIVFFlat() override;

    // faiss::Index interface.
    void train(idx_t n, const float* x) override;
    void add(idx_t n, const float* x) override;
    void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
    void reset() override;

    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels,
            const SearchParameters* params = nullptr) const override;

    /// Search using coarse assignments supplied by the caller (the coarse
    /// quantizer is skipped).
    /// @param assign        Coarse list assignments (n x nprobe), row-major
    ///                      idx_t
    /// @param centroid_dis  Distances to assigned centroids (n x nprobe);
    ///                      unused by GPU scan but accepted for API
    ///                      compatibility
    /// @param store_pairs   Ignored (always false for GPU path)
    void search_preassigned(
            idx_t n,
            const float* x,
            idx_t k,
            const idx_t* assign,
            const float* centroid_dis,
            float* distances,
            idx_t* labels,
            bool store_pairs,
            const IVFSearchParameters* params = nullptr,
            IndexIVFStats* stats = nullptr) const;

    /// Copy from a CPU IndexIVFFlat (helper for future cloner support).
    void copyFrom(const faiss::IndexIVFFlat* index);

    /// Copy to a CPU IndexIVFFlat.
    void copyTo(faiss::IndexIVFFlat* index) const;

    /// Reconstruct a single stored vector by internal key.
    void reconstruct(idx_t key, float* recons) const override;

    /// Reconstruct `ni` contiguous stored vectors starting at `i0`.
    void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;

    /// Re-upload coarse quantizer centroids to GPU after external changes.
    void updateQuantizer();

    /// Return the vector indices in inverted list `listId`.
    std::vector<idx_t> getListIndices(idx_t listId) const;

    /// Return raw vector data from inverted list `listId`.
    std::vector<float> getListVectorData(idx_t listId) const;

    /// Release unused GPU memory.
    void reclaimMemory();

    /// Pre-allocate GPU storage for the given total number of vectors.
    void reserveMemory(idx_t numVecs);

    // Accessors (needed by cloner and tests).
    idx_t nlist() const;
    size_t nprobe() const;
    bool interleavedLayout() const;
    faiss::gpu::IndicesOptions indicesOptions() const;
    AppendDebugStats appendDebugStats() const;
    void resetAppendDebugStats();

   private:
    std::unique_ptr<faiss::IndexIVFFlat> cpuIndex_;
    std::unique_ptr<MetalIVFFlatImpl> gpuIvf_;
    faiss::gpu::IndicesOptions indicesOptions_;
    bool interleavedLayout_;

    // Persistent search buffers: allocated once and grown lazily. They are
    // mutable so that search(), which is const, can resize them.
    mutable id<MTLBuffer> searchQueriesBuf_ = nil;
    mutable id<MTLBuffer> searchCoarseBuf_ = nil;
    mutable id<MTLBuffer> searchOutDistBuf_ = nil;
    mutable id<MTLBuffer> searchOutIdxBuf_ = nil;
    mutable size_t searchQueriesCap_ = 0; // bytes
    mutable size_t searchCoarseCap_ = 0;
    mutable size_t searchOutDistCap_ = 0;
    mutable size_t searchOutIdxCap_ = 0;
    mutable id<MTLBuffer> searchPerListDistBuf_ = nil;
    mutable id<MTLBuffer> searchPerListIdxBuf_ = nil;
    mutable size_t searchPerListDistCap_ = 0;
    mutable size_t searchPerListIdxCap_ = 0;

    // GPU coarse quantizer buffers (cached, rebuilt on train).
    mutable id<MTLBuffer> centroidBuf_ = nil;
    mutable id<MTLBuffer> centroidNormsBuf_ = nil; // pre-computed ||c||²
    mutable id<MTLBuffer> coarseOutDistBuf_ = nil;
    mutable id<MTLBuffer> coarseOutIdxBuf_ = nil;
    mutable size_t coarseOutDistCap_ = 0;
    mutable size_t coarseOutIdxCap_ = 0;
    mutable id<MTLBuffer> distMatrixBuf_ = nil;
    mutable size_t distMatrixCap_ = 0;

    /// Makes sure `buf` holds at least `needed` bytes, reallocating when it
    /// does not; `cap` is the capacity that accompanies `buf`.
    void ensureSearchBuf_(id<MTLBuffer>& buf, size_t& cap, size_t needed) const;

    /// (Re)uploads quantizer centroids to centroidBuf_.
    void uploadCentroids_() const;
};
|
|
179
|
+
|
|
180
|
+
} // namespace gpu_metal
|
|
181
|
+
} // namespace faiss
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* MetalKernels: typed wrapper around Metal compute kernels.
|
|
9
|
+
* Owns library compilation, pipeline caching, and dispatch encoding.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#pragma once
|
|
13
|
+
|
|
14
|
+
#import <Metal/Metal.h>
|
|
15
|
+
#include <faiss/MetricType.h>
|
|
16
|
+
#include <string>
|
|
17
|
+
#include <unordered_map>
|
|
18
|
+
|
|
19
|
+
namespace faiss {
|
|
20
|
+
namespace gpu_metal {
|
|
21
|
+
|
|
22
|
+
/// Variant selector passed to MetalKernels::encodeIVFScanList.
/// NOTE(review): the criteria for choosing each variant are not visible in
/// this header; confirm against the implementation.
enum class IVFScanVariant {
    Standard,
    Small,
    Interleaved,
};
|
|
23
|
+
|
|
24
|
+
/// Typed wrapper around the Metal compute kernels: owns library compilation,
/// pipeline caching, and dispatch encoding.
/// NOTE(review): buffer layouts and element types expected by the encode*
/// methods are not specified in this header; see the implementation.
class MetalKernels {
   public:
    explicit MetalKernels(id<MTLDevice> device);
    ~MetalKernels();

    /// Reports whether this object is usable.
    /// NOTE(review): presumably false when library compilation failed —
    /// confirm against the implementation.
    bool isValid() const;

    /// Upper bound on k.
    /// NOTE(review): equals the largest entry of kTopKVariantSizes; the two
    /// are presumably meant to stay in sync — confirm.
    static constexpr int kMaxK = 2048;

    /// Encodes a distance-matrix dispatch (nq queries, nb vectors, dimension
    /// d, given metric) onto `enc`.
    void encodeDistanceMatrix(
            id<MTLComputeCommandEncoder> enc,
            id<MTLBuffer> queries,
            id<MTLBuffer> vectors,
            id<MTLBuffer> distances,
            int nq,
            int nb,
            int d,
            MetricType metric);

    /// Encodes an L2 distance dispatch that additionally takes a buffer of
    /// vector norms (`vecNorms`).
    void encodeL2WithNorms(
            id<MTLComputeCommandEncoder> enc,
            id<MTLBuffer> queries,
            id<MTLBuffer> vectors,
            id<MTLBuffer> distances,
            id<MTLBuffer> vecNorms,
            int nq,
            int nb,
            int d);

    /// Encodes a dispatch that computes norms for nb vectors of dimension d
    /// into `norms`.
    void encodeComputeNorms(
            id<MTLComputeCommandEncoder> enc,
            id<MTLBuffer> vectors,
            id<MTLBuffer> norms,
            int nb,
            int d);

    /// Encodes a top-k selection over `distances` into `outDist` / `outIdx`.
    /// NOTE(review): `wantMin` presumably selects smallest-first (true) versus
    /// largest-first (false) — confirm.
    void encodeTopKThreadgroup(
            id<MTLComputeCommandEncoder> enc,
            id<MTLBuffer> distances,
            id<MTLBuffer> outDist,
            id<MTLBuffer> outIdx,
            int nq,
            int nb,
            int k,
            bool wantMin);

    /// Encodes an IVF list-scan dispatch using the requested kernel variant.
    /// `ilCodesOffset` is optional and defaults to nil.
    /// NOTE(review): `ilCodesOffset` is presumably only consumed by the
    /// Interleaved variant — confirm.
    void encodeIVFScanList(
            id<MTLComputeCommandEncoder> enc,
            IVFScanVariant variant,
            id<MTLBuffer> queries,
            id<MTLBuffer> codes,
            id<MTLBuffer> ids,
            id<MTLBuffer> listOffset,
            id<MTLBuffer> listLength,
            id<MTLBuffer> coarseAssign,
            id<MTLBuffer> perListDist,
            id<MTLBuffer> perListIdx,
            id<MTLBuffer> paramsBuf,
            int nq,
            int nprobe,
            id<MTLBuffer> ilCodesOffset = nil);

    /// Encodes a dispatch that merges per-list results (`perListDist`,
    /// `perListIdx`) into the final outputs (`outDist`, `outIdx`).
    void encodeIVFMergeLists(
            id<MTLComputeCommandEncoder> enc,
            id<MTLBuffer> perListDist,
            id<MTLBuffer> perListIdx,
            id<MTLBuffer> outDist,
            id<MTLBuffer> outIdx,
            id<MTLBuffer> paramsBuf,
            int nq);

    /// Maps k to a top-k variant index.
    /// NOTE(review): presumably an index into kTopKVariantSizes — confirm.
    static int selectTopKVariantIndex(int k);

   private:
    /// Looks up the compute pipeline state for kernel `name`.
    /// NOTE(review): presumably memoized in cache_ — confirm.
    id<MTLComputePipelineState> pipeline(const char* name);

    id<MTLDevice> device_;
    id<MTLLibrary> library_;
    std::unordered_map<std::string, id<MTLComputePipelineState>> cache_;

    static constexpr int kTopKVariantSizes[] =
            {32, 64, 128, 256, 512, 1024, 2048};

    // Derived from the table so that adding or removing a variant cannot
    // leave the count stale (previously hard-coded to 7).
    static constexpr int kNumTopKVariants = static_cast<int>(
            sizeof(kTopKVariantSizes) / sizeof(kTopKVariantSizes[0]));
};
|
|
107
|
+
|
|
108
|
+
/// Returns a reference to the MetalKernels instance for `device`.
/// NOTE(review): since the result is a reference, this is presumably a cached
/// per-device instance owned elsewhere — confirm against the implementation.
MetalKernels& getMetalKernels(id<MTLDevice> device);
|
|
109
|
+
|
|
110
|
+
} // namespace gpu_metal
|
|
111
|
+
} // namespace faiss
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and its affiliates.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*
|
|
8
|
+
* C++-only API for Python/SWIG. No Objective-C types so SWIG can
|
|
9
|
+
* parse it. Implemented in MetalPythonBridge.mm.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
#pragma once
|
|
13
|
+
|
|
14
|
+
#include <faiss/Index.h>
|
|
15
|
+
|
|
16
|
+
namespace faiss {
|
|
17
|
+
namespace gpu_metal {
|
|
18
|
+
|
|
19
|
+
/// Opaque, non-copyable holder for Metal resources.
struct StandardMetalResourcesHolder {
    StandardMetalResourcesHolder();
    ~StandardMetalResourcesHolder();

    // Copying is disabled.
    StandardMetalResourcesHolder(const StandardMetalResourcesHolder&) = delete;
    StandardMetalResourcesHolder& operator=(
            const StandardMetalResourcesHolder&) = delete;

    /// Type-erased pointer to the underlying implementation; null by default.
    /// NOTE(review): presumably set by the constructor and released by the
    /// destructor, both defined in MetalPythonBridge.mm — confirm.
    void* impl = nullptr;
};
|
|
28
|
+
|
|
29
|
+
/// Same names as GPU API for unified Python binding.
|
|
30
|
+
int get_num_gpus();
|
|
31
|
+
void gpu_profiler_start();
|
|
32
|
+
void gpu_profiler_stop();
|
|
33
|
+
void gpu_sync_all_devices();
|
|
34
|
+
|
|
35
|
+
/// Clone CPU index to Metal GPU. Caller owns returned index.
|
|
36
|
+
faiss::Index* index_cpu_to_gpu(
|
|
37
|
+
StandardMetalResourcesHolder* res,
|
|
38
|
+
int device,
|
|
39
|
+
const faiss::Index* index);
|
|
40
|
+
|
|
41
|
+
/// Copy Metal index back to CPU. Caller owns returned index.
|
|
42
|
+
faiss::Index* index_gpu_to_cpu(const faiss::Index* index);
|
|
43
|
+
|
|
44
|
+
} // namespace gpu_metal
|
|
45
|
+
} // namespace faiss
|