faiss 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +2 -1
- data/ext/faiss/{index_rb.cpp → index.cpp} +1 -1
- data/ext/faiss/index_binary.cpp +1 -1
- data/ext/faiss/kmeans.cpp +1 -1
- data/ext/faiss/pca_matrix.cpp +1 -1
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/ext/faiss/{utils_rb.cpp → utils.cpp} +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +93 -80
- data/vendor/faiss/faiss/Clustering.cpp +39 -240
- data/vendor/faiss/faiss/Clustering.h +6 -0
- data/vendor/faiss/faiss/IVFlib.cpp +41 -21
- data/vendor/faiss/faiss/Index.cpp +6 -5
- data/vendor/faiss/faiss/Index.h +5 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +37 -53
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +49 -37
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +36 -34
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexBinary.cpp +5 -3
- data/vendor/faiss/faiss/IndexBinary.h +4 -4
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +84 -92
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +9 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +45 -236
- data/vendor/faiss/faiss/IndexBinaryHash.h +6 -6
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +87 -415
- data/vendor/faiss/faiss/IndexFastScan.cpp +72 -109
- data/vendor/faiss/faiss/IndexFastScan.h +25 -23
- data/vendor/faiss/faiss/IndexFlat.cpp +27 -20
- data/vendor/faiss/faiss/IndexFlat.h +21 -18
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +42 -19
- data/vendor/faiss/faiss/IndexHNSW.cpp +283 -145
- data/vendor/faiss/faiss/IndexHNSW.h +16 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +25 -21
- data/vendor/faiss/faiss/IndexIDMap.h +9 -7
- data/vendor/faiss/faiss/IndexIVF.cpp +465 -362
- data/vendor/faiss/faiss/IndexIVF.h +33 -12
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +77 -74
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +96 -93
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +357 -238
- data/vendor/faiss/faiss/IndexIVFFastScan.h +42 -41
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +36 -68
- data/vendor/faiss/faiss/IndexIVFFlat.h +32 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +53 -30
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +3 -1
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +18 -15
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +71 -843
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +151 -121
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +21 -17
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +26 -39
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +475 -476
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +248 -93
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +41 -127
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +36 -19
- data/vendor/faiss/faiss/IndexLattice.cpp +13 -13
- data/vendor/faiss/faiss/IndexNNDescent.cpp +36 -21
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +39 -23
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +31 -11
- data/vendor/faiss/faiss/IndexPQ.cpp +128 -221
- data/vendor/faiss/faiss/IndexPQ.h +3 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +20 -14
- data/vendor/faiss/faiss/IndexPQFastScan.h +3 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -18
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +11 -36
- data/vendor/faiss/faiss/IndexRaBitQ.h +2 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +41 -277
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +183 -27
- data/vendor/faiss/faiss/IndexRefine.cpp +30 -25
- data/vendor/faiss/faiss/IndexRefine.h +4 -4
- data/vendor/faiss/faiss/IndexReplicas.cpp +6 -6
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +15 -14
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +1 -1
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +82 -14
- data/vendor/faiss/faiss/IndexShards.cpp +10 -9
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +21 -15
- data/vendor/faiss/faiss/MatrixStats.cpp +5 -4
- data/vendor/faiss/faiss/MetaIndexes.cpp +19 -17
- data/vendor/faiss/faiss/MetaIndexes.h +1 -1
- data/vendor/faiss/faiss/MetricType.h +14 -7
- data/vendor/faiss/faiss/SuperKMeans.cpp +656 -0
- data/vendor/faiss/faiss/SuperKMeans.h +97 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +237 -149
- data/vendor/faiss/faiss/VectorTransform.h +16 -16
- data/vendor/faiss/faiss/build.cpp +23 -0
- data/vendor/faiss/faiss/build.h +15 -0
- data/vendor/faiss/faiss/clone_index.cpp +48 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +47 -47
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +11 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +38 -38
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +11 -0
- data/vendor/faiss/faiss/factory_tools.cpp +5 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +6 -5
- data/vendor/faiss/faiss/gpu/GpuResources.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +9 -9
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +4 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +46 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +56 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +78 -1
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +72 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +23 -0
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/CuvsUtils.h +21 -10
- data/vendor/faiss/faiss/gpu_metal/GpuIndexFlat.h +22 -0
- data/vendor/faiss/faiss/gpu_metal/MetalCloner.h +35 -0
- data/vendor/faiss/faiss/gpu_metal/MetalFlatKernels.h +40 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +51 -0
- data/vendor/faiss/faiss/gpu_metal/MetalIndexFlat.h +65 -0
- data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +66 -0
- data/vendor/faiss/faiss/gpu_metal/MetalResources.h +79 -0
- data/vendor/faiss/faiss/gpu_metal/StandardMetalResources.h +35 -0
- data/vendor/faiss/faiss/impl/AdSampling.cpp +103 -0
- data/vendor/faiss/faiss/impl/AdSampling.h +35 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +29 -25
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +10 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +3 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.cpp +244 -0
- data/vendor/faiss/faiss/impl/ClusteringHelpers.h +94 -0
- data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +16 -16
- data/vendor/faiss/faiss/impl/CodePacker.cpp +3 -3
- data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +8 -8
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -3
- data/vendor/faiss/faiss/impl/FaissException.h +50 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +92 -317
- data/vendor/faiss/faiss/impl/HNSW.h +13 -34
- data/vendor/faiss/faiss/impl/IDSelector.cpp +15 -11
- data/vendor/faiss/faiss/impl/IDSelector.h +8 -8
- data/vendor/faiss/faiss/impl/InvertedListScannerStats.h +26 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +82 -77
- data/vendor/faiss/faiss/impl/NNDescent.cpp +62 -25
- data/vendor/faiss/faiss/impl/NNDescent.h +6 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +38 -21
- data/vendor/faiss/faiss/impl/NSG.h +4 -4
- data/vendor/faiss/faiss/impl/Panorama.cpp +23 -6
- data/vendor/faiss/faiss/impl/Panorama.h +258 -87
- data/vendor/faiss/faiss/impl/PdxLayout.cpp +93 -0
- data/vendor/faiss/faiss/impl/PdxLayout.h +41 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +46 -32
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +3 -3
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +35 -35
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +21 -16
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +30 -23
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +55 -49
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +65 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +296 -283
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +26 -23
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +99 -75
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +52 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +27 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +14 -11
- data/vendor/faiss/faiss/impl/VisitedTable.h +7 -0
- data/vendor/faiss/faiss/impl/approx_topk/approx_topk.h +276 -0
- data/vendor/faiss/faiss/impl/approx_topk/avx2.cpp +68 -0
- data/vendor/faiss/faiss/{utils → impl}/approx_topk/generic.h +15 -8
- data/vendor/faiss/faiss/impl/approx_topk/neon.cpp +68 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab-inl.h +169 -0
- data/vendor/faiss/faiss/impl/approx_topk/rq_beam_search_tab.h +117 -0
- data/vendor/faiss/faiss/impl/approx_topk/simdlib256-inl.h +146 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHNSW_impl.h +73 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryHash_impl.h +270 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexBinaryIVF_impl.h +460 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexIVFSpectralHash_impl.h +159 -0
- data/vendor/faiss/faiss/impl/binary_hamming/IndexPQ_impl.h +92 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx2.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/avx512.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/dispatch.h +143 -0
- data/vendor/faiss/faiss/impl/binary_hamming/neon.cpp +26 -0
- data/vendor/faiss/faiss/impl/binary_hamming/rvv.cpp +26 -0
- data/vendor/faiss/faiss/impl/expanded_scanners.h +8 -3
- data/vendor/faiss/faiss/impl/{FastScanDistancePostProcessing.h → fast_scan/FastScanDistancePostProcessing.h} +13 -6
- data/vendor/faiss/faiss/impl/{LookupTableScaler.h → fast_scan/LookupTableScaler.h} +16 -5
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops.h +237 -0
- data/vendor/faiss/faiss/impl/fast_scan/accumulate_loops_512.h +185 -0
- data/vendor/faiss/faiss/impl/fast_scan/decompose_qbs.h +229 -0
- data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +268 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan.cpp → fast_scan/fast_scan.cpp} +169 -2
- data/vendor/faiss/faiss/impl/fast_scan/fast_scan.h +341 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx2.cpp +36 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-avx512.cpp +40 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-neon.cpp +120 -0
- data/vendor/faiss/faiss/impl/fast_scan/impl-riscv.cpp +104 -0
- data/vendor/faiss/faiss/impl/fast_scan/kernels_simd256.h +213 -0
- data/vendor/faiss/faiss/impl/{pq4_fast_scan_search_qbs.cpp → fast_scan/kernels_simd512.h} +26 -356
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_dispatching.h +90 -0
- data/vendor/faiss/faiss/impl/fast_scan/rabitq_result_handler.h +108 -0
- data/vendor/faiss/faiss/impl/{simd_result_handlers.h → fast_scan/simd_result_handlers.h} +282 -134
- data/vendor/faiss/faiss/impl/hnsw/LockVector.cpp +54 -0
- data/vendor/faiss/faiss/impl/hnsw/LockVector.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +91 -0
- data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -0
- data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +104 -0
- data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +111 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +1132 -45
- data/vendor/faiss/faiss/impl/index_read_utils.h +1 -1
- data/vendor/faiss/faiss/impl/index_write.cpp +95 -13
- data/vendor/faiss/faiss/impl/io.cpp +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +33 -16
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +37 -23
- data/vendor/faiss/faiss/impl/lattice_Zn.h +6 -6
- data/vendor/faiss/faiss/impl/mapped_io.cpp +6 -6
- data/vendor/faiss/faiss/impl/platform_macros.h +11 -4
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQScanner_impl.h +549 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.cpp +245 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/IVFPQ_QueryTables.h +105 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/PQDistanceComputer_impl.h +106 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +21 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx2.cpp → pq_code_distance-avx2.h} +9 -13
- data/vendor/faiss/faiss/impl/pq_code_distance/{pq_code_distance-avx512.cpp → pq_code_distance-avx512.h} +9 -57
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +29 -111
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.h +96 -0
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +238 -5
- data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -7
- data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +68 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +311 -477
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +1 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +3 -2
- data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +102 -11
- data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +27 -1
- data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +3 -3
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +148 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +167 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +59 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +163 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +311 -0
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +192 -8
- data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +12 -0
- data/vendor/faiss/faiss/impl/simd_dispatch.h +100 -66
- data/vendor/faiss/faiss/impl/simdlib/simdlib.h +57 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_avx2.h +264 -172
- data/vendor/faiss/faiss/impl/simdlib/simdlib_avx512.h +414 -0
- data/vendor/faiss/faiss/impl/simdlib/simdlib_dispatch.h +44 -0
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_emulated.h +231 -166
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_neon.h +270 -218
- data/vendor/faiss/faiss/{utils → impl/simdlib}/simdlib_ppc64.h +201 -160
- data/vendor/faiss/faiss/impl/svs_io.cpp +12 -3
- data/vendor/faiss/faiss/impl/svs_io.h +8 -2
- data/vendor/faiss/faiss/index_factory.cpp +86 -18
- data/vendor/faiss/faiss/index_io.h +24 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +66 -16
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +24 -14
- data/vendor/faiss/faiss/invlists/DirectMap.h +4 -3
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +157 -73
- data/vendor/faiss/faiss/invlists/InvertedLists.h +86 -23
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +4 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +13 -13
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +2 -2
- data/vendor/faiss/faiss/svs/IndexSVSIVF.cpp +350 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVF.h +128 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.cpp +40 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLVQ.h +43 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.cpp +225 -0
- data/vendor/faiss/faiss/svs/IndexSVSIVFLeanVec.h +71 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +25 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +18 -2
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +12 -3
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +7 -2
- data/vendor/faiss/faiss/utils/Heap.cpp +10 -10
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +47 -36
- data/vendor/faiss/faiss/utils/NeuralNet.h +1 -1
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +10 -4
- data/vendor/faiss/faiss/utils/distances.cpp +390 -560
- data/vendor/faiss/faiss/utils/distances.h +20 -1
- data/vendor/faiss/faiss/utils/distances_dispatch.h +117 -37
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +8 -7
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +33 -14
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +12 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +16 -293
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based_neon.cpp +57 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_kernel-inl.h +290 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -177
- data/vendor/faiss/faiss/utils/extra_distances.cpp +9 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +32 -6
- data/vendor/faiss/faiss/utils/hamming-inl.h +13 -11
- data/vendor/faiss/faiss/utils/hamming.cpp +66 -517
- data/vendor/faiss/faiss/utils/hamming.h +92 -2
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +287 -10
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx2.h +142 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +234 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-generic.h +368 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-neon.h +322 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-rvv.h +39 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer.h +146 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_neon.cpp +15 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamming_rvv.cpp +15 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +66 -987
- data/vendor/faiss/faiss/utils/partitioning.h +31 -0
- data/vendor/faiss/faiss/utils/popcount.h +29 -0
- data/vendor/faiss/faiss/utils/pq_code_distance.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +30 -30
- data/vendor/faiss/faiss/utils/quantize_lut.h +1 -1
- data/vendor/faiss/faiss/utils/rabitq_simd.h +57 -536
- data/vendor/faiss/faiss/utils/random.cpp +6 -6
- data/vendor/faiss/faiss/utils/simd_impl/IVFFlatScanner-inl.h +51 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +5 -1
- data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +213 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +163 -10
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +250 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +7 -4
- data/vendor/faiss/faiss/utils/simd_impl/distances_rvv.cpp +189 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_simdlib256.h +195 -0
- data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +2 -1
- data/vendor/faiss/faiss/utils/{distances_fused/simdlib_based.h → simd_impl/exhaustive_L2sqr_blas_cmax.h} +5 -10
- data/vendor/faiss/faiss/utils/simd_impl/hamming_impl.h +481 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_avx2.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_neon.cpp +14 -0
- data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +1085 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx2.cpp +355 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512.cpp +477 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_neon.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/rabitq_rvv.cpp +55 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_dispatch.h +32 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels.h +43 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx2.cpp +57 -0
- data/vendor/faiss/faiss/utils/simd_impl/super_kmeans_kernels_avx512.cpp +45 -0
- data/vendor/faiss/faiss/utils/simd_levels.cpp +17 -5
- data/vendor/faiss/faiss/utils/simd_levels.h +93 -1
- data/vendor/faiss/faiss/utils/sorting.cpp +48 -36
- data/vendor/faiss/faiss/utils/utils.cpp +5 -5
- data/vendor/faiss/faiss/utils/utils.h +3 -3
- metadata +119 -34
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +0 -29
- data/vendor/faiss/faiss/impl/RaBitQStats.h +0 -56
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +0 -224
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +0 -230
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +0 -84
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +0 -196
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +0 -34
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +0 -36
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +0 -235
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +0 -462
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +0 -490
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -449
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +0 -87
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +0 -524
- data/vendor/faiss/faiss/utils/simdlib.h +0 -42
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +0 -365
- /data/ext/faiss/{utils_rb.h → utils.h} +0 -0
|
@@ -26,8 +26,8 @@ struct VectorTransform {
|
|
|
26
26
|
int d_in; ///! input dimension
|
|
27
27
|
int d_out; ///! output dimension
|
|
28
28
|
|
|
29
|
-
explicit VectorTransform(int
|
|
30
|
-
: d_in(
|
|
29
|
+
explicit VectorTransform(int d_in_val = 0, int d_out_val = 0)
|
|
30
|
+
: d_in(d_in_val), d_out(d_out_val), is_trained(true) {}
|
|
31
31
|
|
|
32
32
|
/// set if the VectorTransform does not require training, or if
|
|
33
33
|
/// training is done already
|
|
@@ -82,9 +82,9 @@ struct LinearTransform : VectorTransform {
|
|
|
82
82
|
|
|
83
83
|
/// both d_in > d_out and d_in < d_out are supported
|
|
84
84
|
explicit LinearTransform(
|
|
85
|
-
int
|
|
86
|
-
int
|
|
87
|
-
bool
|
|
85
|
+
int din = 0,
|
|
86
|
+
int dout = 0,
|
|
87
|
+
bool have_bias_in = false);
|
|
88
88
|
|
|
89
89
|
/// same as apply, but result is pre-allocated
|
|
90
90
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
|
@@ -114,8 +114,8 @@ struct LinearTransform : VectorTransform {
|
|
|
114
114
|
/// Randomly rotate a set of vectors
|
|
115
115
|
struct RandomRotationMatrix : LinearTransform {
|
|
116
116
|
/// both d_in > d_out and d_in < d_out are supported
|
|
117
|
-
RandomRotationMatrix(int
|
|
118
|
-
: LinearTransform(
|
|
117
|
+
RandomRotationMatrix(int d_in_val, int d_out_val)
|
|
118
|
+
: LinearTransform(d_in_val, d_out_val, false) {}
|
|
119
119
|
|
|
120
120
|
/// must be called before the transform is used
|
|
121
121
|
void init(int seed);
|
|
@@ -183,10 +183,10 @@ struct PCAMatrix : LinearTransform {
|
|
|
183
183
|
|
|
184
184
|
// the final matrix is computed after random rotation and/or whitening
|
|
185
185
|
explicit PCAMatrix(
|
|
186
|
-
int
|
|
187
|
-
int
|
|
188
|
-
float
|
|
189
|
-
bool
|
|
186
|
+
int din = 0,
|
|
187
|
+
int dout = 0,
|
|
188
|
+
float eigen_power_in = 0,
|
|
189
|
+
bool random_rotation_in = false);
|
|
190
190
|
|
|
191
191
|
/// train on n vectors. If n < d_in then the eigenvector matrix
|
|
192
192
|
/// will be completed with 0s
|
|
@@ -233,7 +233,7 @@ struct ITQTransform : VectorTransform {
|
|
|
233
233
|
// concatenation of PCA + ITQ transformation
|
|
234
234
|
LinearTransform pca_then_itq;
|
|
235
235
|
|
|
236
|
-
explicit ITQTransform(int
|
|
236
|
+
explicit ITQTransform(int din = 0, int dout = 0, bool do_pca_in = false);
|
|
237
237
|
|
|
238
238
|
void train(idx_t n, const float* x) override;
|
|
239
239
|
|
|
@@ -267,7 +267,7 @@ struct OPQMatrix : LinearTransform {
|
|
|
267
267
|
ProductQuantizer* pq = nullptr;
|
|
268
268
|
|
|
269
269
|
/// if d2 != -1, output vectors of this dimension
|
|
270
|
-
explicit OPQMatrix(int d = 0, int
|
|
270
|
+
explicit OPQMatrix(int d = 0, int M_in = 1, int d2 = -1);
|
|
271
271
|
|
|
272
272
|
void train(idx_t n, const float* x) override;
|
|
273
273
|
};
|
|
@@ -280,12 +280,12 @@ struct RemapDimensionsTransform : VectorTransform {
|
|
|
280
280
|
/// -1 -> set output to 0
|
|
281
281
|
std::vector<int> map;
|
|
282
282
|
|
|
283
|
-
RemapDimensionsTransform(int
|
|
283
|
+
RemapDimensionsTransform(int din, int dout, const int* map);
|
|
284
284
|
|
|
285
285
|
/// remap input to output, skipping or inserting dimensions as needed
|
|
286
286
|
/// if uniform: distribute dimensions uniformly
|
|
287
287
|
/// otherwise just take the d_out first ones.
|
|
288
|
-
RemapDimensionsTransform(int
|
|
288
|
+
RemapDimensionsTransform(int din, int dout, bool uniform = true);
|
|
289
289
|
|
|
290
290
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
|
291
291
|
|
|
@@ -301,7 +301,7 @@ struct RemapDimensionsTransform : VectorTransform {
|
|
|
301
301
|
struct NormalizationTransform : VectorTransform {
|
|
302
302
|
float norm;
|
|
303
303
|
|
|
304
|
-
explicit NormalizationTransform(int d, float
|
|
304
|
+
explicit NormalizationTransform(int d, float norm_in = 2.0);
|
|
305
305
|
NormalizationTransform();
|
|
306
306
|
|
|
307
307
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#include "faiss/build.h"
|
|
9
|
+
|
|
10
|
+
namespace faiss {
|
|
11
|
+
|
|
12
|
+
bool has_omp() {
|
|
13
|
+
int omp_available = 1;
|
|
14
|
+
// Detect whether OpenMP is enabled by using the 'max' reduction to render
|
|
15
|
+
// the below assignment a no-op. This works:
|
|
16
|
+
// 1) without starting any threads
|
|
17
|
+
// 2) irrespective of the current thread limit
|
|
18
|
+
#pragma omp parallel reduction(max : omp_available) num_threads(1)
|
|
19
|
+
omp_available = 0;
|
|
20
|
+
return omp_available != 0;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
} // namespace faiss
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
namespace faiss {
|
|
11
|
+
|
|
12
|
+
// Returns true iff `faiss` was compiled with non-mocked OpenMP support.
|
|
13
|
+
bool has_omp();
|
|
14
|
+
|
|
15
|
+
} // namespace faiss
|
|
@@ -102,8 +102,8 @@ IndexIVF* Cloner::clone_IndexIVF(const IndexIVF* ivf) {
|
|
|
102
102
|
TRYCLONE(IndexIVFRaBitQ, ivf)
|
|
103
103
|
|
|
104
104
|
TRYCLONE(IndexIVFFlatDedup, ivf)
|
|
105
|
-
TRYCLONE(IndexIVFFlat, ivf)
|
|
106
105
|
TRYCLONE(IndexIVFFlatPanorama, ivf)
|
|
106
|
+
TRYCLONE(IndexIVFFlat, ivf)
|
|
107
107
|
|
|
108
108
|
TRYCLONE(IndexIVFSpectralHash, ivf)
|
|
109
109
|
|
|
@@ -173,7 +173,7 @@ IndexRowwiseMinMaxBase* clone_IndexRowwiseMinMax(
|
|
|
173
173
|
}
|
|
174
174
|
}
|
|
175
175
|
|
|
176
|
-
#define TRYCAST(classname)
|
|
176
|
+
#define TRYCAST(classname, var) auto* var = dynamic_cast<classname*>(index)
|
|
177
177
|
|
|
178
178
|
void reset_AdditiveQuantizerIndex(Index* index) {
|
|
179
179
|
auto clone_ProductQuantizers =
|
|
@@ -182,50 +182,50 @@ void reset_AdditiveQuantizerIndex(Index* index) {
|
|
|
182
182
|
q = dynamic_cast<AdditiveQuantizer*>(clone_Quantizer(q));
|
|
183
183
|
}
|
|
184
184
|
};
|
|
185
|
-
if (TRYCAST(IndexIVFLocalSearchQuantizerFastScan)) {
|
|
186
|
-
|
|
187
|
-
} else if (TRYCAST(IndexIVFResidualQuantizerFastScan)) {
|
|
188
|
-
|
|
189
|
-
} else if (TRYCAST(IndexIVFProductLocalSearchQuantizerFastScan)) {
|
|
190
|
-
|
|
191
|
-
clone_ProductQuantizers(
|
|
192
|
-
} else if (TRYCAST(IndexIVFProductResidualQuantizerFastScan)) {
|
|
193
|
-
|
|
194
|
-
clone_ProductQuantizers(
|
|
195
|
-
} else if (TRYCAST(IndexIVFLocalSearchQuantizer)) {
|
|
196
|
-
|
|
197
|
-
} else if (TRYCAST(IndexIVFResidualQuantizer)) {
|
|
198
|
-
|
|
199
|
-
} else if (TRYCAST(IndexIVFProductLocalSearchQuantizer)) {
|
|
200
|
-
|
|
201
|
-
clone_ProductQuantizers(
|
|
202
|
-
} else if (TRYCAST(IndexIVFProductResidualQuantizer)) {
|
|
203
|
-
|
|
204
|
-
clone_ProductQuantizers(
|
|
205
|
-
} else if (TRYCAST(IndexLocalSearchQuantizerFastScan)) {
|
|
206
|
-
|
|
207
|
-
} else if (TRYCAST(IndexResidualQuantizerFastScan)) {
|
|
208
|
-
|
|
209
|
-
} else if (TRYCAST(IndexProductLocalSearchQuantizerFastScan)) {
|
|
210
|
-
|
|
211
|
-
clone_ProductQuantizers(
|
|
212
|
-
} else if (TRYCAST(IndexProductResidualQuantizerFastScan)) {
|
|
213
|
-
|
|
214
|
-
clone_ProductQuantizers(
|
|
215
|
-
} else if (TRYCAST(IndexLocalSearchQuantizer)) {
|
|
216
|
-
|
|
217
|
-
} else if (TRYCAST(IndexResidualQuantizer)) {
|
|
218
|
-
|
|
219
|
-
} else if (TRYCAST(IndexProductLocalSearchQuantizer)) {
|
|
220
|
-
|
|
221
|
-
clone_ProductQuantizers(
|
|
222
|
-
} else if (TRYCAST(IndexProductResidualQuantizer)) {
|
|
223
|
-
|
|
224
|
-
clone_ProductQuantizers(
|
|
225
|
-
} else if (TRYCAST(LocalSearchCoarseQuantizer)) {
|
|
226
|
-
|
|
227
|
-
} else if (TRYCAST(ResidualCoarseQuantizer)) {
|
|
228
|
-
|
|
185
|
+
if (TRYCAST(IndexIVFLocalSearchQuantizerFastScan, r1)) {
|
|
186
|
+
r1->aq = &r1->lsq;
|
|
187
|
+
} else if (TRYCAST(IndexIVFResidualQuantizerFastScan, r2)) {
|
|
188
|
+
r2->aq = &r2->rq;
|
|
189
|
+
} else if (TRYCAST(IndexIVFProductLocalSearchQuantizerFastScan, r3)) {
|
|
190
|
+
r3->aq = &r3->plsq;
|
|
191
|
+
clone_ProductQuantizers(r3->plsq.quantizers);
|
|
192
|
+
} else if (TRYCAST(IndexIVFProductResidualQuantizerFastScan, r4)) {
|
|
193
|
+
r4->aq = &r4->prq;
|
|
194
|
+
clone_ProductQuantizers(r4->prq.quantizers);
|
|
195
|
+
} else if (TRYCAST(IndexIVFLocalSearchQuantizer, r5)) {
|
|
196
|
+
r5->aq = &r5->lsq;
|
|
197
|
+
} else if (TRYCAST(IndexIVFResidualQuantizer, r6)) {
|
|
198
|
+
r6->aq = &r6->rq;
|
|
199
|
+
} else if (TRYCAST(IndexIVFProductLocalSearchQuantizer, r7)) {
|
|
200
|
+
r7->aq = &r7->plsq;
|
|
201
|
+
clone_ProductQuantizers(r7->plsq.quantizers);
|
|
202
|
+
} else if (TRYCAST(IndexIVFProductResidualQuantizer, r8)) {
|
|
203
|
+
r8->aq = &r8->prq;
|
|
204
|
+
clone_ProductQuantizers(r8->prq.quantizers);
|
|
205
|
+
} else if (TRYCAST(IndexLocalSearchQuantizerFastScan, r9)) {
|
|
206
|
+
r9->aq = &r9->lsq;
|
|
207
|
+
} else if (TRYCAST(IndexResidualQuantizerFastScan, r10)) {
|
|
208
|
+
r10->aq = &r10->rq;
|
|
209
|
+
} else if (TRYCAST(IndexProductLocalSearchQuantizerFastScan, r11)) {
|
|
210
|
+
r11->aq = &r11->plsq;
|
|
211
|
+
clone_ProductQuantizers(r11->plsq.quantizers);
|
|
212
|
+
} else if (TRYCAST(IndexProductResidualQuantizerFastScan, r12)) {
|
|
213
|
+
r12->aq = &r12->prq;
|
|
214
|
+
clone_ProductQuantizers(r12->prq.quantizers);
|
|
215
|
+
} else if (TRYCAST(IndexLocalSearchQuantizer, r13)) {
|
|
216
|
+
r13->aq = &r13->lsq;
|
|
217
|
+
} else if (TRYCAST(IndexResidualQuantizer, r14)) {
|
|
218
|
+
r14->aq = &r14->rq;
|
|
219
|
+
} else if (TRYCAST(IndexProductLocalSearchQuantizer, r15)) {
|
|
220
|
+
r15->aq = &r15->plsq;
|
|
221
|
+
clone_ProductQuantizers(r15->plsq.quantizers);
|
|
222
|
+
} else if (TRYCAST(IndexProductResidualQuantizer, r16)) {
|
|
223
|
+
r16->aq = &r16->prq;
|
|
224
|
+
clone_ProductQuantizers(r16->prq.quantizers);
|
|
225
|
+
} else if (TRYCAST(LocalSearchCoarseQuantizer, r17)) {
|
|
226
|
+
r17->aq = &r17->lsq;
|
|
227
|
+
} else if (TRYCAST(ResidualCoarseQuantizer, r18)) {
|
|
228
|
+
r18->aq = &r18->rq;
|
|
229
229
|
} else {
|
|
230
230
|
FAISS_THROW_MSG(
|
|
231
231
|
"clone not supported for this type of additive quantizer index");
|
|
@@ -319,7 +319,7 @@ Index* Cloner::clone_Index(const Index* index) {
|
|
|
319
319
|
res->metric_arg = ipt->metric_arg;
|
|
320
320
|
|
|
321
321
|
res->index = clone_Index(ipt->index);
|
|
322
|
-
for (
|
|
322
|
+
for (size_t i = 0; i < ipt->chain.size(); i++) {
|
|
323
323
|
res->chain.push_back(clone_VectorTransform(ipt->chain[i]));
|
|
324
324
|
}
|
|
325
325
|
res->own_fields = true;
|
|
@@ -377,6 +377,7 @@ Index* Cloner::clone_Index(const Index* index) {
|
|
|
377
377
|
IndexRowwiseMinMaxBase* res = clone_IndexRowwiseMinMax(irmmb);
|
|
378
378
|
res->own_fields = true;
|
|
379
379
|
res->index = clone_Index(irmmb->index);
|
|
380
|
+
return res;
|
|
380
381
|
} else if (
|
|
381
382
|
dynamic_cast<const IndexAdditiveQuantizerFastScan*>(index) ||
|
|
382
383
|
dynamic_cast<const IndexAdditiveQuantizer*>(index) ||
|
|
@@ -1785,72 +1785,72 @@ struct Index2LevelDecoderImpl<
|
|
|
1785
1785
|
|
|
1786
1786
|
// process 1 sample
|
|
1787
1787
|
static void store(
|
|
1788
|
-
const float* const __restrict pqCoarseCentroids0
|
|
1789
|
-
const float* const __restrict pqFineCentroids0
|
|
1790
|
-
const uint8_t* const __restrict code0
|
|
1791
|
-
float* const __restrict outputStore) {}
|
|
1788
|
+
const float* const __restrict /*pqCoarseCentroids0*/,
|
|
1789
|
+
const float* const __restrict /*pqFineCentroids0*/,
|
|
1790
|
+
const uint8_t* const __restrict /*code0*/,
|
|
1791
|
+
float* const __restrict /*outputStore*/) {}
|
|
1792
1792
|
|
|
1793
1793
|
// process 1 sample
|
|
1794
1794
|
static void accum(
|
|
1795
|
-
const float* const __restrict pqCoarseCentroids0
|
|
1796
|
-
const float* const __restrict pqFineCentroids0
|
|
1797
|
-
const uint8_t* const __restrict code0
|
|
1798
|
-
const float weight0
|
|
1799
|
-
float* const __restrict outputAccum) {}
|
|
1795
|
+
const float* const __restrict /*pqCoarseCentroids0*/,
|
|
1796
|
+
const float* const __restrict /*pqFineCentroids0*/,
|
|
1797
|
+
const uint8_t* const __restrict /*code0*/,
|
|
1798
|
+
const float /*weight0*/,
|
|
1799
|
+
float* const __restrict /*outputAccum*/) {}
|
|
1800
1800
|
|
|
1801
1801
|
// Process 2 samples.
|
|
1802
1802
|
// Each code uses its own coarse pq centroids table and fine pq centroids table.
|
|
1803
1803
|
static void accum(
|
|
1804
|
-
const float* const __restrict pqCoarseCentroids0
|
|
1805
|
-
const float* const __restrict pqFineCentroids0
|
|
1806
|
-
const uint8_t* const __restrict code0
|
|
1807
|
-
const float weight0
|
|
1808
|
-
const float* const __restrict pqCoarseCentroids1
|
|
1809
|
-
const float* const __restrict pqFineCentroids1
|
|
1810
|
-
const uint8_t* const __restrict code1
|
|
1811
|
-
const float weight1
|
|
1812
|
-
float* const __restrict outputAccum) {}
|
|
1804
|
+
const float* const __restrict /*pqCoarseCentroids0*/,
|
|
1805
|
+
const float* const __restrict /*pqFineCentroids0*/,
|
|
1806
|
+
const uint8_t* const __restrict /*code0*/,
|
|
1807
|
+
const float /*weight0*/,
|
|
1808
|
+
const float* const __restrict /*pqCoarseCentroids1*/,
|
|
1809
|
+
const float* const __restrict /*pqFineCentroids1*/,
|
|
1810
|
+
const uint8_t* const __restrict /*code1*/,
|
|
1811
|
+
const float /*weight1*/,
|
|
1812
|
+
float* const __restrict /*outputAccum*/) {}
|
|
1813
1813
|
|
|
1814
1814
|
// Process 2 samples.
|
|
1815
1815
|
// Coarse pq centroids table and fine pq centroids table are shared among codes.
|
|
1816
1816
|
static void accum(
|
|
1817
|
-
const float* const __restrict pqCoarseCentroids
|
|
1818
|
-
const float* const __restrict pqFineCentroids
|
|
1819
|
-
const uint8_t* const __restrict code0
|
|
1820
|
-
const float weight0
|
|
1821
|
-
const uint8_t* const __restrict code1
|
|
1822
|
-
const float weight1
|
|
1823
|
-
float* const __restrict outputAccum) {}
|
|
1817
|
+
const float* const __restrict /*pqCoarseCentroids*/,
|
|
1818
|
+
const float* const __restrict /*pqFineCentroids*/,
|
|
1819
|
+
const uint8_t* const __restrict /*code0*/,
|
|
1820
|
+
const float /*weight0*/,
|
|
1821
|
+
const uint8_t* const __restrict /*code1*/,
|
|
1822
|
+
const float /*weight1*/,
|
|
1823
|
+
float* const __restrict /*outputAccum*/) {}
|
|
1824
1824
|
|
|
1825
1825
|
// Process 3 samples.
|
|
1826
1826
|
// Each code uses its own coarse pq centroids table and fine pq centroids table.
|
|
1827
1827
|
static void accum(
|
|
1828
|
-
const float* const __restrict pqCoarseCentroids0
|
|
1829
|
-
const float* const __restrict pqFineCentroids0
|
|
1830
|
-
const uint8_t* const __restrict code0
|
|
1831
|
-
const float weight0
|
|
1832
|
-
const float* const __restrict pqCoarseCentroids1
|
|
1833
|
-
const float* const __restrict pqFineCentroids1
|
|
1834
|
-
const uint8_t* const __restrict code1
|
|
1835
|
-
const float weight1
|
|
1836
|
-
const float* const __restrict pqCoarseCentroids2
|
|
1837
|
-
const float* const __restrict pqFineCentroids2
|
|
1838
|
-
const uint8_t* const __restrict code2
|
|
1839
|
-
const float weight2
|
|
1840
|
-
float* const __restrict outputAccum) {}
|
|
1828
|
+
const float* const __restrict /*pqCoarseCentroids0*/,
|
|
1829
|
+
const float* const __restrict /*pqFineCentroids0*/,
|
|
1830
|
+
const uint8_t* const __restrict /*code0*/,
|
|
1831
|
+
const float /*weight0*/,
|
|
1832
|
+
const float* const __restrict /*pqCoarseCentroids1*/,
|
|
1833
|
+
const float* const __restrict /*pqFineCentroids1*/,
|
|
1834
|
+
const uint8_t* const __restrict /*code1*/,
|
|
1835
|
+
const float /*weight1*/,
|
|
1836
|
+
const float* const __restrict /*pqCoarseCentroids2*/,
|
|
1837
|
+
const float* const __restrict /*pqFineCentroids2*/,
|
|
1838
|
+
const uint8_t* const __restrict /*code2*/,
|
|
1839
|
+
const float /*weight2*/,
|
|
1840
|
+
float* const __restrict /*outputAccum*/) {}
|
|
1841
1841
|
|
|
1842
1842
|
// Process 3 samples.
|
|
1843
1843
|
// Coarse pq centroids table and fine pq centroids table are shared among codes.
|
|
1844
1844
|
static void accum(
|
|
1845
|
-
const float* const __restrict pqCoarseCentroids
|
|
1846
|
-
const float* const __restrict pqFineCentroids
|
|
1847
|
-
const uint8_t* const __restrict code0
|
|
1848
|
-
const float weight0
|
|
1849
|
-
const uint8_t* const __restrict code1
|
|
1850
|
-
const float weight1
|
|
1851
|
-
const uint8_t* const __restrict code2
|
|
1852
|
-
const float weight2
|
|
1853
|
-
float* const __restrict outputAccum) {}
|
|
1845
|
+
const float* const __restrict /*pqCoarseCentroids*/,
|
|
1846
|
+
const float* const __restrict /*pqFineCentroids*/,
|
|
1847
|
+
const uint8_t* const __restrict /*code0*/,
|
|
1848
|
+
const float /*weight0*/,
|
|
1849
|
+
const uint8_t* const __restrict /*code1*/,
|
|
1850
|
+
const float /*weight1*/,
|
|
1851
|
+
const uint8_t* const __restrict /*code2*/,
|
|
1852
|
+
const float /*weight2*/,
|
|
1853
|
+
float* const __restrict /*outputAccum*/) {}
|
|
1854
1854
|
|
|
1855
1855
|
// clang-format on
|
|
1856
1856
|
};
|
|
@@ -8,6 +8,12 @@
|
|
|
8
8
|
#ifndef LEVEL2_INL_H
|
|
9
9
|
#define LEVEL2_INL_H
|
|
10
10
|
|
|
11
|
+
// GCC does not recognize #pragma unroll (Clang extension)
|
|
12
|
+
#if defined(__GNUC__) && !defined(__clang__)
|
|
13
|
+
#pragma GCC diagnostic push
|
|
14
|
+
#pragma GCC diagnostic ignored "-Wunknown-pragmas"
|
|
15
|
+
#endif
|
|
16
|
+
|
|
11
17
|
#include <cstddef>
|
|
12
18
|
#include <cstdint>
|
|
13
19
|
|
|
@@ -464,4 +470,9 @@ struct Index2LevelDecoder {
|
|
|
464
470
|
|
|
465
471
|
} // namespace cppcontrib
|
|
466
472
|
} // namespace faiss
|
|
473
|
+
|
|
474
|
+
#if defined(__GNUC__) && !defined(__clang__)
|
|
475
|
+
#pragma GCC diagnostic pop
|
|
476
|
+
#endif
|
|
477
|
+
|
|
467
478
|
#endif // LEVEL2_INL_H
|
|
@@ -1428,63 +1428,63 @@ struct IndexPQDecoderImpl<
|
|
|
1428
1428
|
|
|
1429
1429
|
// process 1 sample
|
|
1430
1430
|
static void store(
|
|
1431
|
-
const float* const __restrict pqFineCentroids0
|
|
1432
|
-
const uint8_t* const __restrict code0
|
|
1433
|
-
float* const __restrict outputStore) {}
|
|
1431
|
+
const float* const __restrict /*pqFineCentroids0*/,
|
|
1432
|
+
const uint8_t* const __restrict /*code0*/,
|
|
1433
|
+
float* const __restrict /*outputStore*/) {}
|
|
1434
1434
|
|
|
1435
1435
|
// process 1 sample
|
|
1436
1436
|
static void accum(
|
|
1437
|
-
const float* const __restrict pqFineCentroids0
|
|
1438
|
-
const uint8_t* const __restrict code0
|
|
1439
|
-
const float weight0
|
|
1440
|
-
float* const __restrict outputAccum) {}
|
|
1437
|
+
const float* const __restrict /*pqFineCentroids0*/,
|
|
1438
|
+
const uint8_t* const __restrict /*code0*/,
|
|
1439
|
+
const float /*weight0*/,
|
|
1440
|
+
float* const __restrict /*outputAccum*/) {}
|
|
1441
1441
|
|
|
1442
1442
|
// Process 2 samples.
|
|
1443
1443
|
// Each code uses its own fine pq centroids table.
|
|
1444
1444
|
static void accum(
|
|
1445
|
-
const float* const __restrict pqFineCentroids0
|
|
1446
|
-
const uint8_t* const __restrict code0
|
|
1447
|
-
const float weight0
|
|
1448
|
-
const float* const __restrict pqFineCentroids1
|
|
1449
|
-
const uint8_t* const __restrict code1
|
|
1450
|
-
const float weight1
|
|
1451
|
-
float* const __restrict outputAccum) {}
|
|
1445
|
+
const float* const __restrict /*pqFineCentroids0*/,
|
|
1446
|
+
const uint8_t* const __restrict /*code0*/,
|
|
1447
|
+
const float /*weight0*/,
|
|
1448
|
+
const float* const __restrict /*pqFineCentroids1*/,
|
|
1449
|
+
const uint8_t* const __restrict /*code1*/,
|
|
1450
|
+
const float /*weight1*/,
|
|
1451
|
+
float* const __restrict /*outputAccum*/) {}
|
|
1452
1452
|
|
|
1453
1453
|
// Process 2 samples.
|
|
1454
1454
|
// Fine pq centroids table is shared among codes.
|
|
1455
1455
|
static void accum(
|
|
1456
|
-
const float* const __restrict pqFineCentroids
|
|
1457
|
-
const uint8_t* const __restrict code0
|
|
1458
|
-
const float weight0
|
|
1459
|
-
const uint8_t* const __restrict code1
|
|
1460
|
-
const float weight1
|
|
1461
|
-
float* const __restrict outputAccum) {}
|
|
1456
|
+
const float* const __restrict /*pqFineCentroids*/,
|
|
1457
|
+
const uint8_t* const __restrict /*code0*/,
|
|
1458
|
+
const float /*weight0*/,
|
|
1459
|
+
const uint8_t* const __restrict /*code1*/,
|
|
1460
|
+
const float /*weight1*/,
|
|
1461
|
+
float* const __restrict /*outputAccum*/) {}
|
|
1462
1462
|
|
|
1463
1463
|
// Process 3 samples.
|
|
1464
1464
|
// Each code uses its own fine pq centroids table.
|
|
1465
1465
|
static void accum(
|
|
1466
|
-
const float* const __restrict pqFineCentroids0
|
|
1467
|
-
const uint8_t* const __restrict code0
|
|
1468
|
-
const float weight0
|
|
1469
|
-
const float* const __restrict pqFineCentroids1
|
|
1470
|
-
const uint8_t* const __restrict code1
|
|
1471
|
-
const float weight1
|
|
1472
|
-
const float* const __restrict pqFineCentroids2
|
|
1473
|
-
const uint8_t* const __restrict code2
|
|
1474
|
-
const float weight2
|
|
1475
|
-
float* const __restrict outputAccum) {}
|
|
1466
|
+
const float* const __restrict /*pqFineCentroids0*/,
|
|
1467
|
+
const uint8_t* const __restrict /*code0*/,
|
|
1468
|
+
const float /*weight0*/,
|
|
1469
|
+
const float* const __restrict /*pqFineCentroids1*/,
|
|
1470
|
+
const uint8_t* const __restrict /*code1*/,
|
|
1471
|
+
const float /*weight1*/,
|
|
1472
|
+
const float* const __restrict /*pqFineCentroids2*/,
|
|
1473
|
+
const uint8_t* const __restrict /*code2*/,
|
|
1474
|
+
const float /*weight2*/,
|
|
1475
|
+
float* const __restrict /*outputAccum*/) {}
|
|
1476
1476
|
|
|
1477
1477
|
// Process 3 samples.
|
|
1478
1478
|
// Fine pq centroids table is shared among codes.
|
|
1479
1479
|
static void accum(
|
|
1480
|
-
const float* const __restrict pqFineCentroids
|
|
1481
|
-
const uint8_t* const __restrict code0
|
|
1482
|
-
const float weight0
|
|
1483
|
-
const uint8_t* const __restrict code1
|
|
1484
|
-
const float weight1
|
|
1485
|
-
const uint8_t* const __restrict code2
|
|
1486
|
-
const float weight2
|
|
1487
|
-
float* const __restrict outputAccum) {}
|
|
1480
|
+
const float* const __restrict /*pqFineCentroids*/,
|
|
1481
|
+
const uint8_t* const __restrict /*code0*/,
|
|
1482
|
+
const float /*weight0*/,
|
|
1483
|
+
const uint8_t* const __restrict /*code1*/,
|
|
1484
|
+
const float /*weight1*/,
|
|
1485
|
+
const uint8_t* const __restrict /*code2*/,
|
|
1486
|
+
const float /*weight2*/,
|
|
1487
|
+
float* const __restrict /*outputAccum*/) {}
|
|
1488
1488
|
|
|
1489
1489
|
// clang-format on
|
|
1490
1490
|
};
|
|
@@ -8,6 +8,12 @@
|
|
|
8
8
|
#ifndef PQ_INL_H
|
|
9
9
|
#define PQ_INL_H
|
|
10
10
|
|
|
11
|
+
// GCC does not recognize #pragma unroll (Clang extension)
|
|
12
|
+
#if defined(__GNUC__) && !defined(__clang__)
|
|
13
|
+
#pragma GCC diagnostic push
|
|
14
|
+
#pragma GCC diagnostic ignored "-Wunknown-pragmas"
|
|
15
|
+
#endif
|
|
16
|
+
|
|
11
17
|
#include <cstddef>
|
|
12
18
|
#include <cstdint>
|
|
13
19
|
|
|
@@ -254,4 +260,9 @@ struct IndexPQDecoder {
|
|
|
254
260
|
|
|
255
261
|
} // namespace cppcontrib
|
|
256
262
|
} // namespace faiss
|
|
263
|
+
|
|
264
|
+
#if defined(__GNUC__) && !defined(__clang__)
|
|
265
|
+
#pragma GCC diagnostic pop
|
|
266
|
+
#endif
|
|
267
|
+
|
|
257
268
|
#endif // PQ_INL_H
|
|
@@ -38,6 +38,11 @@ const std::map<faiss::ScalarQuantizer::QuantizerType, std::string> sq_types = {
|
|
|
38
38
|
{faiss::ScalarQuantizer::QT_bf16, "SQbf16"},
|
|
39
39
|
{faiss::ScalarQuantizer::QT_8bit_direct_signed, "SQ8_direct_signed"},
|
|
40
40
|
{faiss::ScalarQuantizer::QT_8bit_direct, "SQ8_direct"},
|
|
41
|
+
{faiss::ScalarQuantizer::QT_1bit_tqmse, "SQtqmse1"},
|
|
42
|
+
{faiss::ScalarQuantizer::QT_2bit_tqmse, "SQtqmse2"},
|
|
43
|
+
{faiss::ScalarQuantizer::QT_3bit_tqmse, "SQtqmse3"},
|
|
44
|
+
{faiss::ScalarQuantizer::QT_4bit_tqmse, "SQtqmse4"},
|
|
45
|
+
{faiss::ScalarQuantizer::QT_8bit_tqmse, "SQtqmse8"},
|
|
41
46
|
};
|
|
42
47
|
|
|
43
48
|
int get_hnsw_M(const faiss::IndexHNSW* index) {
|
|
@@ -28,6 +28,7 @@
|
|
|
28
28
|
#include <faiss/gpu/GpuIndexIVFPQ.h>
|
|
29
29
|
|
|
30
30
|
#include <variant>
|
|
31
|
+
#include <vector>
|
|
31
32
|
#include "faiss/Index.h"
|
|
32
33
|
|
|
33
34
|
namespace faiss {
|
|
@@ -193,14 +194,14 @@ struct GpuIndexCagraConfig : public GpuIndexConfig {
|
|
|
193
194
|
|
|
194
195
|
enum class search_algo {
|
|
195
196
|
/// For large batch sizes.
|
|
196
|
-
SINGLE_CTA,
|
|
197
|
+
SINGLE_CTA = 0,
|
|
197
198
|
/// For small batch sizes.
|
|
198
|
-
MULTI_CTA,
|
|
199
|
-
MULTI_KERNEL,
|
|
200
|
-
AUTO
|
|
199
|
+
MULTI_CTA = 1,
|
|
200
|
+
MULTI_KERNEL = 2,
|
|
201
|
+
AUTO = 100
|
|
201
202
|
};
|
|
202
203
|
|
|
203
|
-
enum class hash_mode { HASH, SMALL, AUTO };
|
|
204
|
+
enum class hash_mode { HASH = 0, SMALL = 1, AUTO = 100 };
|
|
204
205
|
|
|
205
206
|
struct SearchParametersCagra : SearchParameters {
|
|
206
207
|
/// Maximum number of queries to search at the same time (batch size). Auto
|
|
@@ -23,9 +23,9 @@
|
|
|
23
23
|
|
|
24
24
|
#if defined USE_NVIDIA_CUVS
|
|
25
25
|
#include <raft/core/device_resources.hpp>
|
|
26
|
-
#include <rmm/mr/
|
|
27
|
-
#include <rmm/mr/
|
|
28
|
-
#include <rmm/mr/
|
|
26
|
+
#include <rmm/mr/managed_memory_resource.hpp>
|
|
27
|
+
#include <rmm/mr/per_device_resource.hpp>
|
|
28
|
+
#include <rmm/mr/pinned_host_memory_resource.hpp>
|
|
29
29
|
#include <memory>
|
|
30
30
|
#endif
|
|
31
31
|
|
|
@@ -93,7 +93,7 @@ StandardGpuResourcesImpl::StandardGpuResourcesImpl()
|
|
|
93
93
|
:
|
|
94
94
|
#if defined USE_NVIDIA_CUVS
|
|
95
95
|
mmr_(new rmm::mr::managed_memory_resource),
|
|
96
|
-
pmr_(new rmm::mr::
|
|
96
|
+
pmr_(new rmm::mr::pinned_host_memory_resource),
|
|
97
97
|
#endif
|
|
98
98
|
pinnedMemAlloc_(nullptr),
|
|
99
99
|
pinnedMemAllocSize_(0),
|
|
@@ -164,7 +164,7 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
|
|
|
164
164
|
|
|
165
165
|
if (pinnedMemAlloc_) {
|
|
166
166
|
#if defined USE_NVIDIA_CUVS
|
|
167
|
-
pmr_->
|
|
167
|
+
pmr_->deallocate_sync(pinnedMemAlloc_, pinnedMemAllocSize_);
|
|
168
168
|
#else
|
|
169
169
|
auto err = cudaFreeHost(pinnedMemAlloc_);
|
|
170
170
|
FAISS_ASSERT_FMT(
|
|
@@ -350,7 +350,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
|
|
|
350
350
|
// pinned memory allocation
|
|
351
351
|
if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
|
|
352
352
|
try {
|
|
353
|
-
pinnedMemAlloc_ = pmr_->
|
|
353
|
+
pinnedMemAlloc_ = pmr_->allocate_sync(pinnedMemSize_);
|
|
354
354
|
} catch (const std::bad_alloc& rmm_ex) {
|
|
355
355
|
FAISS_THROW_MSG("CUDA memory allocation error");
|
|
356
356
|
}
|
|
@@ -549,7 +549,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
|
|
|
549
549
|
rmm::mr::device_memory_resource* current_mr =
|
|
550
550
|
rmm::mr::get_per_device_resource(
|
|
551
551
|
rmm::cuda_device_id{adjReq.device});
|
|
552
|
-
p = current_mr->
|
|
552
|
+
p = current_mr->allocate(adjReq.stream, adjReq.size);
|
|
553
553
|
adjReq.mr = current_mr;
|
|
554
554
|
} catch (const std::bad_alloc& rmm_ex) {
|
|
555
555
|
FAISS_THROW_MSG("CUDA memory allocation error");
|
|
@@ -584,7 +584,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
|
|
|
584
584
|
// TODO: change this to use the current device resource once RMM has
|
|
585
585
|
// a way to retrieve a "guaranteed" managed memory resource for a
|
|
586
586
|
// device.
|
|
587
|
-
p = mmr_->
|
|
587
|
+
p = mmr_->allocate(adjReq.stream, adjReq.size);
|
|
588
588
|
adjReq.mr = mmr_.get();
|
|
589
589
|
} catch (const std::bad_alloc& rmm_ex) {
|
|
590
590
|
FAISS_THROW_MSG("CUDA memory allocation error");
|
|
@@ -648,7 +648,7 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) {
|
|
|
648
648
|
req.space == MemorySpace::Device ||
|
|
649
649
|
req.space == MemorySpace::Unified) {
|
|
650
650
|
#if defined USE_NVIDIA_CUVS
|
|
651
|
-
req.mr->
|
|
651
|
+
req.mr->deallocate(req.stream, p, req.size);
|
|
652
652
|
#else
|
|
653
653
|
auto err = cudaFree(p);
|
|
654
654
|
FAISS_ASSERT_FMT(
|