faiss 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +2 -2
- data/vendor/faiss/faiss/AutoTune.h +3 -3
- data/vendor/faiss/faiss/Clustering.cpp +37 -6
- data/vendor/faiss/faiss/Clustering.h +12 -3
- data/vendor/faiss/faiss/IVFlib.cpp +6 -3
- data/vendor/faiss/faiss/IVFlib.h +2 -2
- data/vendor/faiss/faiss/Index.cpp +6 -2
- data/vendor/faiss/faiss/Index.h +30 -8
- data/vendor/faiss/faiss/Index2Layer.cpp +2 -2
- data/vendor/faiss/faiss/Index2Layer.h +2 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +2 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +14 -16
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +2 -2
- data/vendor/faiss/faiss/IndexBinary.cpp +13 -2
- data/vendor/faiss/faiss/IndexBinary.h +8 -2
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -3
- data/vendor/faiss/faiss/IndexBinaryFlat.h +2 -2
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -2
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -2
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +2 -7
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +3 -3
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -3
- data/vendor/faiss/faiss/IndexBinaryHash.h +2 -2
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +3 -3
- data/vendor/faiss/faiss/IndexBinaryIVF.h +2 -2
- data/vendor/faiss/faiss/IndexFastScan.cpp +32 -18
- data/vendor/faiss/faiss/IndexFastScan.h +11 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +13 -10
- data/vendor/faiss/faiss/IndexFlat.h +2 -2
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -7
- data/vendor/faiss/faiss/IndexFlatCodes.h +25 -5
- data/vendor/faiss/faiss/IndexHNSW.cpp +156 -96
- data/vendor/faiss/faiss/IndexHNSW.h +54 -5
- data/vendor/faiss/faiss/IndexIDMap.cpp +19 -3
- data/vendor/faiss/faiss/IndexIDMap.h +5 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +5 -6
- data/vendor/faiss/faiss/IndexIVF.h +13 -4
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +21 -7
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +5 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +3 -14
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +2 -4
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +201 -91
- data/vendor/faiss/faiss/IndexIVFFastScan.h +33 -9
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +2 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -6
- data/vendor/faiss/faiss/IndexIVFPQ.h +2 -2
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +7 -14
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +2 -4
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +2 -3
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -2
- data/vendor/faiss/faiss/IndexLSH.cpp +2 -3
- data/vendor/faiss/faiss/IndexLSH.h +2 -2
- data/vendor/faiss/faiss/IndexLattice.cpp +3 -21
- data/vendor/faiss/faiss/IndexLattice.h +5 -24
- data/vendor/faiss/faiss/IndexNNDescent.cpp +2 -31
- data/vendor/faiss/faiss/IndexNNDescent.h +3 -3
- data/vendor/faiss/faiss/IndexNSG.cpp +2 -5
- data/vendor/faiss/faiss/IndexNSG.h +3 -3
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
- data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +26 -26
- data/vendor/faiss/faiss/IndexPQ.h +2 -2
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +2 -5
- data/vendor/faiss/faiss/IndexPQFastScan.h +2 -11
- data/vendor/faiss/faiss/IndexPreTransform.cpp +2 -2
- data/vendor/faiss/faiss/IndexPreTransform.h +3 -3
- data/vendor/faiss/faiss/IndexRefine.cpp +46 -9
- data/vendor/faiss/faiss/IndexRefine.h +9 -2
- data/vendor/faiss/faiss/IndexReplicas.cpp +2 -2
- data/vendor/faiss/faiss/IndexReplicas.h +2 -2
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +2 -2
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +5 -4
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -2
- data/vendor/faiss/faiss/IndexShards.cpp +2 -2
- data/vendor/faiss/faiss/IndexShards.h +2 -2
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +2 -2
- data/vendor/faiss/faiss/IndexShardsIVF.h +2 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +2 -2
- data/vendor/faiss/faiss/MatrixStats.h +2 -2
- data/vendor/faiss/faiss/MetaIndexes.cpp +2 -3
- data/vendor/faiss/faiss/MetaIndexes.h +2 -2
- data/vendor/faiss/faiss/MetricType.h +9 -4
- data/vendor/faiss/faiss/VectorTransform.cpp +2 -2
- data/vendor/faiss/faiss/VectorTransform.h +2 -2
- data/vendor/faiss/faiss/clone_index.cpp +2 -2
- data/vendor/faiss/faiss/clone_index.h +2 -2
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +2 -2
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +2 -2
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +97 -19
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +192 -0
- data/vendor/faiss/faiss/cppcontrib/factory_tools.h +29 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2 -2
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +85 -32
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2 -2
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +2 -2
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +2 -2
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +2 -2
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +2 -2
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +2 -5
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +45 -13
- data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +12 -6
- data/vendor/faiss/faiss/gpu/GpuDistance.h +11 -7
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +10 -15
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +285 -0
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +4 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +7 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +11 -4
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +66 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +15 -5
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -2
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +28 -23
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +2 -2
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +2 -2
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +8 -2
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +2 -3
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +2 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +10 -7
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +2 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +54 -54
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +144 -77
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +51 -51
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +3 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuResidualQuantizer.cpp +70 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +74 -4
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +2 -2
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +3 -3
- data/vendor/faiss/faiss/gpu/utils/{RaftUtils.h → CuvsUtils.h} +12 -11
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +8 -2
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +2 -2
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +2 -2
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +2 -2
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +6 -3
- data/vendor/faiss/faiss/gpu/utils/Timer.h +3 -3
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +79 -11
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +17 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +27 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +11 -3
- data/vendor/faiss/faiss/impl/CodePacker.cpp +2 -2
- data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +48 -2
- data/vendor/faiss/faiss/impl/FaissAssert.h +6 -4
- data/vendor/faiss/faiss/impl/FaissException.cpp +2 -2
- data/vendor/faiss/faiss/impl/FaissException.h +2 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +378 -205
- data/vendor/faiss/faiss/impl/HNSW.h +55 -24
- data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
- data/vendor/faiss/faiss/impl/IDSelector.h +2 -2
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +10 -10
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +2 -2
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +36 -2
- data/vendor/faiss/faiss/impl/NNDescent.cpp +15 -10
- data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +26 -49
- data/vendor/faiss/faiss/impl/NSG.h +20 -8
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +2 -2
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +2 -2
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +2 -4
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +2 -2
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -2
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +3 -2
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +7 -3
- data/vendor/faiss/faiss/impl/Quantizer.h +2 -2
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +2 -36
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +3 -13
- data/vendor/faiss/faiss/impl/ResultHandler.h +153 -34
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +721 -104
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +5 -2
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +2 -2
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +2 -2
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +7 -2
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +2 -2
- data/vendor/faiss/faiss/impl/code_distance/code_distance-sve.h +440 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +55 -2
- data/vendor/faiss/faiss/impl/index_read.cpp +31 -20
- data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
- data/vendor/faiss/faiss/impl/index_write.cpp +30 -16
- data/vendor/faiss/faiss/impl/io.cpp +15 -7
- data/vendor/faiss/faiss/impl/io.h +6 -6
- data/vendor/faiss/faiss/impl/io_macros.h +8 -9
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +2 -3
- data/vendor/faiss/faiss/impl/kmeans1d.h +2 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +2 -3
- data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
- data/vendor/faiss/faiss/impl/platform_macros.h +34 -2
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +13 -2
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +20 -2
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +3 -3
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +450 -3
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +8 -8
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +3 -3
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +151 -67
- data/vendor/faiss/faiss/index_factory.cpp +51 -34
- data/vendor/faiss/faiss/index_factory.h +2 -2
- data/vendor/faiss/faiss/index_io.h +14 -7
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +30 -10
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +5 -2
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +11 -3
- data/vendor/faiss/faiss/invlists/DirectMap.h +2 -2
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +57 -19
- data/vendor/faiss/faiss/invlists/InvertedLists.h +20 -11
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +2 -2
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +2 -2
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +23 -9
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +4 -3
- data/vendor/faiss/faiss/python/python_callbacks.cpp +5 -5
- data/vendor/faiss/faiss/python/python_callbacks.h +2 -2
- data/vendor/faiss/faiss/utils/AlignedTable.h +5 -3
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
- data/vendor/faiss/faiss/utils/Heap.h +107 -2
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +346 -0
- data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +2 -2
- data/vendor/faiss/faiss/utils/WorkerThread.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/generic.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +2 -2
- data/vendor/faiss/faiss/utils/bf16.h +36 -0
- data/vendor/faiss/faiss/utils/distances.cpp +249 -90
- data/vendor/faiss/faiss/utils/distances.h +8 -8
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +2 -2
- data/vendor/faiss/faiss/utils/distances_simd.cpp +1543 -56
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +72 -2
- data/vendor/faiss/faiss/utils/extra_distances.cpp +87 -140
- data/vendor/faiss/faiss/utils/extra_distances.h +5 -4
- data/vendor/faiss/faiss/utils/fp16-arm.h +2 -2
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +2 -2
- data/vendor/faiss/faiss/utils/fp16-inl.h +2 -2
- data/vendor/faiss/faiss/utils/fp16.h +2 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +2 -2
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -4
- data/vendor/faiss/faiss/utils/hamming.h +2 -2
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +2 -2
- data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +490 -0
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +2 -2
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +6 -3
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +7 -3
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +5 -5
- data/vendor/faiss/faiss/utils/ordered_key_value.h +2 -2
- data/vendor/faiss/faiss/utils/partitioning.cpp +2 -2
- data/vendor/faiss/faiss/utils/partitioning.h +2 -2
- data/vendor/faiss/faiss/utils/prefetch.h +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +2 -2
- data/vendor/faiss/faiss/utils/quantize_lut.h +2 -2
- data/vendor/faiss/faiss/utils/random.cpp +45 -2
- data/vendor/faiss/faiss/utils/random.h +27 -2
- data/vendor/faiss/faiss/utils/simdlib.h +12 -3
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +2 -2
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +2 -2
- data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -4
- data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +2 -2
- data/vendor/faiss/faiss/utils/sorting.h +2 -2
- data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +2 -2
- data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
- data/vendor/faiss/faiss/utils/utils.cpp +17 -10
- data/vendor/faiss/faiss/utils/utils.h +7 -3
- metadata +22 -11
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
* Copyright (c)
|
1
|
+
/*
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
*
|
4
4
|
* This source code is licensed under the MIT license found in the
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
@@ -17,10 +17,7 @@
|
|
17
17
|
#include <faiss/gpu/GpuIndexFlat.h>
|
18
18
|
#include <faiss/gpu/GpuIndexIVFFlat.h>
|
19
19
|
#include <faiss/gpu/GpuIndexIVFPQ.h>
|
20
|
-
#include <faiss/gpu/GpuIndexIVFScalarQuantizer.h>
|
21
20
|
#include <faiss/gpu/impl/IndexUtils.h>
|
22
|
-
#include <faiss/gpu/utils/DeviceUtils.h>
|
23
|
-
#include <faiss/impl/FaissAssert.h>
|
24
21
|
|
25
22
|
namespace faiss {
|
26
23
|
namespace gpu {
|
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
* Copyright (c)
|
1
|
+
/*
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
*
|
4
4
|
* This source code is licensed under the MIT license found in the
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
@@ -14,6 +14,9 @@
|
|
14
14
|
|
15
15
|
#include <faiss/IndexBinaryFlat.h>
|
16
16
|
#include <faiss/IndexFlat.h>
|
17
|
+
#if defined USE_NVIDIA_CUVS
|
18
|
+
#include <faiss/IndexHNSW.h>
|
19
|
+
#endif
|
17
20
|
#include <faiss/IndexIVF.h>
|
18
21
|
#include <faiss/IndexIVFFlat.h>
|
19
22
|
#include <faiss/IndexIVFPQ.h>
|
@@ -24,6 +27,9 @@
|
|
24
27
|
#include <faiss/MetaIndexes.h>
|
25
28
|
#include <faiss/gpu/GpuIndex.h>
|
26
29
|
#include <faiss/gpu/GpuIndexBinaryFlat.h>
|
30
|
+
#if defined USE_NVIDIA_CUVS
|
31
|
+
#include <faiss/gpu/GpuIndexCagra.h>
|
32
|
+
#endif
|
27
33
|
#include <faiss/gpu/GpuIndexFlat.h>
|
28
34
|
#include <faiss/gpu/GpuIndexIVFFlat.h>
|
29
35
|
#include <faiss/gpu/GpuIndexIVFPQ.h>
|
@@ -85,7 +91,15 @@ Index* ToCPUCloner::clone_Index(const Index* index) {
|
|
85
91
|
// objective is to make a single component out of them
|
86
92
|
// (inverse op of ToGpuClonerMultiple)
|
87
93
|
|
88
|
-
}
|
94
|
+
}
|
95
|
+
#if defined USE_NVIDIA_CUVS
|
96
|
+
else if (auto icg = dynamic_cast<const GpuIndexCagra*>(index)) {
|
97
|
+
IndexHNSWCagra* res = new IndexHNSWCagra();
|
98
|
+
icg->copyTo(res);
|
99
|
+
return res;
|
100
|
+
}
|
101
|
+
#endif
|
102
|
+
else if (auto ish = dynamic_cast<const IndexShards*>(index)) {
|
89
103
|
int nshard = ish->count();
|
90
104
|
FAISS_ASSERT(nshard > 0);
|
91
105
|
Index* res = clone_Index(ish->at(0));
|
@@ -124,7 +138,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
124
138
|
GpuIndexFlatConfig config;
|
125
139
|
config.device = device;
|
126
140
|
config.useFloat16 = useFloat16;
|
127
|
-
config.
|
141
|
+
config.use_cuvs = use_cuvs;
|
128
142
|
return new GpuIndexFlat(provider, ifl, config);
|
129
143
|
} else if (
|
130
144
|
dynamic_cast<const IndexScalarQuantizer*>(index) &&
|
@@ -134,7 +148,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
134
148
|
config.device = device;
|
135
149
|
config.useFloat16 = true;
|
136
150
|
FAISS_THROW_IF_NOT_MSG(
|
137
|
-
!
|
151
|
+
!use_cuvs, "this type of index is not implemented for cuVS");
|
138
152
|
GpuIndexFlat* gif = new GpuIndexFlat(
|
139
153
|
provider, index->d, index->metric_type, config);
|
140
154
|
// transfer data by blocks
|
@@ -152,7 +166,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
152
166
|
config.device = device;
|
153
167
|
config.indicesOptions = indicesOptions;
|
154
168
|
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
155
|
-
config.
|
169
|
+
config.use_cuvs = use_cuvs;
|
170
|
+
config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer;
|
156
171
|
|
157
172
|
GpuIndexIVFFlat* res = new GpuIndexIVFFlat(
|
158
173
|
provider, ifl->d, ifl->nlist, ifl->metric_type, config);
|
@@ -170,7 +185,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
170
185
|
config.indicesOptions = indicesOptions;
|
171
186
|
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
172
187
|
FAISS_THROW_IF_NOT_MSG(
|
173
|
-
!
|
188
|
+
!use_cuvs, "this type of index is not implemented for cuVS");
|
174
189
|
|
175
190
|
GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer(
|
176
191
|
provider,
|
@@ -203,8 +218,9 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
203
218
|
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
204
219
|
config.useFloat16LookupTables = useFloat16;
|
205
220
|
config.usePrecomputedTables = usePrecomputed;
|
206
|
-
config.
|
207
|
-
config.interleavedLayout =
|
221
|
+
config.use_cuvs = use_cuvs;
|
222
|
+
config.interleavedLayout = use_cuvs;
|
223
|
+
config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer;
|
208
224
|
|
209
225
|
GpuIndexIVFPQ* res = new GpuIndexIVFPQ(provider, ipq, config);
|
210
226
|
|
@@ -213,9 +229,25 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
213
229
|
}
|
214
230
|
|
215
231
|
return res;
|
216
|
-
}
|
217
|
-
|
218
|
-
|
232
|
+
}
|
233
|
+
#if defined USE_NVIDIA_CUVS
|
234
|
+
else if (auto icg = dynamic_cast<const faiss::IndexHNSWCagra*>(index)) {
|
235
|
+
GpuIndexCagraConfig config;
|
236
|
+
config.device = device;
|
237
|
+
GpuIndexCagra* res =
|
238
|
+
new GpuIndexCagra(provider, icg->d, icg->metric_type, config);
|
239
|
+
res->copyFrom(icg);
|
240
|
+
return res;
|
241
|
+
}
|
242
|
+
#endif
|
243
|
+
else {
|
244
|
+
// use CPU cloner for IDMap and PreTransform
|
245
|
+
auto index_idmap = dynamic_cast<const IndexIDMap*>(index);
|
246
|
+
auto index_pt = dynamic_cast<const IndexPreTransform*>(index);
|
247
|
+
if (index_idmap || index_pt) {
|
248
|
+
return Cloner::clone_Index(index);
|
249
|
+
}
|
250
|
+
FAISS_THROW_MSG("This index type is not implemented on GPU.");
|
219
251
|
}
|
220
252
|
}
|
221
253
|
|
@@ -509,7 +541,7 @@ faiss::IndexBinary* index_binary_cpu_to_gpu(
|
|
509
541
|
GpuIndexBinaryFlatConfig config;
|
510
542
|
config.device = device;
|
511
543
|
if (options) {
|
512
|
-
config.
|
544
|
+
config.use_cuvs = options->use_cuvs;
|
513
545
|
}
|
514
546
|
return new GpuIndexBinaryFlat(provider, ii, config);
|
515
547
|
} else {
|
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
* Copyright (c)
|
1
|
+
/*
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
*
|
4
4
|
* This source code is licensed under the MIT license found in the
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
@@ -37,12 +37,18 @@ struct GpuClonerOptions {
|
|
37
37
|
/// Set verbose options on the index
|
38
38
|
bool verbose = false;
|
39
39
|
|
40
|
-
/// use the
|
41
|
-
#if defined
|
42
|
-
bool
|
40
|
+
/// use the cuVS implementation
|
41
|
+
#if defined USE_NVIDIA_CUVS
|
42
|
+
bool use_cuvs = true;
|
43
43
|
#else
|
44
|
-
bool
|
44
|
+
bool use_cuvs = false;
|
45
45
|
#endif
|
46
|
+
|
47
|
+
/// This flag controls the CPU fallback logic for coarse quantizer
|
48
|
+
/// component of the index. When set to false (default), the cloner will
|
49
|
+
/// throw an exception for indices not implemented on GPU. When set to
|
50
|
+
/// true, it will fallback to a CPU implementation.
|
51
|
+
bool allowCpuCoarseQuantizer = false;
|
46
52
|
};
|
47
53
|
|
48
54
|
struct GpuMultipleClonerOptions : public GpuClonerOptions {
|
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
* Copyright (c)
|
1
|
+
/*
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
*
|
4
4
|
* This source code is licensed under the MIT license found in the
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
@@ -19,6 +19,7 @@ class GpuResourcesProvider;
|
|
19
19
|
enum class DistanceDataType {
|
20
20
|
F32 = 1,
|
21
21
|
F16,
|
22
|
+
BF16,
|
22
23
|
};
|
23
24
|
|
24
25
|
// Scalar type of the indices data
|
@@ -106,14 +107,17 @@ struct GpuDistanceParams {
|
|
106
107
|
/// execution
|
107
108
|
int device = -1;
|
108
109
|
|
109
|
-
/// Should the index dispatch down to
|
110
|
-
|
111
|
-
bool
|
110
|
+
/// Should the index dispatch down to cuVS?
|
111
|
+
#if defined USE_NVIDIA_CUVS
|
112
|
+
bool use_cuvs = true;
|
113
|
+
#else
|
114
|
+
bool use_cuvs = false;
|
115
|
+
#endif
|
112
116
|
};
|
113
117
|
|
114
|
-
/// A function that determines whether
|
118
|
+
/// A function that determines whether cuVS should be used based on various
|
115
119
|
/// conditions (such as unsupported architecture)
|
116
|
-
bool
|
120
|
+
bool should_use_cuvs(GpuDistanceParams args);
|
117
121
|
|
118
122
|
/// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
|
119
123
|
/// neighbor searches on an externally-provided region of memory (e.g., from a
|
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
* Copyright (c)
|
1
|
+
/*
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
*
|
4
4
|
* This source code is licensed under the MIT license found in the
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
@@ -15,7 +15,7 @@
|
|
15
15
|
/// Assertions
|
16
16
|
///
|
17
17
|
|
18
|
-
#
|
18
|
+
#if defined(__CUDA_ARCH__) || defined(USE_AMD_ROCM)
|
19
19
|
#define GPU_FAISS_ASSERT(X) assert(X)
|
20
20
|
#define GPU_FAISS_ASSERT_MSG(X, MSG) assert(X)
|
21
21
|
#define GPU_FAISS_ASSERT_FMT(X, FMT, ...) assert(X)
|
@@ -1,3 +1,4 @@
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
1
2
|
/**
|
2
3
|
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
4
|
*
|
@@ -5,7 +6,7 @@
|
|
5
6
|
* LICENSE file in the root directory of this source tree.
|
6
7
|
*/
|
7
8
|
/*
|
8
|
-
* Copyright (c) 2023, NVIDIA CORPORATION.
|
9
|
+
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
|
9
10
|
*
|
10
11
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
11
12
|
* you may not use this file except in compliance with the License.
|
@@ -37,17 +38,17 @@ struct GpuIndexConfig {
|
|
37
38
|
/// more memory than is available on the GPU.
|
38
39
|
MemorySpace memorySpace = MemorySpace::Device;
|
39
40
|
|
40
|
-
/// Should the index dispatch down to
|
41
|
-
#if defined
|
42
|
-
bool
|
41
|
+
/// Should the index dispatch down to cuVS?
|
42
|
+
#if defined USE_NVIDIA_CUVS
|
43
|
+
bool use_cuvs = true;
|
43
44
|
#else
|
44
|
-
bool
|
45
|
+
bool use_cuvs = false;
|
45
46
|
#endif
|
46
47
|
};
|
47
48
|
|
48
|
-
/// A centralized function that determines whether
|
49
|
+
/// A centralized function that determines whether cuVS should
|
49
50
|
/// be used based on various conditions (such as unsupported architecture)
|
50
|
-
bool
|
51
|
+
bool should_use_cuvs(GpuIndexConfig config_);
|
51
52
|
|
52
53
|
class GpuIndex : public faiss::Index {
|
53
54
|
public:
|
@@ -84,19 +85,14 @@ class GpuIndex : public faiss::Index {
|
|
84
85
|
|
85
86
|
/// `x` and `labels` can be resident on the CPU or any GPU; copies are
|
86
87
|
/// performed as needed
|
87
|
-
void assign(
|
88
|
-
|
89
|
-
const float* x,
|
90
|
-
idx_t* labels,
|
91
|
-
// faiss::Index has idx_t for k
|
92
|
-
idx_t k = 1) const override;
|
88
|
+
void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1)
|
89
|
+
const override;
|
93
90
|
|
94
91
|
/// `x`, `distances` and `labels` can be resident on the CPU or any
|
95
92
|
/// GPU; copies are performed as needed
|
96
93
|
void search(
|
97
94
|
idx_t n,
|
98
95
|
const float* x,
|
99
|
-
// faiss::Index has idx_t for k
|
100
96
|
idx_t k,
|
101
97
|
float* distances,
|
102
98
|
idx_t* labels,
|
@@ -107,7 +103,6 @@ class GpuIndex : public faiss::Index {
|
|
107
103
|
void search_and_reconstruct(
|
108
104
|
idx_t n,
|
109
105
|
const float* x,
|
110
|
-
// faiss::Index has idx_t for k
|
111
106
|
idx_t k,
|
112
107
|
float* distances,
|
113
108
|
idx_t* labels,
|
@@ -0,0 +1,285 @@
|
|
1
|
+
// @lint-ignore-every LICENSELINT
|
2
|
+
/**
|
3
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
4
|
+
*
|
5
|
+
* This source code is licensed under the MIT license found in the
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
7
|
+
*/
|
8
|
+
/*
|
9
|
+
* Copyright (c) 2024, NVIDIA CORPORATION.
|
10
|
+
*
|
11
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
12
|
+
* you may not use this file except in compliance with the License.
|
13
|
+
* You may obtain a copy of the License at
|
14
|
+
*
|
15
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
16
|
+
*
|
17
|
+
* Unless required by applicable law or agreed to in writing, software
|
18
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
19
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
20
|
+
* See the License for the specific language governing permissions and
|
21
|
+
* limitations under the License.
|
22
|
+
*/
|
23
|
+
|
24
|
+
#pragma once
|
25
|
+
|
26
|
+
#include <faiss/IndexIVF.h>
|
27
|
+
#include <faiss/gpu/GpuIndex.h>
|
28
|
+
#include <faiss/gpu/GpuIndexIVFPQ.h>
|
29
|
+
|
30
|
+
namespace faiss {
|
31
|
+
struct IndexHNSWCagra;
|
32
|
+
}
|
33
|
+
|
34
|
+
namespace faiss {
|
35
|
+
namespace gpu {
|
36
|
+
|
37
|
+
class CuvsCagra;
|
38
|
+
|
39
|
+
enum class graph_build_algo {
|
40
|
+
/// Use IVF-PQ to build all-neighbors knn graph
|
41
|
+
IVF_PQ,
|
42
|
+
/// Use NN-Descent to build all-neighbors knn graph
|
43
|
+
NN_DESCENT
|
44
|
+
};
|
45
|
+
|
46
|
+
/// A type for specifying how PQ codebooks are created.
|
47
|
+
enum class codebook_gen { // NOLINT
|
48
|
+
PER_SUBSPACE = 0, // NOLINT
|
49
|
+
PER_CLUSTER = 1, // NOLINT
|
50
|
+
};
|
51
|
+
|
52
|
+
struct IVFPQBuildCagraConfig {
|
53
|
+
///
|
54
|
+
/// The number of inverted lists (clusters)
|
55
|
+
///
|
56
|
+
/// Hint: the number of vectors per cluster (`n_rows/n_lists`) should be
|
57
|
+
/// approximately 1,000 to 10,000.
|
58
|
+
|
59
|
+
uint32_t n_lists = 1024;
|
60
|
+
/// The number of iterations searching for kmeans centers (index building).
|
61
|
+
uint32_t kmeans_n_iters = 20;
|
62
|
+
/// The fraction of data to use during iterative kmeans building.
|
63
|
+
double kmeans_trainset_fraction = 0.5;
|
64
|
+
///
|
65
|
+
/// The bit length of the vector element after compression by PQ.
|
66
|
+
///
|
67
|
+
/// Possible values: [4, 5, 6, 7, 8].
|
68
|
+
///
|
69
|
+
/// Hint: the smaller the 'pq_bits', the smaller the index size and the
|
70
|
+
/// better the search performance, but the lower the recall.
|
71
|
+
|
72
|
+
uint32_t pq_bits = 8;
|
73
|
+
///
|
74
|
+
/// The dimensionality of the vector after compression by PQ. When zero, an
|
75
|
+
/// optimal value is selected using a heuristic.
|
76
|
+
///
|
77
|
+
/// NB: `pq_dim * pq_bits` must be a multiple of 8.
|
78
|
+
///
|
79
|
+
/// Hint: a smaller 'pq_dim' results in a smaller index size and better
|
80
|
+
/// search performance, but lower recall. If 'pq_bits' is 8, 'pq_dim' can be
|
81
|
+
/// set to any number, but multiple of 8 are desirable for good performance.
|
82
|
+
/// If 'pq_bits' is not 8, 'pq_dim' should be a multiple of 8. For good
|
83
|
+
/// performance, it is desirable that 'pq_dim' is a multiple of 32. Ideally,
|
84
|
+
/// 'pq_dim' should be also a divisor of the dataset dim.
|
85
|
+
|
86
|
+
uint32_t pq_dim = 0;
|
87
|
+
/// How PQ codebooks are created.
|
88
|
+
codebook_gen codebook_kind = codebook_gen::PER_SUBSPACE;
|
89
|
+
///
|
90
|
+
/// Apply a random rotation matrix on the input data and queries even if
|
91
|
+
/// `dim % pq_dim == 0`.
|
92
|
+
///
|
93
|
+
/// Note: if `dim` is not multiple of `pq_dim`, a random rotation is always
|
94
|
+
/// applied to the input data and queries to transform the working space
|
95
|
+
/// from `dim` to `rot_dim`, which may be slightly larger than the original
|
96
|
+
/// space and is a multiple of `pq_dim` (`rot_dim % pq_dim == 0`).
|
97
|
+
/// However, this transform is not necessary when `dim` is a multiple of
|
98
|
+
/// `pq_dim`
|
99
|
+
/// (`dim == rot_dim`, hence no need to add "extra" data columns /
|
100
|
+
/// features).
|
101
|
+
///
|
102
|
+
/// By default, if `dim == rot_dim`, the rotation transform is initialized
|
103
|
+
/// with the identity matrix. When `force_random_rotation == true`, a random
|
104
|
+
/// orthogonal transform matrix is generated regardless of the values of
|
105
|
+
/// `dim` and `pq_dim`.
|
106
|
+
|
107
|
+
bool force_random_rotation = false;
|
108
|
+
///
|
109
|
+
/// By default, the algorithm allocates more space than necessary for
|
110
|
+
/// individual clusters
|
111
|
+
/// (`list_data`). This amortizes the cost of memory allocation and
|
112
|
+
/// reduce the number of data copies during repeated calls to `extend`
|
113
|
+
/// (extending the database).
|
114
|
+
///
|
115
|
+
/// The alternative is the conservative allocation behavior; when enabled,
|
116
|
+
/// the algorithm always allocates the minimum amount of memory required to
|
117
|
+
/// store the given number of records. Set this flag to `true` if you prefer
|
118
|
+
/// to use as little GPU memory for the database as possible.
|
119
|
+
|
120
|
+
bool conservative_memory_allocation = false;
|
121
|
+
};
|
122
|
+
|
123
|
+
/// Search-time IVF-PQ settings: number of probed clusters, look-up table and
/// distance data types, and the shared-memory carveout hint.
struct IVFPQSearchCagraConfig {
|
124
|
+
/// The number of clusters to search.
|
125
|
+
uint32_t n_probes = 20;
|
126
|
+
///
|
127
|
+
/// Data type of the look-up table to be created dynamically at search time.
|
128
|
+
///
|
129
|
+
/// Possible values: [CUDA_R_32F, CUDA_R_16F, CUDA_R_8U]
|
130
|
+
///
|
131
|
+
/// The use of low-precision types reduces the amount of shared memory
|
132
|
+
/// required at search time, so fast shared memory kernels can be used even
|
133
|
+
/// for datasets with large dimensionality. Note that the recall is slightly
|
134
|
+
/// degraded when a low-precision type is selected.
|
135
|
+
|
136
|
+
cudaDataType_t lut_dtype = CUDA_R_32F;
|
137
|
+
///
|
138
|
+
/// Storage data type for distance/similarity computed at search time.
|
139
|
+
///
|
140
|
+
/// Possible values: [CUDA_R_16F, CUDA_R_32F]
|
141
|
+
///
|
142
|
+
/// If the performance limiter at search time is device memory access,
|
143
|
+
/// selecting FP16 will improve performance slightly.
|
144
|
+
|
145
|
+
cudaDataType_t internal_distance_dtype = CUDA_R_32F;
|
146
|
+
///
|
147
|
+
/// Preferred fraction of SM's unified memory / L1 cache to be used as
|
148
|
+
/// shared memory.
|
149
|
+
///
|
150
|
+
/// Possible values: [0.0 - 1.0] as a fraction of the
|
151
|
+
/// `sharedMemPerMultiprocessor`.
|
152
|
+
///
|
153
|
+
/// One wants to increase the carveout to ensure good GPU occupancy for
|
154
|
+
/// the main search kernel, but not so high that no memory is left
|
155
|
+
/// to be used as L1 cache. Note, this value is interpreted only as a hint.
|
156
|
+
/// Moreover, a GPU usually allows only a fixed set of cache configurations,
|
157
|
+
/// so the provided value is rounded up to the nearest configuration. Refer
|
158
|
+
/// to the NVIDIA tuning guide for the target GPU architecture.
|
159
|
+
///
|
160
|
+
/// Note, this is a low-level tuning parameter that can have drastic
|
161
|
+
/// negative effects on the search performance if tweaked incorrectly.
|
162
|
+
|
163
|
+
double preferred_shmem_carveout = 1.0;
|
164
|
+
};
|
165
|
+
|
166
|
+
/// Configuration for GpuIndexCagra (graph degrees, graph build algorithm and
/// related options); extends GpuIndexConfig.
struct GpuIndexCagraConfig : public GpuIndexConfig {
|
167
|
+
/// Degree of input graph for pruning.
|
168
|
+
size_t intermediate_graph_degree = 128;
|
169
|
+
/// Degree of output graph.
|
170
|
+
size_t graph_degree = 64;
|
171
|
+
/// ANN algorithm to build knn graph.
|
172
|
+
graph_build_algo build_algo = graph_build_algo::IVF_PQ;
|
173
|
+
/// Number of iterations to run when building with NN_DESCENT.
|
174
|
+
size_t nn_descent_niter = 20;
|
175
|
+
|
176
|
+
/// Optional IVF-PQ build settings; nullptr by default.
/// NOTE(review): presumably only consulted when build_algo is IVF_PQ, and
/// ownership of the pointee is not visible here -- confirm.
IVFPQBuildCagraConfig* ivf_pq_params = nullptr;
|
177
|
+
/// Optional IVF-PQ search settings; nullptr by default.
/// NOTE(review): same ownership / usage questions as ivf_pq_params -- confirm.
IVFPQSearchCagraConfig* ivf_pq_search_params = nullptr;
|
178
|
+
/// NOTE(review): undocumented; presumably a refinement ratio used by the
/// IVF-PQ build path -- confirm against the implementation.
float refine_rate = 2.0f;
|
179
|
+
/// NOTE(review): undocumented; presumably controls whether the index keeps
/// a copy of the dataset -- confirm against the implementation.
bool store_dataset = true;
|
180
|
+
};
|
181
|
+
|
182
|
+
/// Search implementation choices; selected via SearchParametersCagra::algo.
enum class search_algo {
|
183
|
+
/// For large batch sizes.
|
184
|
+
SINGLE_CTA,
|
185
|
+
/// For small batch sizes.
|
186
|
+
MULTI_CTA,
|
187
|
+
/// NOTE(review): undocumented; intended use case is not stated here.
MULTI_KERNEL,
|
188
|
+
/// NOTE(review): presumably lets the implementation pick one of the
/// algorithms above -- confirm.
AUTO
|
189
|
+
};
|
190
|
+
|
191
|
+
/// Hashmap type used by the search; selected via
/// SearchParametersCagra::hashmap_mode (AUTO requests auto selection).
enum class hash_mode { HASH, SMALL, AUTO };
|
192
|
+
|
193
|
+
/// Per-search tuning parameters for CAGRA; extends SearchParameters.
struct SearchParametersCagra : SearchParameters {
|
194
|
+
/// Maximum number of queries to search at the same time (batch size). Auto
|
195
|
+
/// select when 0.
|
196
|
+
size_t max_queries = 0;
|
197
|
+
|
198
|
+
/// Number of intermediate search results retained during the search.
|
199
|
+
///
|
200
|
+
/// This is the main knob to adjust the trade-off between accuracy and search
|
201
|
+
/// speed. Higher values improve the search accuracy.
|
202
|
+
|
203
|
+
size_t itopk_size = 64;
|
204
|
+
|
205
|
+
/// Upper limit of search iterations. Auto select when 0.
|
206
|
+
size_t max_iterations = 0;
|
207
|
+
|
208
|
+
// In the following we list additional search parameters for fine tuning.
|
209
|
+
// Reasonable default values are automatically chosen.
|
210
|
+
|
211
|
+
/// Which search implementation to use.
|
212
|
+
search_algo algo = search_algo::AUTO;
|
213
|
+
|
214
|
+
/// Number of threads used to calculate a single distance. 4, 8, 16, or 32.
/// NOTE(review): the default below is 0, which is not in the listed set;
/// presumably 0 means auto-select -- confirm.
|
215
|
+
|
216
|
+
size_t team_size = 0;
|
217
|
+
|
218
|
+
/// Number of graph nodes to select as the starting point for the search in
|
219
|
+
/// each iteration (the search width).
|
220
|
+
size_t search_width = 1;
|
221
|
+
/// Lower limit of search iterations.
|
222
|
+
size_t min_iterations = 0;
|
223
|
+
|
224
|
+
/// Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0.
|
225
|
+
size_t thread_block_size = 0;
|
226
|
+
/// Hashmap type. Auto selection when AUTO.
|
227
|
+
hash_mode hashmap_mode = hash_mode::AUTO;
|
228
|
+
/// Lower limit of hashmap bit length. More than 8. NOTE(review): default is 0, presumably auto -- confirm.
|
229
|
+
size_t hashmap_min_bitlen = 0;
|
230
|
+
/// Upper limit of hashmap fill rate. More than 0.1, less than 0.9.
|
231
|
+
float hashmap_max_fill_rate = 0.5;
|
232
|
+
|
233
|
+
/// Number of iterations of initial random seed node selection. 1 or more.
|
234
|
+
|
235
|
+
uint32_t num_random_samplings = 1;
|
236
|
+
/// Bit mask used for initial random seed node selection.
|
237
|
+
uint64_t seed = 0x128394;
|
238
|
+
};
|
239
|
+
|
240
|
+
/// GPU index (derived from GpuIndex) backed by a CuvsCagra instance; it can be
/// initialized from, and copied to, a CPU faiss::IndexHNSWCagra.
struct GpuIndexCagra : public GpuIndex {
|
241
|
+
public:
|
242
|
+
/// Constructs the index for `dims`-dimensional vectors. `metric` defaults
/// to faiss::METRIC_L2 and `config` to a default-constructed
/// GpuIndexCagraConfig.
GpuIndexCagra(
|
243
|
+
GpuResourcesProvider* provider,
|
244
|
+
int dims,
|
245
|
+
faiss::MetricType metric = faiss::METRIC_L2,
|
246
|
+
GpuIndexCagraConfig config = GpuIndexCagraConfig());
|
247
|
+
|
248
|
+
/// Trains CAGRA based on the given vector data (`n` vectors read from `x`).
|
249
|
+
void train(idx_t n, const float* x) override;
|
250
|
+
|
251
|
+
/// Initialize ourselves from the given CPU index; will overwrite
|
252
|
+
/// all data in ourselves.
|
253
|
+
void copyFrom(const faiss::IndexHNSWCagra* index);
|
254
|
+
|
255
|
+
/// Copy ourselves to the given CPU index; will overwrite all data
|
256
|
+
/// in the index instance.
|
257
|
+
void copyTo(faiss::IndexHNSWCagra* index) const;
|
258
|
+
|
259
|
+
/// NOTE(review): undocumented override; presumably discards the current
/// index contents -- confirm against the implementation.
void reset() override;
|
260
|
+
|
261
|
+
/// Returns the kNN graph as a flat std::vector<idx_t>.
/// NOTE(review): element layout is not visible here -- confirm.
std::vector<idx_t> get_knngraph() const;
|
262
|
+
|
263
|
+
protected:
|
264
|
+
/// NOTE(review): undocumented override; return value is defined in the
/// implementation file.
bool addImplRequiresIDs_() const override;
|
265
|
+
|
266
|
+
/// NOTE(review): undocumented override; behavior is defined in the
/// implementation file.
void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
|
267
|
+
|
268
|
+
/// Called from GpuIndex for search.
|
269
|
+
void searchImpl_(
|
270
|
+
idx_t n,
|
271
|
+
const float* x,
|
272
|
+
int k,
|
273
|
+
float* distances,
|
274
|
+
idx_t* labels,
|
275
|
+
const SearchParameters* search_params) const override;
|
276
|
+
|
277
|
+
/// Our configuration options.
|
278
|
+
const GpuIndexCagraConfig cagraConfig_;
|
279
|
+
|
280
|
+
/// Shared handle to the underlying CuvsCagra instance. NOTE(review): the previous comment said it "contains the inverted lists", which looks copied from an IVF index -- confirm what it holds.
|
281
|
+
std::shared_ptr<CuvsCagra> index_;
|
282
|
+
};
|
283
|
+
|
284
|
+
} // namespace gpu
|
285
|
+
} // namespace faiss
|