faiss 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/ext/faiss/index.cpp +13 -0
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +2 -2
- data/vendor/faiss/faiss/AutoTune.cpp +15 -4
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +1 -5
- data/vendor/faiss/faiss/Clustering.h +0 -2
- data/vendor/faiss/faiss/IVFlib.h +0 -2
- data/vendor/faiss/faiss/Index.h +1 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
- data/vendor/faiss/faiss/IndexBinary.h +0 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
- data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
- data/vendor/faiss/faiss/IndexFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
- data/vendor/faiss/faiss/IndexHNSW.h +0 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
- data/vendor/faiss/faiss/IndexIDMap.h +0 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
- data/vendor/faiss/faiss/IndexIVF.h +121 -61
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
- data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
- data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
- data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
- data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
- data/vendor/faiss/faiss/IndexReplicas.h +0 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
- data/vendor/faiss/faiss/IndexShards.cpp +26 -109
- data/vendor/faiss/faiss/IndexShards.h +2 -3
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
- data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
- data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
- data/vendor/faiss/faiss/MetaIndexes.h +29 -0
- data/vendor/faiss/faiss/MetricType.h +14 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
- data/vendor/faiss/faiss/VectorTransform.h +1 -3
- data/vendor/faiss/faiss/clone_index.cpp +232 -18
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
- data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
- data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
- data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
- data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
- data/vendor/faiss/faiss/impl/HNSW.h +6 -9
- data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
- data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
- data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
- data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
- data/vendor/faiss/faiss/impl/NSG.h +4 -7
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +8 -10
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
- data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
- data/vendor/faiss/faiss/utils/Heap.h +35 -1
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
- data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
- data/vendor/faiss/faiss/utils/distances.cpp +61 -7
- data/vendor/faiss/faiss/utils/distances.h +11 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
- data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
- data/vendor/faiss/faiss/utils/fp16.h +7 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
- data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
- data/vendor/faiss/faiss/utils/hamming.h +21 -10
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
- data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
- data/vendor/faiss/faiss/utils/sorting.h +71 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
- data/vendor/faiss/faiss/utils/utils.cpp +4 -176
- data/vendor/faiss/faiss/utils/utils.h +2 -9
- metadata +30 -4
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
|
@@ -1,4 +1,10 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
2
8
|
#ifndef LEVEL2_NEON_INL_H
|
|
3
9
|
#define LEVEL2_NEON_INL_H
|
|
4
10
|
|
|
@@ -1940,9 +1946,15 @@ struct Index2LevelDecoderImpl<
|
|
|
1940
1946
|
} // namespace
|
|
1941
1947
|
|
|
1942
1948
|
// Suitable for IVF256,PQ[1]x8
|
|
1949
|
+
// Suitable for IVF256,PQ[1]x10 (such as IVF256,PQ16x10np)
|
|
1950
|
+
// Suitable for IVF256,PQ[1]x12 (such as IVF256,PQ16x12np)
|
|
1951
|
+
// Suitable for IVF256,PQ[1]x16 (such as IVF256,PQ16x16np)
|
|
1943
1952
|
// Suitable for Residual[1]x8,PQ[2]x8
|
|
1944
|
-
// Suitable for IVF[9-16 bit],PQ[1]x8 (such as IVF1024,PQ16np)
|
|
1945
|
-
// Suitable for
|
|
1953
|
+
// Suitable for IVF[2^9-2^16 bit],PQ[1]x8 (such as IVF1024,PQ16np)
|
|
1954
|
+
// Suitable for IVF[2^9-2^16 bit],PQ[1]x10 (such as IVF1024,PQ16x10np)
|
|
1955
|
+
// Suitable for IVF[2^9-2^16 bit],PQ[1]x12 (such as IVF1024,PQ16x12np)
|
|
1956
|
+
// Suitable for IVF[2^9-2^16 bit],PQ[1]x16 (such as IVF1024,PQ16x16np)
|
|
1957
|
+
// Suitable for Residual[1]x[9-16 bit],PQ[2]x[3] (such as Residual2x9,PQ8)
|
|
1946
1958
|
template <
|
|
1947
1959
|
intptr_t DIM,
|
|
1948
1960
|
intptr_t COARSE_SIZE,
|
|
@@ -1951,11 +1963,13 @@ template <
|
|
|
1951
1963
|
intptr_t FINE_BITS = 8>
|
|
1952
1964
|
struct Index2LevelDecoder {
|
|
1953
1965
|
static_assert(
|
|
1954
|
-
COARSE_BITS == 8 || COARSE_BITS == 10 || COARSE_BITS ==
|
|
1955
|
-
|
|
1966
|
+
COARSE_BITS == 8 || COARSE_BITS == 10 || COARSE_BITS == 12 ||
|
|
1967
|
+
COARSE_BITS == 16,
|
|
1968
|
+
"Only 8, 10, 12 or 16 bits are currently supported for COARSE_BITS");
|
|
1956
1969
|
static_assert(
|
|
1957
|
-
FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS ==
|
|
1958
|
-
|
|
1970
|
+
FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
|
|
1971
|
+
FINE_BITS == 16,
|
|
1972
|
+
"Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");
|
|
1959
1973
|
|
|
1960
1974
|
static constexpr intptr_t dim = DIM;
|
|
1961
1975
|
static constexpr intptr_t coarseSize = COARSE_SIZE;
|
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
2
7
|
|
|
3
8
|
#ifndef PQ_AVX2_INL_H
|
|
4
9
|
#define PQ_AVX2_INL_H
|
|
@@ -1488,12 +1493,14 @@ struct IndexPQDecoderImpl<
|
|
|
1488
1493
|
|
|
1489
1494
|
// Suitable for PQ[1]x8
|
|
1490
1495
|
// Suitable for PQ[1]x10
|
|
1496
|
+
// Suitable for PQ[1]x12
|
|
1491
1497
|
// Suitable for PQ[1]x16
|
|
1492
1498
|
template <intptr_t DIM, intptr_t FINE_SIZE, intptr_t FINE_BITS = 8>
|
|
1493
1499
|
struct IndexPQDecoder {
|
|
1494
1500
|
static_assert(
|
|
1495
|
-
FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS ==
|
|
1496
|
-
|
|
1501
|
+
FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
|
|
1502
|
+
FINE_BITS == 16,
|
|
1503
|
+
"Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");
|
|
1497
1504
|
|
|
1498
1505
|
static constexpr intptr_t dim = DIM;
|
|
1499
1506
|
static constexpr intptr_t fineSize = FINE_SIZE;
|
|
@@ -1,4 +1,10 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
2
8
|
#ifndef PQ_INL_H
|
|
3
9
|
#define PQ_INL_H
|
|
4
10
|
|
|
@@ -1,4 +1,10 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
2
8
|
#ifndef PQ_NEON_INL_H
|
|
3
9
|
#define PQ_NEON_INL_H
|
|
4
10
|
|
|
@@ -1322,12 +1328,14 @@ struct IndexPQDecoderImpl<
|
|
|
1322
1328
|
|
|
1323
1329
|
// Suitable for PQ[1]x8
|
|
1324
1330
|
// Suitable for PQ[1]x10
|
|
1331
|
+
// Suitable for PQ[1]x12
|
|
1325
1332
|
// Suitable for PQ[1]x16
|
|
1326
1333
|
template <intptr_t DIM, intptr_t FINE_SIZE, intptr_t FINE_BITS = 8>
|
|
1327
1334
|
struct IndexPQDecoder {
|
|
1328
1335
|
static_assert(
|
|
1329
|
-
FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS ==
|
|
1330
|
-
|
|
1336
|
+
FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
|
|
1337
|
+
FINE_BITS == 16,
|
|
1338
|
+
"Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");
|
|
1331
1339
|
|
|
1332
1340
|
static constexpr intptr_t dim = DIM;
|
|
1333
1341
|
static constexpr intptr_t fineSize = FINE_SIZE;
|
|
@@ -11,6 +11,8 @@
|
|
|
11
11
|
#include <faiss/IndexPreTransform.h>
|
|
12
12
|
#include <faiss/IndexReplicas.h>
|
|
13
13
|
#include <faiss/IndexShards.h>
|
|
14
|
+
#include <faiss/IndexShardsIVF.h>
|
|
15
|
+
|
|
14
16
|
#include <faiss/gpu/GpuIndex.h>
|
|
15
17
|
#include <faiss/gpu/GpuIndexFlat.h>
|
|
16
18
|
#include <faiss/gpu/GpuIndexIVFFlat.h>
|
|
@@ -33,7 +35,12 @@ using namespace ::faiss;
|
|
|
33
35
|
|
|
34
36
|
void GpuParameterSpace::initialize(const Index* index) {
|
|
35
37
|
if (DC(IndexPreTransform)) {
|
|
36
|
-
|
|
38
|
+
initialize(ix->index);
|
|
39
|
+
return;
|
|
40
|
+
}
|
|
41
|
+
if (DC(IndexShardsIVF)) {
|
|
42
|
+
ParameterSpace::initialize(index);
|
|
43
|
+
return;
|
|
37
44
|
}
|
|
38
45
|
if (DC(IndexReplicas)) {
|
|
39
46
|
if (ix->count() == 0)
|
|
@@ -53,6 +60,14 @@ void GpuParameterSpace::initialize(const Index* index) {
|
|
|
53
60
|
break;
|
|
54
61
|
pr.values.push_back(nprobe);
|
|
55
62
|
}
|
|
63
|
+
|
|
64
|
+
ParameterSpace ivf_pspace;
|
|
65
|
+
ivf_pspace.initialize(ix->quantizer);
|
|
66
|
+
|
|
67
|
+
for (const ParameterRange& p : ivf_pspace.parameter_ranges) {
|
|
68
|
+
ParameterRange& pr = add_range("quantizer_" + p.name);
|
|
69
|
+
pr.values = p.values;
|
|
70
|
+
}
|
|
56
71
|
}
|
|
57
72
|
// not sure we should call the parent initializer
|
|
58
73
|
}
|
|
@@ -72,7 +87,7 @@ void GpuParameterSpace::set_index_parameter(
|
|
|
72
87
|
}
|
|
73
88
|
if (name == "nprobe") {
|
|
74
89
|
if (DC(GpuIndexIVF)) {
|
|
75
|
-
ix->
|
|
90
|
+
ix->nprobe = size_t(val);
|
|
76
91
|
return;
|
|
77
92
|
}
|
|
78
93
|
}
|
|
@@ -83,6 +98,14 @@ void GpuParameterSpace::set_index_parameter(
|
|
|
83
98
|
}
|
|
84
99
|
}
|
|
85
100
|
|
|
101
|
+
if (name.find("quantizer_") == 0) {
|
|
102
|
+
if (DC(GpuIndexIVF)) {
|
|
103
|
+
std::string sub_name = name.substr(strlen("quantizer_"));
|
|
104
|
+
set_index_parameter(ix->quantizer, sub_name, val);
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
86
109
|
// maybe normal index parameters apply?
|
|
87
110
|
ParameterSpace::set_index_parameter(index, name, val);
|
|
88
111
|
}
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
#include <faiss/IndexPreTransform.h>
|
|
19
19
|
#include <faiss/IndexReplicas.h>
|
|
20
20
|
#include <faiss/IndexScalarQuantizer.h>
|
|
21
|
+
#include <faiss/IndexShardsIVF.h>
|
|
21
22
|
#include <faiss/MetaIndexes.h>
|
|
22
23
|
#include <faiss/gpu/GpuIndex.h>
|
|
23
24
|
#include <faiss/gpu/GpuIndexFlat.h>
|
|
@@ -116,7 +117,6 @@ ToGpuCloner::ToGpuCloner(
|
|
|
116
117
|
: GpuClonerOptions(options), provider(prov), device(device) {}
|
|
117
118
|
|
|
118
119
|
Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
119
|
-
using idx_t = Index::idx_t;
|
|
120
120
|
if (auto ifl = dynamic_cast<const IndexFlat*>(index)) {
|
|
121
121
|
GpuIndexFlatConfig config;
|
|
122
122
|
config.device = device;
|
|
@@ -227,8 +227,8 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
|
|
|
227
227
|
std::vector<int>& devices,
|
|
228
228
|
const GpuMultipleClonerOptions& options)
|
|
229
229
|
: GpuMultipleClonerOptions(options) {
|
|
230
|
-
|
|
231
|
-
for (
|
|
230
|
+
FAISS_THROW_IF_NOT(provider.size() == devices.size());
|
|
231
|
+
for (size_t i = 0; i < provider.size(); i++) {
|
|
232
232
|
sub_cloners.push_back(ToGpuCloner(provider[i], devices[i], options));
|
|
233
233
|
}
|
|
234
234
|
}
|
|
@@ -241,28 +241,43 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
|
|
|
241
241
|
void ToGpuClonerMultiple::copy_ivf_shard(
|
|
242
242
|
const IndexIVF* index_ivf,
|
|
243
243
|
IndexIVF* idx2,
|
|
244
|
-
|
|
245
|
-
|
|
244
|
+
idx_t n,
|
|
245
|
+
idx_t i) {
|
|
246
246
|
if (shard_type == 2) {
|
|
247
|
-
|
|
248
|
-
|
|
247
|
+
idx_t i0 = i * index_ivf->ntotal / n;
|
|
248
|
+
idx_t i1 = (i + 1) * index_ivf->ntotal / n;
|
|
249
249
|
|
|
250
250
|
if (verbose)
|
|
251
251
|
printf("IndexShards shard %ld indices %ld:%ld\n", i, i0, i1);
|
|
252
|
-
index_ivf->copy_subset_to(
|
|
252
|
+
index_ivf->copy_subset_to(
|
|
253
|
+
*idx2, InvertedLists::SUBSET_TYPE_ID_RANGE, i0, i1);
|
|
253
254
|
FAISS_ASSERT(idx2->ntotal == i1 - i0);
|
|
254
255
|
} else if (shard_type == 1) {
|
|
255
256
|
if (verbose)
|
|
256
257
|
printf("IndexShards shard %ld select modulo %ld = %ld\n", i, n, i);
|
|
257
|
-
index_ivf->copy_subset_to(
|
|
258
|
+
index_ivf->copy_subset_to(
|
|
259
|
+
*idx2, InvertedLists::SUBSET_TYPE_ID_MOD, n, i);
|
|
260
|
+
} else if (shard_type == 4) {
|
|
261
|
+
idx_t i0 = i * index_ivf->nlist / n;
|
|
262
|
+
idx_t i1 = (i + 1) * index_ivf->nlist / n;
|
|
263
|
+
if (verbose) {
|
|
264
|
+
printf("IndexShards %ld/%ld select lists %d:%d\n",
|
|
265
|
+
i,
|
|
266
|
+
n,
|
|
267
|
+
int(i0),
|
|
268
|
+
int(i1));
|
|
269
|
+
}
|
|
270
|
+
index_ivf->copy_subset_to(
|
|
271
|
+
*idx2, InvertedLists::SUBSET_TYPE_INVLIST, i0, i1);
|
|
258
272
|
} else {
|
|
259
273
|
FAISS_THROW_FMT("shard_type %d not implemented", shard_type);
|
|
260
274
|
}
|
|
261
275
|
}
|
|
262
276
|
|
|
263
277
|
Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
|
|
264
|
-
|
|
278
|
+
idx_t n = sub_cloners.size();
|
|
265
279
|
|
|
280
|
+
auto index_ivf = dynamic_cast<const faiss::IndexIVF*>(index);
|
|
266
281
|
auto index_ivfpq = dynamic_cast<const faiss::IndexIVFPQ*>(index);
|
|
267
282
|
auto index_ivfflat = dynamic_cast<const faiss::IndexIVFFlat*>(index);
|
|
268
283
|
auto index_ivfsq =
|
|
@@ -274,16 +289,36 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
|
|
|
274
289
|
"IndexIVFFlat, IndexIVFScalarQuantizer, "
|
|
275
290
|
"IndexFlat and IndexIVFPQ");
|
|
276
291
|
|
|
292
|
+
// decide what coarse quantizer the sub-indexes are going to have
|
|
293
|
+
const Index* quantizer = nullptr;
|
|
294
|
+
std::unique_ptr<Index> new_quantizer;
|
|
295
|
+
if (index_ivf) {
|
|
296
|
+
quantizer = index_ivf->quantizer;
|
|
297
|
+
if (common_ivf_quantizer &&
|
|
298
|
+
!dynamic_cast<const IndexFlat*>(quantizer)) {
|
|
299
|
+
// then we flatten the coarse quantizer so that everything remains
|
|
300
|
+
// on GPU
|
|
301
|
+
new_quantizer.reset(
|
|
302
|
+
new IndexFlat(quantizer->d, quantizer->metric_type));
|
|
303
|
+
std::vector<float> centroids(quantizer->d * quantizer->ntotal);
|
|
304
|
+
quantizer->reconstruct_n(0, quantizer->ntotal, centroids.data());
|
|
305
|
+
new_quantizer->add(quantizer->ntotal, centroids.data());
|
|
306
|
+
quantizer = new_quantizer.get();
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
|
|
277
310
|
std::vector<faiss::Index*> shards(n);
|
|
278
311
|
|
|
279
|
-
for (
|
|
312
|
+
for (idx_t i = 0; i < n; i++) {
|
|
280
313
|
// make a shallow copy
|
|
281
|
-
if (reserveVecs)
|
|
314
|
+
if (reserveVecs) {
|
|
282
315
|
sub_cloners[i].reserveVecs = (reserveVecs + n - 1) / n;
|
|
283
|
-
|
|
316
|
+
}
|
|
317
|
+
// note: const_casts here are harmless because the indexes built here
|
|
318
|
+
// are short-lived, translated immediately to GPU indexes.
|
|
284
319
|
if (index_ivfpq) {
|
|
285
320
|
faiss::IndexIVFPQ idx2(
|
|
286
|
-
|
|
321
|
+
const_cast<Index*>(quantizer),
|
|
287
322
|
index_ivfpq->d,
|
|
288
323
|
index_ivfpq->nlist,
|
|
289
324
|
index_ivfpq->code_size,
|
|
@@ -297,7 +332,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
|
|
|
297
332
|
shards[i] = sub_cloners[i].clone_Index(&idx2);
|
|
298
333
|
} else if (index_ivfflat) {
|
|
299
334
|
faiss::IndexIVFFlat idx2(
|
|
300
|
-
|
|
335
|
+
const_cast<Index*>(quantizer),
|
|
301
336
|
index->d,
|
|
302
337
|
index_ivfflat->nlist,
|
|
303
338
|
index_ivfflat->metric_type);
|
|
@@ -307,7 +342,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
|
|
|
307
342
|
shards[i] = sub_cloners[i].clone_Index(&idx2);
|
|
308
343
|
} else if (index_ivfsq) {
|
|
309
344
|
faiss::IndexIVFScalarQuantizer idx2(
|
|
310
|
-
|
|
345
|
+
const_cast<Index*>(quantizer),
|
|
311
346
|
index->d,
|
|
312
347
|
index_ivfsq->nlist,
|
|
313
348
|
index_ivfsq->sq.qtype,
|
|
@@ -323,40 +358,52 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
|
|
|
323
358
|
faiss::IndexFlat idx2(index->d, index->metric_type);
|
|
324
359
|
shards[i] = sub_cloners[i].clone_Index(&idx2);
|
|
325
360
|
if (index->ntotal > 0) {
|
|
326
|
-
|
|
327
|
-
|
|
361
|
+
idx_t i0 = index->ntotal * i / n;
|
|
362
|
+
idx_t i1 = index->ntotal * (i + 1) / n;
|
|
328
363
|
shards[i]->add(i1 - i0, index_flat->get_xb() + i0 * index->d);
|
|
329
364
|
}
|
|
330
365
|
}
|
|
331
366
|
}
|
|
332
367
|
|
|
333
368
|
bool successive_ids = index_flat != nullptr;
|
|
334
|
-
faiss::IndexShards* res
|
|
335
|
-
|
|
369
|
+
faiss::IndexShards* res;
|
|
370
|
+
if (common_ivf_quantizer && index_ivf) {
|
|
371
|
+
this->shard = false;
|
|
372
|
+
Index* common_quantizer = clone_Index(index_ivf->quantizer);
|
|
373
|
+
this->shard = true;
|
|
374
|
+
IndexShardsIVF* idx = new faiss::IndexShardsIVF(
|
|
375
|
+
common_quantizer, index_ivf->nlist, true, false);
|
|
376
|
+
idx->own_fields = true;
|
|
377
|
+
idx->own_indices = true;
|
|
378
|
+
res = idx;
|
|
379
|
+
} else {
|
|
380
|
+
res = new faiss::IndexShards(index->d, true, successive_ids);
|
|
381
|
+
res->own_indices = true;
|
|
382
|
+
}
|
|
336
383
|
|
|
337
384
|
for (int i = 0; i < n; i++) {
|
|
338
385
|
res->add_shard(shards[i]);
|
|
339
386
|
}
|
|
340
|
-
res->own_fields = true;
|
|
341
387
|
FAISS_ASSERT(index->ntotal == res->ntotal);
|
|
342
388
|
return res;
|
|
343
389
|
}
|
|
344
390
|
|
|
345
391
|
Index* ToGpuClonerMultiple::clone_Index(const Index* index) {
|
|
346
|
-
|
|
347
|
-
if (n == 1)
|
|
392
|
+
idx_t n = sub_cloners.size();
|
|
393
|
+
if (n == 1) {
|
|
348
394
|
return sub_cloners[0].clone_Index(index);
|
|
395
|
+
}
|
|
349
396
|
|
|
350
397
|
if (dynamic_cast<const IndexFlat*>(index) ||
|
|
351
|
-
dynamic_cast<const
|
|
352
|
-
dynamic_cast<const
|
|
353
|
-
dynamic_cast<const
|
|
398
|
+
dynamic_cast<const IndexIVFFlat*>(index) ||
|
|
399
|
+
dynamic_cast<const IndexIVFScalarQuantizer*>(index) ||
|
|
400
|
+
dynamic_cast<const IndexIVFPQ*>(index)) {
|
|
354
401
|
if (!shard) {
|
|
355
402
|
IndexReplicas* res = new IndexReplicas();
|
|
356
403
|
for (auto& sub_cloner : sub_cloners) {
|
|
357
404
|
res->addIndex(sub_cloner.clone_Index(index));
|
|
358
405
|
}
|
|
359
|
-
res->
|
|
406
|
+
res->own_indices = true;
|
|
360
407
|
return res;
|
|
361
408
|
} else {
|
|
362
409
|
return clone_Index_to_shards(index);
|
|
@@ -373,8 +420,8 @@ Index* ToGpuClonerMultiple::clone_Index(const Index* index) {
|
|
|
373
420
|
for (int m = 0; m < pq.M; m++) {
|
|
374
421
|
// which GPU(s) will be assigned to this sub-quantizer
|
|
375
422
|
|
|
376
|
-
|
|
377
|
-
|
|
423
|
+
idx_t i0 = m * n / pq.M;
|
|
424
|
+
idx_t i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
|
|
378
425
|
std::vector<ToGpuCloner> sub_cloners_2;
|
|
379
426
|
sub_cloners_2.insert(
|
|
380
427
|
sub_cloners_2.begin(),
|
|
@@ -14,41 +14,42 @@ namespace gpu {
|
|
|
14
14
|
|
|
15
15
|
/// set some options on how to copy to GPU
|
|
16
16
|
struct GpuClonerOptions {
|
|
17
|
-
GpuClonerOptions();
|
|
18
|
-
|
|
19
17
|
/// how should indices be stored on index types that support indices
|
|
20
18
|
/// (anything but GpuIndexFlat*)?
|
|
21
|
-
IndicesOptions indicesOptions;
|
|
19
|
+
IndicesOptions indicesOptions = INDICES_64_BIT;
|
|
22
20
|
|
|
23
21
|
/// is the coarse quantizer in float16?
|
|
24
|
-
bool useFloat16CoarseQuantizer;
|
|
22
|
+
bool useFloat16CoarseQuantizer = false;
|
|
25
23
|
|
|
26
24
|
/// for GpuIndexIVFFlat, is storage in float16?
|
|
27
25
|
/// for GpuIndexIVFPQ, are intermediate calculations in float16?
|
|
28
|
-
bool useFloat16;
|
|
26
|
+
bool useFloat16 = false;
|
|
29
27
|
|
|
30
28
|
/// use precomputed tables?
|
|
31
|
-
bool usePrecomputed;
|
|
29
|
+
bool usePrecomputed = false;
|
|
32
30
|
|
|
33
31
|
/// reserve vectors in the invfiles?
|
|
34
|
-
long reserveVecs;
|
|
32
|
+
long reserveVecs = 0;
|
|
35
33
|
|
|
36
34
|
/// For GpuIndexFlat, store data in transposed layout?
|
|
37
|
-
bool storeTransposed;
|
|
35
|
+
bool storeTransposed = false;
|
|
38
36
|
|
|
39
37
|
/// Set verbose options on the index
|
|
40
|
-
bool verbose;
|
|
38
|
+
bool verbose = false;
|
|
41
39
|
};
|
|
42
40
|
|
|
43
41
|
struct GpuMultipleClonerOptions : public GpuClonerOptions {
|
|
44
|
-
GpuMultipleClonerOptions();
|
|
45
|
-
|
|
46
42
|
/// Whether to shard the index across GPUs, versus replication
|
|
47
43
|
/// across GPUs
|
|
48
|
-
bool shard;
|
|
44
|
+
bool shard = false;
|
|
49
45
|
|
|
50
46
|
/// IndexIVF::copy_subset_to subset type
|
|
51
|
-
int shard_type;
|
|
47
|
+
int shard_type = 1;
|
|
48
|
+
|
|
49
|
+
/// set to true if an IndexIVF is to be dispatched to multiple GPUs with a
|
|
50
|
+
/// single common IVF quantizer, i.e. only the inverted lists are sharded on
|
|
51
|
+
/// the sub-indexes (uses an IndexShardsIVF)
|
|
52
|
+
bool common_ivf_quantizer = false;
|
|
52
53
|
};
|
|
53
54
|
|
|
54
55
|
} // namespace gpu
|
|
@@ -45,7 +45,8 @@ struct GpuDistanceParams {
|
|
|
45
45
|
outDistances(nullptr),
|
|
46
46
|
ignoreOutDistances(false),
|
|
47
47
|
outIndicesType(IndicesDataType::I64),
|
|
48
|
-
outIndices(nullptr)
|
|
48
|
+
outIndices(nullptr),
|
|
49
|
+
device(-1) {}
|
|
49
50
|
|
|
50
51
|
//
|
|
51
52
|
// Search parameters
|
|
@@ -76,7 +77,7 @@ struct GpuDistanceParams {
|
|
|
76
77
|
const void* vectors;
|
|
77
78
|
DistanceDataType vectorType;
|
|
78
79
|
bool vectorsRowMajor;
|
|
79
|
-
|
|
80
|
+
idx_t numVectors;
|
|
80
81
|
|
|
81
82
|
/// Precomputed L2 norms for each vector in `vectors`, which can be
|
|
82
83
|
/// optionally provided in advance to speed computation for METRIC_L2
|
|
@@ -93,7 +94,7 @@ struct GpuDistanceParams {
|
|
|
93
94
|
const void* queries;
|
|
94
95
|
DistanceDataType queryType;
|
|
95
96
|
bool queriesRowMajor;
|
|
96
|
-
|
|
97
|
+
idx_t numQueries;
|
|
97
98
|
|
|
98
99
|
//
|
|
99
100
|
// Output results
|
|
@@ -112,6 +113,17 @@ struct GpuDistanceParams {
|
|
|
112
113
|
/// innermost (row major). Not used if k == -1 (all pairwise distances)
|
|
113
114
|
IndicesDataType outIndicesType;
|
|
114
115
|
void* outIndices;
|
|
116
|
+
|
|
117
|
+
//
|
|
118
|
+
// Execution information
|
|
119
|
+
//
|
|
120
|
+
|
|
121
|
+
/// On which GPU device should the search run?
|
|
122
|
+
/// -1 indicates that the current CUDA thread-local device
|
|
123
|
+
/// (via cudaGetDevice/cudaSetDevice) is used
|
|
124
|
+
/// Otherwise, an integer 0 <= device < numDevices indicates the device for
|
|
125
|
+
/// execution
|
|
126
|
+
int device;
|
|
115
127
|
};
|
|
116
128
|
|
|
117
129
|
/// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
|
|
@@ -137,13 +149,13 @@ void bruteForceKnn(
|
|
|
137
149
|
// dims x numVectors, with numVectors innermost
|
|
138
150
|
const float* vectors,
|
|
139
151
|
bool vectorsRowMajor,
|
|
140
|
-
|
|
152
|
+
idx_t numVectors,
|
|
141
153
|
// If queriesRowMajor is true, this is
|
|
142
154
|
// numQueries x dims, with dims innermost; otherwise,
|
|
143
155
|
// dims x numQueries, with numQueries innermost
|
|
144
156
|
const float* queries,
|
|
145
157
|
bool queriesRowMajor,
|
|
146
|
-
|
|
158
|
+
idx_t numQueries,
|
|
147
159
|
int dims,
|
|
148
160
|
int k,
|
|
149
161
|
// A region of memory size numQueries x k, with k
|
|
@@ -151,7 +163,7 @@ void bruteForceKnn(
|
|
|
151
163
|
float* outDistances,
|
|
152
164
|
// A region of memory size numQueries x k, with k
|
|
153
165
|
// innermost (row major)
|
|
154
|
-
|
|
166
|
+
idx_t* outIndices);
|
|
155
167
|
|
|
156
168
|
} // namespace gpu
|
|
157
169
|
} // namespace faiss
|
|
@@ -51,30 +51,31 @@ class GpuIndex : public faiss::Index {
|
|
|
51
51
|
/// `x` can be resident on the CPU or any GPU; copies are performed
|
|
52
52
|
/// as needed
|
|
53
53
|
/// Handles paged adds if the add set is too large; calls addInternal_
|
|
54
|
-
void add(
|
|
54
|
+
void add(idx_t, const float* x) override;
|
|
55
55
|
|
|
56
56
|
/// `x` and `ids` can be resident on the CPU or any GPU; copies are
|
|
57
57
|
/// performed as needed
|
|
58
58
|
/// Handles paged adds if the add set is too large; calls addInternal_
|
|
59
|
-
void add_with_ids(
|
|
60
|
-
override;
|
|
59
|
+
void add_with_ids(idx_t n, const float* x, const idx_t* ids) override;
|
|
61
60
|
|
|
62
61
|
/// `x` and `labels` can be resident on the CPU or any GPU; copies are
|
|
63
62
|
/// performed as needed
|
|
64
63
|
void assign(
|
|
65
|
-
|
|
64
|
+
idx_t n,
|
|
66
65
|
const float* x,
|
|
67
|
-
|
|
68
|
-
Index
|
|
66
|
+
idx_t* labels,
|
|
67
|
+
// faiss::Index has idx_t for k
|
|
68
|
+
idx_t k = 1) const override;
|
|
69
69
|
|
|
70
70
|
/// `x`, `distances` and `labels` can be resident on the CPU or any
|
|
71
71
|
/// GPU; copies are performed as needed
|
|
72
72
|
void search(
|
|
73
|
-
|
|
73
|
+
idx_t n,
|
|
74
74
|
const float* x,
|
|
75
|
-
Index
|
|
75
|
+
// faiss::Index has idx_t for k
|
|
76
|
+
idx_t k,
|
|
76
77
|
float* distances,
|
|
77
|
-
|
|
78
|
+
idx_t* labels,
|
|
78
79
|
const SearchParameters* params = nullptr) const override;
|
|
79
80
|
|
|
80
81
|
/// `x`, `distances` and `labels` and `recons` can be resident on the CPU or
|
|
@@ -82,6 +83,7 @@ class GpuIndex : public faiss::Index {
|
|
|
82
83
|
void search_and_reconstruct(
|
|
83
84
|
idx_t n,
|
|
84
85
|
const float* x,
|
|
86
|
+
// faiss::Index has idx_t for k
|
|
85
87
|
idx_t k,
|
|
86
88
|
float* distances,
|
|
87
89
|
idx_t* labels,
|
|
@@ -90,16 +92,16 @@ class GpuIndex : public faiss::Index {
|
|
|
90
92
|
|
|
91
93
|
/// Overridden to force GPU indices to provide their own GPU-friendly
|
|
92
94
|
/// implementation
|
|
93
|
-
void compute_residual(const float* x, float* residual,
|
|
95
|
+
void compute_residual(const float* x, float* residual, idx_t key)
|
|
94
96
|
const override;
|
|
95
97
|
|
|
96
98
|
/// Overridden to force GPU indices to provide their own GPU-friendly
|
|
97
99
|
/// implementation
|
|
98
100
|
void compute_residual_n(
|
|
99
|
-
|
|
101
|
+
idx_t n,
|
|
100
102
|
const float* xs,
|
|
101
103
|
float* residuals,
|
|
102
|
-
const
|
|
104
|
+
const idx_t* keys) const override;
|
|
103
105
|
|
|
104
106
|
protected:
|
|
105
107
|
/// Copy what we need from the CPU equivalent
|
|
@@ -114,43 +116,43 @@ class GpuIndex : public faiss::Index {
|
|
|
114
116
|
|
|
115
117
|
/// Overridden to actually perform the add
|
|
116
118
|
/// All data is guaranteed to be resident on our device
|
|
117
|
-
virtual void addImpl_(
|
|
119
|
+
virtual void addImpl_(idx_t n, const float* x, const idx_t* ids) = 0;
|
|
118
120
|
|
|
119
121
|
/// Overridden to actually perform the search
|
|
120
122
|
/// All data is guaranteed to be resident on our device
|
|
121
123
|
virtual void searchImpl_(
|
|
122
|
-
|
|
124
|
+
idx_t n,
|
|
123
125
|
const float* x,
|
|
124
126
|
int k,
|
|
125
127
|
float* distances,
|
|
126
|
-
|
|
128
|
+
idx_t* labels,
|
|
127
129
|
const SearchParameters* params) const = 0;
|
|
128
130
|
|
|
129
131
|
private:
|
|
130
132
|
/// Handles paged adds if the add set is too large, passes to
|
|
131
133
|
/// addImpl_ to actually perform the add for the current page
|
|
132
|
-
void addPaged_(
|
|
134
|
+
void addPaged_(idx_t n, const float* x, const idx_t* ids);
|
|
133
135
|
|
|
134
136
|
/// Calls addImpl_ for a single page of GPU-resident data
|
|
135
|
-
void addPage_(
|
|
137
|
+
void addPage_(idx_t n, const float* x, const idx_t* ids);
|
|
136
138
|
|
|
137
139
|
/// Calls searchImpl_ for a single page of GPU-resident data
|
|
138
140
|
void searchNonPaged_(
|
|
139
|
-
|
|
141
|
+
idx_t n,
|
|
140
142
|
const float* x,
|
|
141
143
|
int k,
|
|
142
144
|
float* outDistancesData,
|
|
143
|
-
|
|
145
|
+
idx_t* outIndicesData,
|
|
144
146
|
const SearchParameters* params) const;
|
|
145
147
|
|
|
146
148
|
/// Calls searchImpl_ for a single page of GPU-resident data,
|
|
147
149
|
/// handling paging of the data and copies from the CPU
|
|
148
150
|
void searchFromCpuPaged_(
|
|
149
|
-
|
|
151
|
+
idx_t n,
|
|
150
152
|
const float* x,
|
|
151
153
|
int k,
|
|
152
154
|
float* outDistancesData,
|
|
153
|
-
|
|
155
|
+
idx_t* outIndicesData,
|
|
154
156
|
const SearchParameters* params) const;
|
|
155
157
|
|
|
156
158
|
protected:
|