faiss 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +18 -18
- data/README.md +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/Clustering.cpp +318 -53
- data/vendor/faiss/Clustering.h +39 -11
- data/vendor/faiss/DirectMap.cpp +267 -0
- data/vendor/faiss/DirectMap.h +120 -0
- data/vendor/faiss/IVFlib.cpp +24 -4
- data/vendor/faiss/IVFlib.h +4 -0
- data/vendor/faiss/Index.h +5 -24
- data/vendor/faiss/Index2Layer.cpp +0 -1
- data/vendor/faiss/IndexBinary.h +7 -3
- data/vendor/faiss/IndexBinaryFlat.cpp +5 -0
- data/vendor/faiss/IndexBinaryFlat.h +3 -0
- data/vendor/faiss/IndexBinaryHash.cpp +492 -0
- data/vendor/faiss/IndexBinaryHash.h +116 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +160 -107
- data/vendor/faiss/IndexBinaryIVF.h +14 -4
- data/vendor/faiss/IndexFlat.h +2 -1
- data/vendor/faiss/IndexHNSW.cpp +68 -16
- data/vendor/faiss/IndexHNSW.h +3 -3
- data/vendor/faiss/IndexIVF.cpp +72 -76
- data/vendor/faiss/IndexIVF.h +24 -5
- data/vendor/faiss/IndexIVFFlat.cpp +19 -54
- data/vendor/faiss/IndexIVFFlat.h +1 -11
- data/vendor/faiss/IndexIVFPQ.cpp +49 -26
- data/vendor/faiss/IndexIVFPQ.h +9 -10
- data/vendor/faiss/IndexIVFPQR.cpp +2 -2
- data/vendor/faiss/IndexIVFSpectralHash.cpp +2 -2
- data/vendor/faiss/IndexLSH.h +4 -1
- data/vendor/faiss/IndexPreTransform.cpp +0 -1
- data/vendor/faiss/IndexScalarQuantizer.cpp +8 -1
- data/vendor/faiss/InvertedLists.cpp +0 -2
- data/vendor/faiss/MetaIndexes.cpp +0 -1
- data/vendor/faiss/MetricType.h +36 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +13 -7
- data/vendor/faiss/c_api/Clustering_c.h +11 -5
- data/vendor/faiss/c_api/IndexIVF_c.cpp +7 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +7 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +21 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.h +32 -0
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +185 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +4 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +1 -1
- data/vendor/faiss/gpu/GpuDistance.h +93 -0
- data/vendor/faiss/gpu/GpuIndex.h +7 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +0 -10
- data/vendor/faiss/gpu/GpuIndexIVF.h +1 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +8 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +49 -27
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +110 -2
- data/vendor/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +17 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +14 -3
- data/vendor/faiss/impl/HNSW.cpp +0 -1
- data/vendor/faiss/impl/PolysemousTraining.h +5 -5
- data/vendor/faiss/impl/ProductQuantizer-inl.h +138 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +1 -113
- data/vendor/faiss/impl/ProductQuantizer.h +42 -47
- data/vendor/faiss/impl/index_read.cpp +103 -7
- data/vendor/faiss/impl/index_write.cpp +101 -5
- data/vendor/faiss/impl/io.cpp +111 -1
- data/vendor/faiss/impl/io.h +38 -0
- data/vendor/faiss/index_factory.cpp +0 -1
- data/vendor/faiss/tests/test_merge.cpp +0 -1
- data/vendor/faiss/tests/test_pq_encoding.cpp +6 -6
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +1 -0
- data/vendor/faiss/utils/distances.cpp +4 -5
- data/vendor/faiss/utils/distances_simd.cpp +0 -1
- data/vendor/faiss/utils/hamming.cpp +85 -3
- data/vendor/faiss/utils/hamming.h +20 -0
- data/vendor/faiss/utils/utils.cpp +0 -96
- data/vendor/faiss/utils/utils.h +0 -15
- metadata +11 -3
- data/lib/faiss/ext.bundle +0 -0
data/vendor/faiss/gpu/GpuCloner.cpp
CHANGED
@@ -300,6 +300,7 @@ Index * ToGpuClonerMultiple::clone_Index_to_shards (const Index *index)
                 index_ivfflat->quantizer, index->d,
                 index_ivfflat->nlist, index_ivfflat->metric_type);
             idx2.nprobe = index_ivfflat->nprobe;
+            idx2.is_trained = index->is_trained;
             copy_ivf_shard (index_ivfflat, &idx2, n, i);
             shards[i] = sub_cloners[i].clone_Index(&idx2);
         } else if (index_ivfsq) {
@@ -308,7 +309,10 @@ Index * ToGpuClonerMultiple::clone_Index_to_shards (const Index *index)
                 index_ivfsq->sq.qtype,
                 index_ivfsq->metric_type,
                 index_ivfsq->by_residual);
+
             idx2.nprobe = index_ivfsq->nprobe;
+            idx2.is_trained = index->is_trained;
+            idx2.sq = index_ivfsq->sq;
             copy_ivf_shard (index_ivfsq, &idx2, n, i);
             shards[i] = sub_cloners[i].clone_Index(&idx2);
         } else if (index_flat) {
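These two hunks fix sharded multi-GPU cloning: the `is_trained` flag (and, for IVF scalar-quantizer indexes, the trained `ScalarQuantizer` itself) is now copied onto each per-shard index before it is cloned to its GPU. A minimal sketch of the code path that exercises this, assuming the `faiss::gpu::index_cpu_to_all_gpus` helper and `GpuMultipleClonerOptions` from faiss's GPU cloner API (neither appears in this diff, and the helper function name here is illustrative):

```cpp
#include <faiss/IndexIVFFlat.h>
#include <faiss/gpu/GpuCloner.h>
#include <faiss/gpu/GpuClonerOptions.h>

// Shard a trained CPU IVF index across all visible GPUs.
// ToGpuClonerMultiple::clone_Index_to_shards() is what now carries
// nprobe / is_trained / sq over into each shard.
faiss::Index* shard_to_gpus(const faiss::IndexIVFFlat& cpu_index) {
    faiss::gpu::GpuMultipleClonerOptions options;
    options.shard = true;  // split the dataset across devices instead of replicating it
    return faiss::gpu::index_cpu_to_all_gpus(&cpu_index, &options);
}
```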
data/vendor/faiss/gpu/GpuDistance.h
CHANGED
@@ -14,6 +14,96 @@ namespace faiss { namespace gpu {
 
 class GpuResources;
 
+// Scalar type of the vector data
+enum class DistanceDataType {
+  F32 = 1,
+  F16,
+};
+
+/// Arguments to brute-force GPU k-nearest neighbor searching
+struct GpuDistanceParams {
+  GpuDistanceParams()
+      : metric(faiss::MetricType::METRIC_L2),
+        metricArg(0),
+        k(0),
+        dims(0),
+        vectors(nullptr),
+        vectorType(DistanceDataType::F32),
+        vectorsRowMajor(true),
+        numVectors(0),
+        vectorNorms(nullptr),
+        queries(nullptr),
+        queryType(DistanceDataType::F32),
+        queriesRowMajor(true),
+        numQueries(0),
+        outDistances(nullptr),
+        ignoreOutDistances(false),
+        outIndices(nullptr) {
+  }
+
+  //
+  // Search parameters
+  //
+
+  // Search parameter: distance metric
+  faiss::MetricType metric;
+
+  // Search parameter: distance metric argument (if applicable)
+  // For metric == METRIC_Lp, this is the p-value
+  float metricArg;
+
+  // Search parameter: return k nearest neighbors
+  int k;
+
+  // Vector dimensionality
+  int dims;
+
+  //
+  // Vectors being queried
+  //
+
+  // If vectorsRowMajor is true, this is
+  // numVectors x dims, with dims innermost; otherwise,
+  // dims x numVectors, with numVectors innermost
+  const void* vectors;
+  DistanceDataType vectorType;
+  bool vectorsRowMajor;
+  int numVectors;
+
+  // Precomputed L2 norms for each vector in `vectors`, which can be optionally
+  // provided in advance to speed computation for METRIC_L2
+  const float* vectorNorms;
+
+  //
+  // The query vectors (i.e., find k-nearest neighbors in `vectors` for each of
+  // the `queries`
+  //
+
+  // If queriesRowMajor is true, this is
+  // numQueries x dims, with dims innermost; otherwise,
+  // dims x numQueries, with numQueries innermost
+  const void* queries;
+  DistanceDataType queryType;
+  bool queriesRowMajor;
+  int numQueries;
+
+  //
+  // Output results
+  //
+
+  // A region of memory size numQueries x k, with k
+  // innermost (row major)
+  float* outDistances;
+
+  // Do we only care about the indices reported, rather than the output
+  // distances?
+  bool ignoreOutDistances;
+
+  // A region of memory size numQueries x k, with k
+  // innermost (row major)
+  faiss::Index::idx_t* outIndices;
+};
+
 /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
 /// neighbor searches on an externally-provided region of memory (e.g., from a
 /// pytorch tensor).
@@ -26,6 +116,9 @@ class GpuResources;
 ///
 /// For each vector in `queries`, searches all of `vectors` to find its k
 /// nearest neighbors with respect to the given metric
+void bfKnn(GpuResources* resources, const GpuDistanceParams& args);
+
+/// Deprecated legacy implementation
 void bruteForceKnn(GpuResources* resources,
                    faiss::MetricType metric,
                    // If vectorsRowMajor is true, this is
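The new `bfKnn` entry point bundles everything into a `GpuDistanceParams` struct instead of the long `bruteForceKnn` argument list, which is now marked as the deprecated legacy path. A minimal sketch of a call, assuming row-major float32 data held in ordinary host vectors (the function and variable names are illustrative, not from the diff):

```cpp
#include <vector>
#include <faiss/gpu/GpuDistance.h>
#include <faiss/gpu/StandardGpuResources.h>

// Brute-force k-NN over `db` for every vector in `queries`, no index required.
void knn_example(const std::vector<float>& db, int numDb,
                 const std::vector<float>& queries, int numQueries,
                 int dims, int k) {
    faiss::gpu::StandardGpuResources res;

    std::vector<float> distances(numQueries * k);
    std::vector<faiss::Index::idx_t> indices(numQueries * k);

    faiss::gpu::GpuDistanceParams args;
    args.metric = faiss::MetricType::METRIC_L2;
    args.k = k;
    args.dims = dims;
    args.vectors = db.data();        // numDb x dims, row major
    args.numVectors = numDb;
    args.queries = queries.data();   // numQueries x dims, row major
    args.numQueries = numQueries;
    args.outDistances = distances.data();
    args.outIndices = indices.data();

    faiss::gpu::bfKnn(&res, args);
}
```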
data/vendor/faiss/gpu/GpuIndex.h
CHANGED
@@ -35,6 +35,7 @@ class GpuIndex : public faiss::Index {
   GpuIndex(GpuResources* resources,
            int dims,
            faiss::MetricType metric,
+           float metricArg,
            GpuIndexConfig config);
 
   inline int getDevice() const {
@@ -86,6 +87,12 @@ class GpuIndex : public faiss::Index {
                const Index::idx_t* keys) const override;
 
  protected:
+  /// Copy what we need from the CPU equivalent
+  void copyFrom(const faiss::Index* index);
+
+  /// Copy what we have to the CPU equivalent
+  void copyTo(faiss::Index* index) const;
+
   /// Does addImpl_ require IDs? If so, and no IDs are provided, we will
   /// generate them sequentially based on the order in which the IDs are added
   virtual bool addImplRequiresIDs_() const = 0;
data/vendor/faiss/gpu/GpuIndexFlat.h
CHANGED
@@ -25,18 +25,12 @@ struct FlatIndex;
 struct GpuIndexFlatConfig : public GpuIndexConfig {
   inline GpuIndexFlatConfig()
       : useFloat16(false),
-        useFloat16Accumulator(false),
         storeTransposed(false) {
   }
 
   /// Whether or not data is stored as float16
   bool useFloat16;
 
-  /// Whether or not all math is performed in float16, if useFloat16 is
-  /// specified. If true, we use cublasHgemm, supported only on CC
-  /// 5.3+. Otherwise, we use cublasSgemmEx.
-  bool useFloat16Accumulator;
-
   /// Whether or not data is stored (transparently) in a transposed
   /// layout, enabling use of the NN GEMM call, which is ~10% faster.
   /// This will improve the speed of the flat index, but will
@@ -124,10 +118,6 @@ class GpuIndexFlat : public GpuIndex {
              float* distances,
              faiss::Index::idx_t* labels) const override;
 
- private:
-  /// Checks user settings for consistency
-  void verifySettings_() const;
-
  protected:
   /// Our config object
   const GpuIndexFlatConfig config_;
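With `useFloat16Accumulator` removed, the flat-index config is reduced to storage options; accumulation precision is no longer a user-facing switch. A sketch of configuring the flat GPU index after this change, assuming a single device and 128-dimensional data (the values are illustrative); the constructor form used here matches the updated TestGpuIndexFlat.cpp further down in this diff:

```cpp
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>

int main() {
    faiss::gpu::StandardGpuResources res;

    faiss::gpu::GpuIndexFlatConfig config;
    config.device = 0;
    config.useFloat16 = true;        // store vectors as float16
    config.storeTransposed = false;  // keep the default layout
    // config.useFloat16Accumulator = ...;  // removed in this version

    // The metric is passed directly instead of picking GpuIndexFlatL2 vs.
    // GpuIndexFlatIP; metric_arg can be set on the index afterwards if needed.
    faiss::gpu::GpuIndexFlat index(&res, 128, faiss::MetricType::METRIC_L2, config);
    return 0;
}
```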
data/vendor/faiss/gpu/StandardGpuResources.cpp
CHANGED
@@ -7,6 +7,7 @@
 
 
 #include <faiss/gpu/StandardGpuResources.h>
+#include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/gpu/utils/MemorySpace.h>
 #include <faiss/impl/FaissAssert.h>
 #include <limits>
@@ -247,6 +248,13 @@ StandardGpuResources::initializeForDevice(int device) {
   FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
   blasHandles_[device] = blasHandle;
 
+  // Enable tensor core support if available
+#if CUDA_VERSION >= 9000
+  if (getTensorCoreSupport(device)) {
+    cublasSetMathMode(blasHandle, CUBLAS_TENSOR_OP_MATH);
+  }
+#endif
+
   FAISS_ASSERT(memory_.count(device) == 0);
 
   auto mem = std::unique_ptr<StackDeviceMemory>(
data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp
CHANGED
@@ -21,7 +21,8 @@ constexpr float kF32MaxRelErr = 6e-3f;
 
 struct TestFlatOptions {
   TestFlatOptions()
-      : useL2(true),
+      : metric(faiss::MetricType::METRIC_L2),
+        metricArg(0),
         useFloat16(false),
         useTransposed(false),
         numVecsOverride(-1),
@@ -30,7 +31,9 @@ struct TestFlatOptions {
         dimOverride(-1) {
   }
 
-  bool useL2;
+  faiss::MetricType metric;
+  float metricArg;
+
   bool useFloat16;
   bool useTransposed;
   int numVecsOverride;
@@ -41,7 +44,7 @@ struct TestFlatOptions {
 
 void testFlat(const TestFlatOptions& opt) {
   int numVecs = opt.numVecsOverride > 0 ?
-    opt.numVecsOverride : faiss::gpu::randVal(1000,
+    opt.numVecsOverride : faiss::gpu::randVal(1000, 5000);
   int dim = opt.dimOverride > 0 ?
     opt.dimOverride : faiss::gpu::randVal(50, 800);
   int numQuery = opt.numQueriesOverride > 0 ?
@@ -57,12 +60,8 @@ void testFlat(const TestFlatOptions& opt) {
     k = opt.kOverride;
   }
 
-  faiss::IndexFlatIP cpuIndexIP(dim);
-  faiss::IndexFlatL2 cpuIndexL2(dim);
-
-  faiss::IndexFlat* cpuIndex =
-    opt.useL2 ? (faiss::IndexFlat*) &cpuIndexL2 :
-    (faiss::IndexFlat*) &cpuIndexIP;
+  faiss::IndexFlat cpuIndex(dim, opt.metric);
+  cpuIndex.metric_arg = opt.metricArg;
 
   // Construct on a random device to test multi-device, if we have
   // multiple devices
@@ -71,25 +70,22 @@ void testFlat(const TestFlatOptions& opt) {
   faiss::gpu::StandardGpuResources res;
   res.noTempMemory();
 
-
   faiss::gpu::GpuIndexFlatConfig config;
   config.device = device;
   config.useFloat16 = opt.useFloat16;
   config.storeTransposed = opt.useTransposed;
 
-  faiss::gpu::GpuIndexFlatIP gpuIndexIP(&res, dim, config);
-  faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
-
-  faiss::gpu::GpuIndexFlat* gpuIndex =
-    opt.useL2 ? (faiss::gpu::GpuIndexFlat*) &gpuIndexL2 :
-    (faiss::gpu::GpuIndexFlat*) &gpuIndexIP;
+  faiss::gpu::GpuIndexFlat gpuIndex(&res, dim, opt.metric, config);
+  gpuIndex.metric_arg = opt.metricArg;
 
   std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
-  cpuIndex->add(numVecs, vecs.data());
-  gpuIndex->add(numVecs, vecs.data());
+  cpuIndex.add(numVecs, vecs.data());
+  gpuIndex.add(numVecs, vecs.data());
 
   std::stringstream str;
-  str << "useL2 " << opt.useL2
+  str << "metric " << opt.metric
+      << " marg " << opt.metricArg
+      << " numVecs " << numVecs
       << " dim " << dim
       << " useFloat16 " << opt.useFloat16
       << " transposed " << opt.useTransposed
@@ -98,7 +94,7 @@ void testFlat(const TestFlatOptions& opt) {
 
   // To some extent, we depend upon the relative error for the test
   // for float16
-  faiss::gpu::compareIndices(*cpuIndex, *gpuIndex, numQuery, dim, k, str.str(),
+  faiss::gpu::compareIndices(cpuIndex, gpuIndex, numQuery, dim, k, str.str(),
                              opt.useFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                              // FIXME: the fp16 bounds are
                              // useless when math (the accumulator) is
@@ -110,7 +106,7 @@ void testFlat(const TestFlatOptions& opt) {
 TEST(TestGpuIndexFlat, IP_Float32) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = false;
+    opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
     opt.useFloat16 = false;
     opt.useTransposed = false;
 
@@ -121,10 +117,36 @@ TEST(TestGpuIndexFlat, IP_Float32) {
   }
 }
 
+TEST(TestGpuIndexFlat, L1_Float32) {
+  TestFlatOptions opt;
+  opt.metric = faiss::MetricType::METRIC_L1;
+  opt.useFloat16 = false;
+  opt.useTransposed = false;
+
+  testFlat(opt);
+
+  opt.useTransposed = true;
+  testFlat(opt);
+}
+
+TEST(TestGpuIndexFlat, Lp_Float32) {
+  TestFlatOptions opt;
+  opt.metric = faiss::MetricType::METRIC_Lp;
+  opt.metricArg = 5;
+  opt.useFloat16 = false;
+  opt.useTransposed = false;
+
+  testFlat(opt);
+
+  // Don't bother testing the transposed version, the L1 test should be good
+  // enough for that
+}
+
 TEST(TestGpuIndexFlat, L2_Float32) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
+
     opt.useFloat16 = false;
     opt.useTransposed = false;
 
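The new L1_Float32 and Lp_Float32 tests exercise the alternative-metric support this version adds to the flat GPU index. A condensed sketch of the same pattern outside the test harness, assuming 64-dimensional host data and a single query (sizes and names are illustrative):

```cpp
#include <vector>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>

// Search a GPU flat index under the Minkowski (L_p) distance with p = 5,
// mirroring what the Lp_Float32 test configures via TestFlatOptions.
void lp_search(const std::vector<float>& data, int n, int dim,
               const std::vector<float>& query, int k) {
    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexFlatConfig config;
    config.device = 0;

    faiss::gpu::GpuIndexFlat index(&res, dim, faiss::MetricType::METRIC_Lp, config);
    index.metric_arg = 5;  // the p in the L_p metric

    index.add(n, data.data());

    std::vector<float> distances(k);
    std::vector<faiss::Index::idx_t> labels(k);
    index.search(1, query.data(), k, distances.data(), labels.data());
}
```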
@@ -139,7 +161,7 @@ TEST(TestGpuIndexFlat, L2_Float32) {
 TEST(TestGpuIndexFlat, L2_Float32_K1) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = false;
     opt.useTransposed = false;
     opt.kOverride = 1;
@@ -151,7 +173,7 @@ TEST(TestGpuIndexFlat, L2_Float32_K1) {
 TEST(TestGpuIndexFlat, IP_Float16) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = false;
+    opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
     opt.useFloat16 = true;
     opt.useTransposed = false;
 
@@ -165,7 +187,7 @@ TEST(TestGpuIndexFlat, IP_Float16) {
 TEST(TestGpuIndexFlat, L2_Float16) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = true;
     opt.useTransposed = false;
 
@@ -180,7 +202,7 @@ TEST(TestGpuIndexFlat, L2_Float16) {
 TEST(TestGpuIndexFlat, L2_Float16_K1) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = true;
     opt.useTransposed = false;
     opt.kOverride = 1;
@@ -193,7 +215,7 @@ TEST(TestGpuIndexFlat, L2_Float16_K1) {
 TEST(TestGpuIndexFlat, L2_Tiling) {
   for (int tries = 0; tries < 2; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = false;
     opt.useTransposed = false;
     opt.numVecsOverride = 1000000;
data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp
CHANGED
@@ -117,7 +117,7 @@ struct Options {
   int device;
 };
 
-TEST(TestGpuIndexIVFPQ, Query) {
+TEST(TestGpuIndexIVFPQ, Query_L2) {
   for (int tries = 0; tries < 2; ++tries) {
     Options opt;
 
@@ -151,7 +151,78 @@ TEST(TestGpuIndexIVFPQ, Query) {
   }
 }
 
-TEST(TestGpuIndexIVFPQ, Add) {
+TEST(TestGpuIndexIVFPQ, Query_IP) {
+  for (int tries = 0; tries < 2; ++tries) {
+    Options opt;
+
+    std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+    std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+
+    faiss::IndexFlatIP coarseQuantizer(opt.dim);
+    faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
+                               opt.codes, opt.bitsPerCode);
+    cpuIndex.metric_type = faiss::MetricType::METRIC_INNER_PRODUCT;
+
+    cpuIndex.nprobe = opt.nprobe;
+    cpuIndex.train(opt.numTrain, trainVecs.data());
+    cpuIndex.add(opt.numAdd, addVecs.data());
+
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+
+    faiss::gpu::GpuIndexIVFPQConfig config;
+    config.device = opt.device;
+    config.usePrecomputedTables = false; // not supported/required for IP
+    config.indicesOptions = opt.indicesOpt;
+    config.useFloat16LookupTables = opt.useFloat16;
+
+    faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
+    gpuIndex.setNumProbes(opt.nprobe);
+
+    faiss::gpu::compareIndices(cpuIndex, gpuIndex,
+                               opt.numQuery, opt.dim, opt.k, opt.toString(),
+                               opt.getCompareEpsilon(),
+                               opt.getPctMaxDiff1(),
+                               opt.getPctMaxDiffN());
+  }
+}
+
+TEST(TestGpuIndexIVFPQ, Float16Coarse) {
+  Options opt;
+
+  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+
+  faiss::IndexFlatL2 coarseQuantizer(opt.dim);
+  faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
+                             opt.codes, opt.bitsPerCode);
+  cpuIndex.nprobe = opt.nprobe;
+  cpuIndex.train(opt.numTrain, trainVecs.data());
+
+  faiss::gpu::StandardGpuResources res;
+  res.noTempMemory();
+
+  faiss::gpu::GpuIndexIVFPQConfig config;
+  config.device = opt.device;
+  config.flatConfig.useFloat16 = true;
+  config.usePrecomputedTables = opt.usePrecomputed;
+  config.indicesOptions = opt.indicesOpt;
+  config.useFloat16LookupTables = opt.useFloat16;
+
+  faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
+  gpuIndex.setNumProbes(opt.nprobe);
+
+  gpuIndex.add(opt.numAdd, addVecs.data());
+  cpuIndex.add(opt.numAdd, addVecs.data());
+
+  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
+                             opt.numQuery, opt.dim, opt.k, opt.toString(),
+                             opt.getCompareEpsilon(),
+                             opt.getPctMaxDiff1(),
+                             opt.getPctMaxDiffN());
+}
+
+TEST(TestGpuIndexIVFPQ, Add_L2) {
   for (int tries = 0; tries < 2; ++tries) {
     Options opt;
 
@@ -187,6 +258,43 @@ TEST(TestGpuIndexIVFPQ, Add) {
   }
 }
 
+TEST(TestGpuIndexIVFPQ, Add_IP) {
+  for (int tries = 0; tries < 2; ++tries) {
+    Options opt;
+
+    std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+    std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+
+    faiss::IndexFlatIP coarseQuantizer(opt.dim);
+    faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
+                               opt.codes, opt.bitsPerCode);
+    cpuIndex.metric_type = faiss::MetricType::METRIC_INNER_PRODUCT;
+    cpuIndex.nprobe = opt.nprobe;
+    cpuIndex.train(opt.numTrain, trainVecs.data());
+
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+
+    faiss::gpu::GpuIndexIVFPQConfig config;
+    config.device = opt.device;
+    config.usePrecomputedTables = opt.usePrecomputed;
+    config.indicesOptions = opt.indicesOpt;
+    config.useFloat16LookupTables = opt.useFloat16;
+
+    faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
+    gpuIndex.setNumProbes(opt.nprobe);
+
+    gpuIndex.add(opt.numAdd, addVecs.data());
+    cpuIndex.add(opt.numAdd, addVecs.data());
+
+    faiss::gpu::compareIndices(cpuIndex, gpuIndex,
+                               opt.numQuery, opt.dim, opt.k, opt.toString(),
+                               opt.getCompareEpsilon(),
+                               opt.getPctMaxDiff1(),
+                               opt.getPctMaxDiffN());
+  }
+}
+
 TEST(TestGpuIndexIVFPQ, CopyTo) {
   Options opt;
   std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);