faiss 0.1.1 → 0.1.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +18 -18
- data/README.md +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/Clustering.cpp +318 -53
- data/vendor/faiss/Clustering.h +39 -11
- data/vendor/faiss/DirectMap.cpp +267 -0
- data/vendor/faiss/DirectMap.h +120 -0
- data/vendor/faiss/IVFlib.cpp +24 -4
- data/vendor/faiss/IVFlib.h +4 -0
- data/vendor/faiss/Index.h +5 -24
- data/vendor/faiss/Index2Layer.cpp +0 -1
- data/vendor/faiss/IndexBinary.h +7 -3
- data/vendor/faiss/IndexBinaryFlat.cpp +5 -0
- data/vendor/faiss/IndexBinaryFlat.h +3 -0
- data/vendor/faiss/IndexBinaryHash.cpp +492 -0
- data/vendor/faiss/IndexBinaryHash.h +116 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +160 -107
- data/vendor/faiss/IndexBinaryIVF.h +14 -4
- data/vendor/faiss/IndexFlat.h +2 -1
- data/vendor/faiss/IndexHNSW.cpp +68 -16
- data/vendor/faiss/IndexHNSW.h +3 -3
- data/vendor/faiss/IndexIVF.cpp +72 -76
- data/vendor/faiss/IndexIVF.h +24 -5
- data/vendor/faiss/IndexIVFFlat.cpp +19 -54
- data/vendor/faiss/IndexIVFFlat.h +1 -11
- data/vendor/faiss/IndexIVFPQ.cpp +49 -26
- data/vendor/faiss/IndexIVFPQ.h +9 -10
- data/vendor/faiss/IndexIVFPQR.cpp +2 -2
- data/vendor/faiss/IndexIVFSpectralHash.cpp +2 -2
- data/vendor/faiss/IndexLSH.h +4 -1
- data/vendor/faiss/IndexPreTransform.cpp +0 -1
- data/vendor/faiss/IndexScalarQuantizer.cpp +8 -1
- data/vendor/faiss/InvertedLists.cpp +0 -2
- data/vendor/faiss/MetaIndexes.cpp +0 -1
- data/vendor/faiss/MetricType.h +36 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +13 -7
- data/vendor/faiss/c_api/Clustering_c.h +11 -5
- data/vendor/faiss/c_api/IndexIVF_c.cpp +7 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +7 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +21 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.h +32 -0
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +185 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +4 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +1 -1
- data/vendor/faiss/gpu/GpuDistance.h +93 -0
- data/vendor/faiss/gpu/GpuIndex.h +7 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +0 -10
- data/vendor/faiss/gpu/GpuIndexIVF.h +1 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +8 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +49 -27
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +110 -2
- data/vendor/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +17 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +14 -3
- data/vendor/faiss/impl/HNSW.cpp +0 -1
- data/vendor/faiss/impl/PolysemousTraining.h +5 -5
- data/vendor/faiss/impl/ProductQuantizer-inl.h +138 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +1 -113
- data/vendor/faiss/impl/ProductQuantizer.h +42 -47
- data/vendor/faiss/impl/index_read.cpp +103 -7
- data/vendor/faiss/impl/index_write.cpp +101 -5
- data/vendor/faiss/impl/io.cpp +111 -1
- data/vendor/faiss/impl/io.h +38 -0
- data/vendor/faiss/index_factory.cpp +0 -1
- data/vendor/faiss/tests/test_merge.cpp +0 -1
- data/vendor/faiss/tests/test_pq_encoding.cpp +6 -6
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +1 -0
- data/vendor/faiss/utils/distances.cpp +4 -5
- data/vendor/faiss/utils/distances_simd.cpp +0 -1
- data/vendor/faiss/utils/hamming.cpp +85 -3
- data/vendor/faiss/utils/hamming.h +20 -0
- data/vendor/faiss/utils/utils.cpp +0 -96
- data/vendor/faiss/utils/utils.h +0 -15
- metadata +11 -3
- data/lib/faiss/ext.bundle +0 -0
data/vendor/faiss/gpu/GpuCloner.cpp
CHANGED

@@ -300,6 +300,7 @@ Index * ToGpuClonerMultiple::clone_Index_to_shards (const Index *index)
                 index_ivfflat->quantizer, index->d,
                 index_ivfflat->nlist, index_ivfflat->metric_type);
             idx2.nprobe = index_ivfflat->nprobe;
+            idx2.is_trained = index->is_trained;
             copy_ivf_shard (index_ivfflat, &idx2, n, i);
             shards[i] = sub_cloners[i].clone_Index(&idx2);
         } else if (index_ivfsq) {
@@ -308,7 +309,10 @@ Index * ToGpuClonerMultiple::clone_Index_to_shards (const Index *index)
                 index_ivfsq->sq.qtype,
                 index_ivfsq->metric_type,
                 index_ivfsq->by_residual);
+
             idx2.nprobe = index_ivfsq->nprobe;
+            idx2.is_trained = index->is_trained;
+            idx2.sq = index_ivfsq->sq;
             copy_ivf_shard (index_ivfsq, &idx2, n, i);
             shards[i] = sub_cloners[i].clone_Index(&idx2);
         } else if (index_flat) {
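For context, a minimal sketch of the code path this change affects: sharding a trained CPU IVF index across several GPUs, where each shard now also inherits is_trained (and, for IVFSQ, the trained scalar quantizer). The index_cpu_to_gpu_multiple and GpuMultipleClonerOptions names are taken from the vendored gpu/GpuCloner.h and gpu/GpuClonerOptions.h; treat this as an illustrative sketch, not code from this diff.

    #include <memory>
    #include <vector>
    #include <faiss/IndexIVFFlat.h>
    #include <faiss/gpu/GpuCloner.h>
    #include <faiss/gpu/GpuClonerOptions.h>
    #include <faiss/gpu/StandardGpuResources.h>

    // cpu_index: a trained, populated IVF index; ngpu GPUs are assumed present.
    void search_on_gpu_shards(const faiss::IndexIVFFlat& cpu_index, int ngpu,
                              int nq, const float* queries, int k,
                              float* distances, faiss::Index::idx_t* labels) {
        std::vector<std::unique_ptr<faiss::gpu::StandardGpuResources>> owned(ngpu);
        std::vector<faiss::gpu::GpuResources*> res;
        std::vector<int> devs;
        for (int i = 0; i < ngpu; ++i) {
            owned[i].reset(new faiss::gpu::StandardGpuResources);
            res.push_back(owned[i].get());
            devs.push_back(i);
        }

        faiss::gpu::GpuMultipleClonerOptions opts;
        opts.shard = true; // split the inverted lists across the devices

        // With this release, each shard clone also carries is_trained from the source.
        std::unique_ptr<faiss::Index> gpu_index(
            faiss::gpu::index_cpu_to_gpu_multiple(res, devs, &cpu_index, &opts));
        gpu_index->search(nq, queries, k, distances, labels);
    } // gpu_index is destroyed before the GpuResources it references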
data/vendor/faiss/gpu/GpuDistance.h
CHANGED

@@ -14,6 +14,96 @@ namespace faiss { namespace gpu {
 
 class GpuResources;
 
+// Scalar type of the vector data
+enum class DistanceDataType {
+  F32 = 1,
+  F16,
+};
+
+/// Arguments to brute-force GPU k-nearest neighbor searching
+struct GpuDistanceParams {
+  GpuDistanceParams()
+      : metric(faiss::MetricType::METRIC_L2),
+        metricArg(0),
+        k(0),
+        dims(0),
+        vectors(nullptr),
+        vectorType(DistanceDataType::F32),
+        vectorsRowMajor(true),
+        numVectors(0),
+        vectorNorms(nullptr),
+        queries(nullptr),
+        queryType(DistanceDataType::F32),
+        queriesRowMajor(true),
+        numQueries(0),
+        outDistances(nullptr),
+        ignoreOutDistances(false),
+        outIndices(nullptr) {
+  }
+
+  //
+  // Search parameters
+  //
+
+  // Search parameter: distance metric
+  faiss::MetricType metric;
+
+  // Search parameter: distance metric argument (if applicable)
+  // For metric == METRIC_Lp, this is the p-value
+  float metricArg;
+
+  // Search parameter: return k nearest neighbors
+  int k;
+
+  // Vector dimensionality
+  int dims;
+
+  //
+  // Vectors being queried
+  //
+
+  // If vectorsRowMajor is true, this is
+  // numVectors x dims, with dims innermost; otherwise,
+  // dims x numVectors, with numVectors innermost
+  const void* vectors;
+  DistanceDataType vectorType;
+  bool vectorsRowMajor;
+  int numVectors;
+
+  // Precomputed L2 norms for each vector in `vectors`, which can be optionally
+  // provided in advance to speed computation for METRIC_L2
+  const float* vectorNorms;
+
+  //
+  // The query vectors (i.e., find k-nearest neighbors in `vectors` for each of
+  // the `queries`)
+  //
+
+  // If queriesRowMajor is true, this is
+  // numQueries x dims, with dims innermost; otherwise,
+  // dims x numQueries, with numQueries innermost
+  const void* queries;
+  DistanceDataType queryType;
+  bool queriesRowMajor;
+  int numQueries;
+
+  //
+  // Output results
+  //
+
+  // A region of memory size numQueries x k, with k
+  // innermost (row major)
+  float* outDistances;
+
+  // Do we only care about the indices reported, rather than the output
+  // distances?
+  bool ignoreOutDistances;
+
+  // A region of memory size numQueries x k, with k
+  // innermost (row major)
+  faiss::Index::idx_t* outIndices;
+};
+
 /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
 /// neighbor searches on an externally-provided region of memory (e.g., from a
 /// pytorch tensor).
@@ -26,6 +116,9 @@ class GpuResources;
 ///
 /// For each vector in `queries`, searches all of `vectors` to find its k
 /// nearest neighbors with respect to the given metric
+void bfKnn(GpuResources* resources, const GpuDistanceParams& args);
+
+/// Deprecated legacy implementation
 void bruteForceKnn(GpuResources* resources,
                    faiss::MetricType metric,
                    // If vectorsRowMajor is true, this is
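A hedged usage sketch of the new bfKnn entry point: the struct fields and function signature come from the header above, while passing StandardGpuResources as the GpuResources implementation, using host-resident buffers, and the helper name knn_example are assumptions made for the example.

    #include <vector>
    #include <faiss/gpu/GpuDistance.h>
    #include <faiss/gpu/StandardGpuResources.h>

    // Brute-force k-NN: for each of nq queries, find the k nearest of nb vectors.
    void knn_example(const float* xb, int nb, const float* xq, int nq, int d, int k) {
        faiss::gpu::StandardGpuResources res;

        std::vector<float> distances(size_t(nq) * k);
        std::vector<faiss::Index::idx_t> indices(size_t(nq) * k);

        faiss::gpu::GpuDistanceParams args; // defaults: METRIC_L2, F32, row-major
        args.k = k;
        args.dims = d;
        args.vectors = xb;      // nb x d, row-major
        args.numVectors = nb;
        args.queries = xq;      // nq x d, row-major
        args.numQueries = nq;
        args.outDistances = distances.data();
        args.outIndices = indices.data();

        faiss::gpu::bfKnn(&res, args);
    }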
data/vendor/faiss/gpu/GpuIndex.h
CHANGED

@@ -35,6 +35,7 @@ class GpuIndex : public faiss::Index {
   GpuIndex(GpuResources* resources,
            int dims,
            faiss::MetricType metric,
+           float metricArg,
            GpuIndexConfig config);
 
   inline int getDevice() const {
@@ -86,6 +87,12 @@ class GpuIndex : public faiss::Index {
                       const Index::idx_t* keys) const override;
 
  protected:
+  /// Copy what we need from the CPU equivalent
+  void copyFrom(const faiss::Index* index);
+
+  /// Copy what we have to the CPU equivalent
+  void copyTo(faiss::Index* index) const;
+
   /// Does addImpl_ require IDs? If so, and no IDs are provided, we will
   /// generate them sequentially based on the order in which the IDs are added
   virtual bool addImplRequiresIDs_() const = 0;
data/vendor/faiss/gpu/GpuIndexFlat.h
CHANGED

@@ -25,18 +25,12 @@ struct FlatIndex;
 struct GpuIndexFlatConfig : public GpuIndexConfig {
   inline GpuIndexFlatConfig()
       : useFloat16(false),
-        useFloat16Accumulator(false),
         storeTransposed(false) {
   }
 
   /// Whether or not data is stored as float16
   bool useFloat16;
 
-  /// Whether or not all math is performed in float16, if useFloat16 is
-  /// specified. If true, we use cublasHgemm, supported only on CC
-  /// 5.3+. Otherwise, we use cublasSgemmEx.
-  bool useFloat16Accumulator;
-
   /// Whether or not data is stored (transparently) in a transposed
   /// layout, enabling use of the NN GEMM call, which is ~10% faster.
   /// This will improve the speed of the flat index, but will
@@ -124,10 +118,6 @@ class GpuIndexFlat : public GpuIndex {
                    float* distances,
                    faiss::Index::idx_t* labels) const override;
 
- private:
-  /// Checks user settings for consistency
-  void verifySettings_() const;
-
  protected:
   /// Our config object
   const GpuIndexFlatConfig config_;
data/vendor/faiss/gpu/StandardGpuResources.cpp
CHANGED

@@ -7,6 +7,7 @@
 
 
 #include <faiss/gpu/StandardGpuResources.h>
+#include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/gpu/utils/MemorySpace.h>
 #include <faiss/impl/FaissAssert.h>
 #include <limits>
@@ -247,6 +248,13 @@ StandardGpuResources::initializeForDevice(int device) {
   FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
   blasHandles_[device] = blasHandle;
 
+  // Enable tensor core support if available
+#if CUDA_VERSION >= 9000
+  if (getTensorCoreSupport(device)) {
+    cublasSetMathMode(blasHandle, CUBLAS_TENSOR_OP_MATH);
+  }
+#endif
+
   FAISS_ASSERT(memory_.count(device) == 0);
 
   auto mem = std::unique_ptr<StackDeviceMemory>(
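For readers outside the faiss codebase, a standalone sketch of the same pattern: opting a cuBLAS handle into tensor-core math when the device supports it. getTensorCoreSupport is a faiss helper added to DeviceUtils.h in this release; the plain compute-capability check below is an assumed stand-in for it.

    #include <cuda.h>
    #include <cuda_runtime.h>
    #include <cublas_v2.h>

    // Assumed stand-in for faiss::gpu::getTensorCoreSupport: Volta (CC 7.0)
    // and newer devices expose tensor cores.
    static bool deviceHasTensorCores(int device) {
        cudaDeviceProp prop;
        cudaGetDeviceProperties(&prop, device);
        return prop.major >= 7;
    }

    void enableTensorCoreMath(cublasHandle_t handle, int device) {
    #if CUDA_VERSION >= 9000
        if (deviceHasTensorCores(device)) {
            // Allow cuBLAS to pick tensor-core (mixed-precision) GEMM kernels
            cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH);
        }
    #endif
    }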
data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp
CHANGED

@@ -21,7 +21,8 @@ constexpr float kF32MaxRelErr = 6e-3f;
 
 struct TestFlatOptions {
   TestFlatOptions()
-    :
+    : metric(faiss::MetricType::METRIC_L2),
+      metricArg(0),
       useFloat16(false),
       useTransposed(false),
       numVecsOverride(-1),
@@ -30,7 +31,9 @@ struct TestFlatOptions {
       dimOverride(-1) {
   }
 
-
+  faiss::MetricType metric;
+  float metricArg;
+
   bool useFloat16;
   bool useTransposed;
   int numVecsOverride;
@@ -41,7 +44,7 @@ struct TestFlatOptions {
 
 void testFlat(const TestFlatOptions& opt) {
   int numVecs = opt.numVecsOverride > 0 ?
-    opt.numVecsOverride : faiss::gpu::randVal(1000,
+    opt.numVecsOverride : faiss::gpu::randVal(1000, 5000);
   int dim = opt.dimOverride > 0 ?
     opt.dimOverride : faiss::gpu::randVal(50, 800);
   int numQuery = opt.numQueriesOverride > 0 ?
@@ -57,12 +60,8 @@ void testFlat(const TestFlatOptions& opt) {
     k = opt.kOverride;
   }
 
-  faiss::
-
-
-  faiss::IndexFlat* cpuIndex =
-    opt.useL2 ? (faiss::IndexFlat*) &cpuIndexL2 :
-    (faiss::IndexFlat*) &cpuIndexIP;
+  faiss::IndexFlat cpuIndex(dim, opt.metric);
+  cpuIndex.metric_arg = opt.metricArg;
 
   // Construct on a random device to test multi-device, if we have
   // multiple devices
@@ -71,25 +70,22 @@ void testFlat(const TestFlatOptions& opt) {
   faiss::gpu::StandardGpuResources res;
   res.noTempMemory();
 
-
   faiss::gpu::GpuIndexFlatConfig config;
   config.device = device;
   config.useFloat16 = opt.useFloat16;
   config.storeTransposed = opt.useTransposed;
 
-  faiss::gpu::
-
-
-  faiss::gpu::GpuIndexFlat* gpuIndex =
-    opt.useL2 ? (faiss::gpu::GpuIndexFlat*) &gpuIndexL2 :
-    (faiss::gpu::GpuIndexFlat*) &gpuIndexIP;
+  faiss::gpu::GpuIndexFlat gpuIndex(&res, dim, opt.metric, config);
+  gpuIndex.metric_arg = opt.metricArg;
 
   std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
-  cpuIndex
-  gpuIndex
+  cpuIndex.add(numVecs, vecs.data());
+  gpuIndex.add(numVecs, vecs.data());
 
   std::stringstream str;
-  str <<
+  str << "metric " << opt.metric
+      << " marg " << opt.metricArg
+      << " numVecs " << numVecs
       << " dim " << dim
       << " useFloat16 " << opt.useFloat16
      << " transposed " << opt.useTransposed
@@ -98,7 +94,7 @@ void testFlat(const TestFlatOptions& opt) {
 
   // To some extent, we depend upon the relative error for the test
   // for float16
-  faiss::gpu::compareIndices(
+  faiss::gpu::compareIndices(cpuIndex, gpuIndex, numQuery, dim, k, str.str(),
                              opt.useFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                              // FIXME: the fp16 bounds are
                              // useless when math (the accumulator) is
@@ -110,7 +106,7 @@ void testFlat(const TestFlatOptions& opt) {
 TEST(TestGpuIndexFlat, IP_Float32) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.
+    opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
     opt.useFloat16 = false;
     opt.useTransposed = false;
 
@@ -121,10 +117,36 @@ TEST(TestGpuIndexFlat, IP_Float32) {
   }
 }
 
+TEST(TestGpuIndexFlat, L1_Float32) {
+  TestFlatOptions opt;
+  opt.metric = faiss::MetricType::METRIC_L1;
+  opt.useFloat16 = false;
+  opt.useTransposed = false;
+
+  testFlat(opt);
+
+  opt.useTransposed = true;
+  testFlat(opt);
+}
+
+TEST(TestGpuIndexFlat, Lp_Float32) {
+  TestFlatOptions opt;
+  opt.metric = faiss::MetricType::METRIC_Lp;
+  opt.metricArg = 5;
+  opt.useFloat16 = false;
+  opt.useTransposed = false;
+
+  testFlat(opt);
+
+  // Don't bother testing the transposed version, the L1 test should be good
+  // enough for that
+}
+
 TEST(TestGpuIndexFlat, L2_Float32) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.
+    opt.metric = faiss::MetricType::METRIC_L2;
+
     opt.useFloat16 = false;
     opt.useTransposed = false;
 
@@ -139,7 +161,7 @@ TEST(TestGpuIndexFlat, L2_Float32) {
 TEST(TestGpuIndexFlat, L2_Float32_K1) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = false;
     opt.useTransposed = false;
     opt.kOverride = 1;
@@ -151,7 +173,7 @@ TEST(TestGpuIndexFlat, L2_Float32_K1) {
 TEST(TestGpuIndexFlat, IP_Float16) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.
+    opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
     opt.useFloat16 = true;
     opt.useTransposed = false;
 
@@ -165,7 +187,7 @@ TEST(TestGpuIndexFlat, IP_Float16) {
 TEST(TestGpuIndexFlat, L2_Float16) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = true;
     opt.useTransposed = false;
 
@@ -180,7 +202,7 @@ TEST(TestGpuIndexFlat, L2_Float16) {
 TEST(TestGpuIndexFlat, L2_Float16_K1) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = true;
     opt.useTransposed = false;
     opt.kOverride = 1;
@@ -193,7 +215,7 @@ TEST(TestGpuIndexFlat, L2_Float16_K1) {
 TEST(TestGpuIndexFlat, L2_Tiling) {
   for (int tries = 0; tries < 2; ++tries) {
     TestFlatOptions opt;
-    opt.
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = false;
     opt.useTransposed = false;
     opt.numVecsOverride = 1000000;
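The user-visible effect of these test changes is that GpuIndexFlat now accepts an arbitrary MetricType plus metric_arg, mirroring the CPU IndexFlat. A hedged sketch based on the constructor and fields exercised in the tests above (buffer names and the p value are assumptions for the example):

    #include <faiss/gpu/GpuIndexFlat.h>
    #include <faiss/gpu/StandardGpuResources.h>

    // Build a GPU flat index that ranks neighbors by the Lp distance with p = 3.
    void lpFlatExample(int d, int nb, const float* xb,
                       int nq, const float* xq, int k,
                       float* distances, faiss::Index::idx_t* labels) {
        faiss::gpu::StandardGpuResources res;

        faiss::gpu::GpuIndexFlatConfig config;
        config.device = 0;

        faiss::gpu::GpuIndexFlat index(&res, d, faiss::MetricType::METRIC_Lp, config);
        index.metric_arg = 3; // the p of the Lp distance

        index.add(nb, xb);
        index.search(nq, xq, k, distances, labels);
    }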
data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp
CHANGED

@@ -117,7 +117,7 @@ struct Options {
   int device;
 };
 
-TEST(TestGpuIndexIVFPQ,
+TEST(TestGpuIndexIVFPQ, Query_L2) {
   for (int tries = 0; tries < 2; ++tries) {
     Options opt;
 
@@ -151,7 +151,78 @@ TEST(TestGpuIndexIVFPQ, Query) {
   }
 }
 
-TEST(TestGpuIndexIVFPQ,
+TEST(TestGpuIndexIVFPQ, Query_IP) {
+  for (int tries = 0; tries < 2; ++tries) {
+    Options opt;
+
+    std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+    std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+
+    faiss::IndexFlatIP coarseQuantizer(opt.dim);
+    faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
+                               opt.codes, opt.bitsPerCode);
+    cpuIndex.metric_type = faiss::MetricType::METRIC_INNER_PRODUCT;
+
+    cpuIndex.nprobe = opt.nprobe;
+    cpuIndex.train(opt.numTrain, trainVecs.data());
+    cpuIndex.add(opt.numAdd, addVecs.data());
+
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+
+    faiss::gpu::GpuIndexIVFPQConfig config;
+    config.device = opt.device;
+    config.usePrecomputedTables = false; // not supported/required for IP
+    config.indicesOptions = opt.indicesOpt;
+    config.useFloat16LookupTables = opt.useFloat16;
+
+    faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
+    gpuIndex.setNumProbes(opt.nprobe);
+
+    faiss::gpu::compareIndices(cpuIndex, gpuIndex,
+                               opt.numQuery, opt.dim, opt.k, opt.toString(),
+                               opt.getCompareEpsilon(),
+                               opt.getPctMaxDiff1(),
+                               opt.getPctMaxDiffN());
+  }
+}
+
+TEST(TestGpuIndexIVFPQ, Float16Coarse) {
+  Options opt;
+
+  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+
+  faiss::IndexFlatL2 coarseQuantizer(opt.dim);
+  faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
+                             opt.codes, opt.bitsPerCode);
+  cpuIndex.nprobe = opt.nprobe;
+  cpuIndex.train(opt.numTrain, trainVecs.data());
+
+  faiss::gpu::StandardGpuResources res;
+  res.noTempMemory();
+
+  faiss::gpu::GpuIndexIVFPQConfig config;
+  config.device = opt.device;
+  config.flatConfig.useFloat16 = true;
+  config.usePrecomputedTables = opt.usePrecomputed;
+  config.indicesOptions = opt.indicesOpt;
+  config.useFloat16LookupTables = opt.useFloat16;
+
+  faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
+  gpuIndex.setNumProbes(opt.nprobe);
+
+  gpuIndex.add(opt.numAdd, addVecs.data());
+  cpuIndex.add(opt.numAdd, addVecs.data());
+
+  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
+                             opt.numQuery, opt.dim, opt.k, opt.toString(),
+                             opt.getCompareEpsilon(),
+                             opt.getPctMaxDiff1(),
+                             opt.getPctMaxDiffN());
+}
+
+TEST(TestGpuIndexIVFPQ, Add_L2) {
   for (int tries = 0; tries < 2; ++tries) {
     Options opt;
 
@@ -187,6 +258,43 @@ TEST(TestGpuIndexIVFPQ, Add) {
   }
 }
 
+TEST(TestGpuIndexIVFPQ, Add_IP) {
+  for (int tries = 0; tries < 2; ++tries) {
+    Options opt;
+
+    std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+    std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+
+    faiss::IndexFlatIP coarseQuantizer(opt.dim);
+    faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
+                               opt.codes, opt.bitsPerCode);
+    cpuIndex.metric_type = faiss::MetricType::METRIC_INNER_PRODUCT;
+    cpuIndex.nprobe = opt.nprobe;
+    cpuIndex.train(opt.numTrain, trainVecs.data());
+
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+
+    faiss::gpu::GpuIndexIVFPQConfig config;
+    config.device = opt.device;
+    config.usePrecomputedTables = opt.usePrecomputed;
+    config.indicesOptions = opt.indicesOpt;
+    config.useFloat16LookupTables = opt.useFloat16;
+
+    faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
+    gpuIndex.setNumProbes(opt.nprobe);
+
+    gpuIndex.add(opt.numAdd, addVecs.data());
+    cpuIndex.add(opt.numAdd, addVecs.data());
+
+    faiss::gpu::compareIndices(cpuIndex, gpuIndex,
+                               opt.numQuery, opt.dim, opt.k, opt.toString(),
+                               opt.getCompareEpsilon(),
+                               opt.getPctMaxDiff1(),
+                               opt.getPctMaxDiffN());
+  }
+}
+
 TEST(TestGpuIndexIVFPQ, CopyTo) {
   Options opt;
   std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);