faiss 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +18 -18
  4. data/README.md +1 -1
  5. data/lib/faiss/version.rb +1 -1
  6. data/vendor/faiss/Clustering.cpp +318 -53
  7. data/vendor/faiss/Clustering.h +39 -11
  8. data/vendor/faiss/DirectMap.cpp +267 -0
  9. data/vendor/faiss/DirectMap.h +120 -0
  10. data/vendor/faiss/IVFlib.cpp +24 -4
  11. data/vendor/faiss/IVFlib.h +4 -0
  12. data/vendor/faiss/Index.h +5 -24
  13. data/vendor/faiss/Index2Layer.cpp +0 -1
  14. data/vendor/faiss/IndexBinary.h +7 -3
  15. data/vendor/faiss/IndexBinaryFlat.cpp +5 -0
  16. data/vendor/faiss/IndexBinaryFlat.h +3 -0
  17. data/vendor/faiss/IndexBinaryHash.cpp +492 -0
  18. data/vendor/faiss/IndexBinaryHash.h +116 -0
  19. data/vendor/faiss/IndexBinaryIVF.cpp +160 -107
  20. data/vendor/faiss/IndexBinaryIVF.h +14 -4
  21. data/vendor/faiss/IndexFlat.h +2 -1
  22. data/vendor/faiss/IndexHNSW.cpp +68 -16
  23. data/vendor/faiss/IndexHNSW.h +3 -3
  24. data/vendor/faiss/IndexIVF.cpp +72 -76
  25. data/vendor/faiss/IndexIVF.h +24 -5
  26. data/vendor/faiss/IndexIVFFlat.cpp +19 -54
  27. data/vendor/faiss/IndexIVFFlat.h +1 -11
  28. data/vendor/faiss/IndexIVFPQ.cpp +49 -26
  29. data/vendor/faiss/IndexIVFPQ.h +9 -10
  30. data/vendor/faiss/IndexIVFPQR.cpp +2 -2
  31. data/vendor/faiss/IndexIVFSpectralHash.cpp +2 -2
  32. data/vendor/faiss/IndexLSH.h +4 -1
  33. data/vendor/faiss/IndexPreTransform.cpp +0 -1
  34. data/vendor/faiss/IndexScalarQuantizer.cpp +8 -1
  35. data/vendor/faiss/InvertedLists.cpp +0 -2
  36. data/vendor/faiss/MetaIndexes.cpp +0 -1
  37. data/vendor/faiss/MetricType.h +36 -0
  38. data/vendor/faiss/c_api/Clustering_c.cpp +13 -7
  39. data/vendor/faiss/c_api/Clustering_c.h +11 -5
  40. data/vendor/faiss/c_api/IndexIVF_c.cpp +7 -0
  41. data/vendor/faiss/c_api/IndexIVF_c.h +7 -0
  42. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +21 -0
  43. data/vendor/faiss/c_api/IndexPreTransform_c.h +32 -0
  44. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +185 -0
  45. data/vendor/faiss/gpu/GpuCloner.cpp +4 -0
  46. data/vendor/faiss/gpu/GpuClonerOptions.cpp +1 -1
  47. data/vendor/faiss/gpu/GpuDistance.h +93 -0
  48. data/vendor/faiss/gpu/GpuIndex.h +7 -0
  49. data/vendor/faiss/gpu/GpuIndexFlat.h +0 -10
  50. data/vendor/faiss/gpu/GpuIndexIVF.h +1 -0
  51. data/vendor/faiss/gpu/StandardGpuResources.cpp +8 -0
  52. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +49 -27
  53. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +110 -2
  54. data/vendor/faiss/gpu/utils/DeviceUtils.h +6 -0
  55. data/vendor/faiss/impl/AuxIndexStructures.cpp +17 -0
  56. data/vendor/faiss/impl/AuxIndexStructures.h +14 -3
  57. data/vendor/faiss/impl/HNSW.cpp +0 -1
  58. data/vendor/faiss/impl/PolysemousTraining.h +5 -5
  59. data/vendor/faiss/impl/ProductQuantizer-inl.h +138 -0
  60. data/vendor/faiss/impl/ProductQuantizer.cpp +1 -113
  61. data/vendor/faiss/impl/ProductQuantizer.h +42 -47
  62. data/vendor/faiss/impl/index_read.cpp +103 -7
  63. data/vendor/faiss/impl/index_write.cpp +101 -5
  64. data/vendor/faiss/impl/io.cpp +111 -1
  65. data/vendor/faiss/impl/io.h +38 -0
  66. data/vendor/faiss/index_factory.cpp +0 -1
  67. data/vendor/faiss/tests/test_merge.cpp +0 -1
  68. data/vendor/faiss/tests/test_pq_encoding.cpp +6 -6
  69. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +1 -0
  70. data/vendor/faiss/utils/distances.cpp +4 -5
  71. data/vendor/faiss/utils/distances_simd.cpp +0 -1
  72. data/vendor/faiss/utils/hamming.cpp +85 -3
  73. data/vendor/faiss/utils/hamming.h +20 -0
  74. data/vendor/faiss/utils/utils.cpp +0 -96
  75. data/vendor/faiss/utils/utils.h +0 -15
  76. metadata +11 -3
  77. data/lib/faiss/ext.bundle +0 -0
@@ -300,6 +300,7 @@ Index * ToGpuClonerMultiple::clone_Index_to_shards (const Index *index)
300
300
  index_ivfflat->quantizer, index->d,
301
301
  index_ivfflat->nlist, index_ivfflat->metric_type);
302
302
  idx2.nprobe = index_ivfflat->nprobe;
303
+ idx2.is_trained = index->is_trained;
303
304
  copy_ivf_shard (index_ivfflat, &idx2, n, i);
304
305
  shards[i] = sub_cloners[i].clone_Index(&idx2);
305
306
  } else if (index_ivfsq) {
@@ -308,7 +309,10 @@ Index * ToGpuClonerMultiple::clone_Index_to_shards (const Index *index)
308
309
  index_ivfsq->sq.qtype,
309
310
  index_ivfsq->metric_type,
310
311
  index_ivfsq->by_residual);
312
+
311
313
  idx2.nprobe = index_ivfsq->nprobe;
314
+ idx2.is_trained = index->is_trained;
315
+ idx2.sq = index_ivfsq->sq;
312
316
  copy_ivf_shard (index_ivfsq, &idx2, n, i);
313
317
  shards[i] = sub_cloners[i].clone_Index(&idx2);
314
318
  } else if (index_flat) {
@@ -13,7 +13,7 @@ GpuClonerOptions::GpuClonerOptions()
13
13
  : indicesOptions(INDICES_64_BIT),
14
14
  useFloat16CoarseQuantizer(false),
15
15
  useFloat16(false),
16
- usePrecomputed(true),
16
+ usePrecomputed(false),
17
17
  reserveVecs(0),
18
18
  storeTransposed(false),
19
19
  verbose(false) {
@@ -14,6 +14,96 @@ namespace faiss { namespace gpu {
14
14
 
15
15
  class GpuResources;
16
16
 
17
+ // Scalar type of the vector data
18
+ enum class DistanceDataType {
19
+ F32 = 1,
20
+ F16,
21
+ };
22
+
23
+ /// Arguments to brute-force GPU k-nearest neighbor searching
24
+ struct GpuDistanceParams {
25
+ GpuDistanceParams()
26
+ : metric(faiss::MetricType::METRIC_L2),
27
+ metricArg(0),
28
+ k(0),
29
+ dims(0),
30
+ vectors(nullptr),
31
+ vectorType(DistanceDataType::F32),
32
+ vectorsRowMajor(true),
33
+ numVectors(0),
34
+ vectorNorms(nullptr),
35
+ queries(nullptr),
36
+ queryType(DistanceDataType::F32),
37
+ queriesRowMajor(true),
38
+ numQueries(0),
39
+ outDistances(nullptr),
40
+ ignoreOutDistances(false),
41
+ outIndices(nullptr) {
42
+ }
43
+
44
+ //
45
+ // Search parameters
46
+ //
47
+
48
+ // Search parameter: distance metric
49
+ faiss::MetricType metric;
50
+
51
+ // Search parameter: distance metric argument (if applicable)
52
+ // For metric == METRIC_Lp, this is the p-value
53
+ float metricArg;
54
+
55
+ // Search parameter: return k nearest neighbors
56
+ int k;
57
+
58
+ // Vector dimensionality
59
+ int dims;
60
+
61
+ //
62
+ // Vectors being queried
63
+ //
64
+
65
+ // If vectorsRowMajor is true, this is
66
+ // numVectors x dims, with dims innermost; otherwise,
67
+ // dims x numVectors, with numVectors innermost
68
+ const void* vectors;
69
+ DistanceDataType vectorType;
70
+ bool vectorsRowMajor;
71
+ int numVectors;
72
+
73
+ // Precomputed L2 norms for each vector in `vectors`, which can be optionally
74
+ // provided in advance to speed computation for METRIC_L2
75
+ const float* vectorNorms;
76
+
77
+ //
78
+ // The query vectors (i.e., find k-nearest neighbors in `vectors` for each of
79
+ // the `queries`)
80
+ //
81
+
82
+ // If queriesRowMajor is true, this is
83
+ // numQueries x dims, with dims innermost; otherwise,
84
+ // dims x numQueries, with numQueries innermost
85
+ const void* queries;
86
+ DistanceDataType queryType;
87
+ bool queriesRowMajor;
88
+ int numQueries;
89
+
90
+ //
91
+ // Output results
92
+ //
93
+
94
+ // A region of memory size numQueries x k, with k
95
+ // innermost (row major)
96
+ float* outDistances;
97
+
98
+ // Do we only care about the indices reported, rather than the output
99
+ // distances?
100
+ bool ignoreOutDistances;
101
+
102
+ // A region of memory size numQueries x k, with k
103
+ // innermost (row major)
104
+ faiss::Index::idx_t* outIndices;
105
+ };
106
+
17
107
  /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
18
108
  /// neighbor searches on an externally-provided region of memory (e.g., from a
19
109
  /// pytorch tensor).
@@ -26,6 +116,9 @@ class GpuResources;
26
116
  ///
27
117
  /// For each vector in `queries`, searches all of `vectors` to find its k
28
118
  /// nearest neighbors with respect to the given metric
119
+ void bfKnn(GpuResources* resources, const GpuDistanceParams& args);
120
+
121
+ /// Deprecated legacy implementation
29
122
  void bruteForceKnn(GpuResources* resources,
30
123
  faiss::MetricType metric,
31
124
  // If vectorsRowMajor is true, this is
@@ -35,6 +35,7 @@ class GpuIndex : public faiss::Index {
35
35
  GpuIndex(GpuResources* resources,
36
36
  int dims,
37
37
  faiss::MetricType metric,
38
+ float metricArg,
38
39
  GpuIndexConfig config);
39
40
 
40
41
  inline int getDevice() const {
@@ -86,6 +87,12 @@ class GpuIndex : public faiss::Index {
86
87
  const Index::idx_t* keys) const override;
87
88
 
88
89
  protected:
90
+ /// Copy what we need from the CPU equivalent
91
+ void copyFrom(const faiss::Index* index);
92
+
93
+ /// Copy what we have to the CPU equivalent
94
+ void copyTo(faiss::Index* index) const;
95
+
89
96
  /// Does addImpl_ require IDs? If so, and no IDs are provided, we will
90
97
  /// generate them sequentially based on the order in which the IDs are added
91
98
  virtual bool addImplRequiresIDs_() const = 0;
@@ -25,18 +25,12 @@ struct FlatIndex;
25
25
  struct GpuIndexFlatConfig : public GpuIndexConfig {
26
26
  inline GpuIndexFlatConfig()
27
27
  : useFloat16(false),
28
- useFloat16Accumulator(false),
29
28
  storeTransposed(false) {
30
29
  }
31
30
 
32
31
  /// Whether or not data is stored as float16
33
32
  bool useFloat16;
34
33
 
35
- /// Whether or not all math is performed in float16, if useFloat16 is
36
- /// specified. If true, we use cublasHgemm, supported only on CC
37
- /// 5.3+. Otherwise, we use cublasSgemmEx.
38
- bool useFloat16Accumulator;
39
-
40
34
  /// Whether or not data is stored (transparently) in a transposed
41
35
  /// layout, enabling use of the NN GEMM call, which is ~10% faster.
42
36
  /// This will improve the speed of the flat index, but will
@@ -124,10 +118,6 @@ class GpuIndexFlat : public GpuIndex {
124
118
  float* distances,
125
119
  faiss::Index::idx_t* labels) const override;
126
120
 
127
- private:
128
- /// Checks user settings for consistency
129
- void verifySettings_() const;
130
-
131
121
  protected:
132
122
  /// Our config object
133
123
  const GpuIndexFlatConfig config_;
@@ -37,6 +37,7 @@ class GpuIndexIVF : public GpuIndex {
37
37
  GpuIndexIVF(GpuResources* resources,
38
38
  int dims,
39
39
  faiss::MetricType metric,
40
+ float metricArg,
40
41
  int nlist,
41
42
  GpuIndexIVFConfig config = GpuIndexIVFConfig());
42
43
 
@@ -7,6 +7,7 @@
7
7
 
8
8
 
9
9
  #include <faiss/gpu/StandardGpuResources.h>
10
+ #include <faiss/gpu/utils/DeviceUtils.h>
10
11
  #include <faiss/gpu/utils/MemorySpace.h>
11
12
  #include <faiss/impl/FaissAssert.h>
12
13
  #include <limits>
@@ -247,6 +248,13 @@ StandardGpuResources::initializeForDevice(int device) {
247
248
  FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
248
249
  blasHandles_[device] = blasHandle;
249
250
 
251
+ // Enable tensor core support if available
252
+ #if CUDA_VERSION >= 9000
253
+ if (getTensorCoreSupport(device)) {
254
+ cublasSetMathMode(blasHandle, CUBLAS_TENSOR_OP_MATH);
255
+ }
256
+ #endif
257
+
250
258
  FAISS_ASSERT(memory_.count(device) == 0);
251
259
 
252
260
  auto mem = std::unique_ptr<StackDeviceMemory>(
@@ -21,7 +21,8 @@ constexpr float kF32MaxRelErr = 6e-3f;
21
21
 
22
22
  struct TestFlatOptions {
23
23
  TestFlatOptions()
24
- : useL2(true),
24
+ : metric(faiss::MetricType::METRIC_L2),
25
+ metricArg(0),
25
26
  useFloat16(false),
26
27
  useTransposed(false),
27
28
  numVecsOverride(-1),
@@ -30,7 +31,9 @@ struct TestFlatOptions {
30
31
  dimOverride(-1) {
31
32
  }
32
33
 
33
- bool useL2;
34
+ faiss::MetricType metric;
35
+ float metricArg;
36
+
34
37
  bool useFloat16;
35
38
  bool useTransposed;
36
39
  int numVecsOverride;
@@ -41,7 +44,7 @@ struct TestFlatOptions {
41
44
 
42
45
  void testFlat(const TestFlatOptions& opt) {
43
46
  int numVecs = opt.numVecsOverride > 0 ?
44
- opt.numVecsOverride : faiss::gpu::randVal(1000, 20000);
47
+ opt.numVecsOverride : faiss::gpu::randVal(1000, 5000);
45
48
  int dim = opt.dimOverride > 0 ?
46
49
  opt.dimOverride : faiss::gpu::randVal(50, 800);
47
50
  int numQuery = opt.numQueriesOverride > 0 ?
@@ -57,12 +60,8 @@ void testFlat(const TestFlatOptions& opt) {
57
60
  k = opt.kOverride;
58
61
  }
59
62
 
60
- faiss::IndexFlatIP cpuIndexIP(dim);
61
- faiss::IndexFlatL2 cpuIndexL2(dim);
62
-
63
- faiss::IndexFlat* cpuIndex =
64
- opt.useL2 ? (faiss::IndexFlat*) &cpuIndexL2 :
65
- (faiss::IndexFlat*) &cpuIndexIP;
63
+ faiss::IndexFlat cpuIndex(dim, opt.metric);
64
+ cpuIndex.metric_arg = opt.metricArg;
66
65
 
67
66
  // Construct on a random device to test multi-device, if we have
68
67
  // multiple devices
@@ -71,25 +70,22 @@ void testFlat(const TestFlatOptions& opt) {
71
70
  faiss::gpu::StandardGpuResources res;
72
71
  res.noTempMemory();
73
72
 
74
-
75
73
  faiss::gpu::GpuIndexFlatConfig config;
76
74
  config.device = device;
77
75
  config.useFloat16 = opt.useFloat16;
78
76
  config.storeTransposed = opt.useTransposed;
79
77
 
80
- faiss::gpu::GpuIndexFlatIP gpuIndexIP(&res, dim, config);
81
- faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
82
-
83
- faiss::gpu::GpuIndexFlat* gpuIndex =
84
- opt.useL2 ? (faiss::gpu::GpuIndexFlat*) &gpuIndexL2 :
85
- (faiss::gpu::GpuIndexFlat*) &gpuIndexIP;
78
+ faiss::gpu::GpuIndexFlat gpuIndex(&res, dim, opt.metric, config);
79
+ gpuIndex.metric_arg = opt.metricArg;
86
80
 
87
81
  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
88
- cpuIndex->add(numVecs, vecs.data());
89
- gpuIndex->add(numVecs, vecs.data());
82
+ cpuIndex.add(numVecs, vecs.data());
83
+ gpuIndex.add(numVecs, vecs.data());
90
84
 
91
85
  std::stringstream str;
92
- str << (opt.useL2 ? "L2" : "IP") << " numVecs " << numVecs
86
+ str << "metric " << opt.metric
87
+ << " marg " << opt.metricArg
88
+ << " numVecs " << numVecs
93
89
  << " dim " << dim
94
90
  << " useFloat16 " << opt.useFloat16
95
91
  << " transposed " << opt.useTransposed
@@ -98,7 +94,7 @@ void testFlat(const TestFlatOptions& opt) {
98
94
 
99
95
  // To some extent, we depend upon the relative error for the test
100
96
  // for float16
101
- faiss::gpu::compareIndices(*cpuIndex, *gpuIndex, numQuery, dim, k, str.str(),
97
+ faiss::gpu::compareIndices(cpuIndex, gpuIndex, numQuery, dim, k, str.str(),
102
98
  opt.useFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
103
99
  // FIXME: the fp16 bounds are
104
100
  // useless when math (the accumulator) is
@@ -110,7 +106,7 @@ void testFlat(const TestFlatOptions& opt) {
110
106
  TEST(TestGpuIndexFlat, IP_Float32) {
111
107
  for (int tries = 0; tries < 3; ++tries) {
112
108
  TestFlatOptions opt;
113
- opt.useL2 = false;
109
+ opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
114
110
  opt.useFloat16 = false;
115
111
  opt.useTransposed = false;
116
112
 
@@ -121,10 +117,36 @@ TEST(TestGpuIndexFlat, IP_Float32) {
121
117
  }
122
118
  }
123
119
 
120
+ TEST(TestGpuIndexFlat, L1_Float32) {
121
+ TestFlatOptions opt;
122
+ opt.metric = faiss::MetricType::METRIC_L1;
123
+ opt.useFloat16 = false;
124
+ opt.useTransposed = false;
125
+
126
+ testFlat(opt);
127
+
128
+ opt.useTransposed = true;
129
+ testFlat(opt);
130
+ }
131
+
132
+ TEST(TestGpuIndexFlat, Lp_Float32) {
133
+ TestFlatOptions opt;
134
+ opt.metric = faiss::MetricType::METRIC_Lp;
135
+ opt.metricArg = 5;
136
+ opt.useFloat16 = false;
137
+ opt.useTransposed = false;
138
+
139
+ testFlat(opt);
140
+
141
+ // Don't bother testing the transposed version, the L1 test should be good
142
+ // enough for that
143
+ }
144
+
124
145
  TEST(TestGpuIndexFlat, L2_Float32) {
125
146
  for (int tries = 0; tries < 3; ++tries) {
126
147
  TestFlatOptions opt;
127
- opt.useL2 = true;
148
+ opt.metric = faiss::MetricType::METRIC_L2;
149
+
128
150
  opt.useFloat16 = false;
129
151
  opt.useTransposed = false;
130
152
 
@@ -139,7 +161,7 @@ TEST(TestGpuIndexFlat, L2_Float32) {
139
161
  TEST(TestGpuIndexFlat, L2_Float32_K1) {
140
162
  for (int tries = 0; tries < 3; ++tries) {
141
163
  TestFlatOptions opt;
142
- opt.useL2 = true;
164
+ opt.metric = faiss::MetricType::METRIC_L2;
143
165
  opt.useFloat16 = false;
144
166
  opt.useTransposed = false;
145
167
  opt.kOverride = 1;
@@ -151,7 +173,7 @@ TEST(TestGpuIndexFlat, L2_Float32_K1) {
151
173
  TEST(TestGpuIndexFlat, IP_Float16) {
152
174
  for (int tries = 0; tries < 3; ++tries) {
153
175
  TestFlatOptions opt;
154
- opt.useL2 = false;
176
+ opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
155
177
  opt.useFloat16 = true;
156
178
  opt.useTransposed = false;
157
179
 
@@ -165,7 +187,7 @@ TEST(TestGpuIndexFlat, IP_Float16) {
165
187
  TEST(TestGpuIndexFlat, L2_Float16) {
166
188
  for (int tries = 0; tries < 3; ++tries) {
167
189
  TestFlatOptions opt;
168
- opt.useL2 = true;
190
+ opt.metric = faiss::MetricType::METRIC_L2;
169
191
  opt.useFloat16 = true;
170
192
  opt.useTransposed = false;
171
193
 
@@ -180,7 +202,7 @@ TEST(TestGpuIndexFlat, L2_Float16) {
180
202
  TEST(TestGpuIndexFlat, L2_Float16_K1) {
181
203
  for (int tries = 0; tries < 3; ++tries) {
182
204
  TestFlatOptions opt;
183
- opt.useL2 = true;
205
+ opt.metric = faiss::MetricType::METRIC_L2;
184
206
  opt.useFloat16 = true;
185
207
  opt.useTransposed = false;
186
208
  opt.kOverride = 1;
@@ -193,7 +215,7 @@ TEST(TestGpuIndexFlat, L2_Float16_K1) {
193
215
  TEST(TestGpuIndexFlat, L2_Tiling) {
194
216
  for (int tries = 0; tries < 2; ++tries) {
195
217
  TestFlatOptions opt;
196
- opt.useL2 = true;
218
+ opt.metric = faiss::MetricType::METRIC_L2;
197
219
  opt.useFloat16 = false;
198
220
  opt.useTransposed = false;
199
221
  opt.numVecsOverride = 1000000;
@@ -117,7 +117,7 @@ struct Options {
117
117
  int device;
118
118
  };
119
119
 
120
- TEST(TestGpuIndexIVFPQ, Query) {
120
+ TEST(TestGpuIndexIVFPQ, Query_L2) {
121
121
  for (int tries = 0; tries < 2; ++tries) {
122
122
  Options opt;
123
123
 
@@ -151,7 +151,78 @@ TEST(TestGpuIndexIVFPQ, Query) {
151
151
  }
152
152
  }
153
153
 
154
- TEST(TestGpuIndexIVFPQ, Add) {
154
+ TEST(TestGpuIndexIVFPQ, Query_IP) {
155
+ for (int tries = 0; tries < 2; ++tries) {
156
+ Options opt;
157
+
158
+ std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
159
+ std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
160
+
161
+ faiss::IndexFlatIP coarseQuantizer(opt.dim);
162
+ faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
163
+ opt.codes, opt.bitsPerCode);
164
+ cpuIndex.metric_type = faiss::MetricType::METRIC_INNER_PRODUCT;
165
+
166
+ cpuIndex.nprobe = opt.nprobe;
167
+ cpuIndex.train(opt.numTrain, trainVecs.data());
168
+ cpuIndex.add(opt.numAdd, addVecs.data());
169
+
170
+ faiss::gpu::StandardGpuResources res;
171
+ res.noTempMemory();
172
+
173
+ faiss::gpu::GpuIndexIVFPQConfig config;
174
+ config.device = opt.device;
175
+ config.usePrecomputedTables = false; // not supported/required for IP
176
+ config.indicesOptions = opt.indicesOpt;
177
+ config.useFloat16LookupTables = opt.useFloat16;
178
+
179
+ faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
180
+ gpuIndex.setNumProbes(opt.nprobe);
181
+
182
+ faiss::gpu::compareIndices(cpuIndex, gpuIndex,
183
+ opt.numQuery, opt.dim, opt.k, opt.toString(),
184
+ opt.getCompareEpsilon(),
185
+ opt.getPctMaxDiff1(),
186
+ opt.getPctMaxDiffN());
187
+ }
188
+ }
189
+
190
+ TEST(TestGpuIndexIVFPQ, Float16Coarse) {
191
+ Options opt;
192
+
193
+ std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
194
+ std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
195
+
196
+ faiss::IndexFlatL2 coarseQuantizer(opt.dim);
197
+ faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
198
+ opt.codes, opt.bitsPerCode);
199
+ cpuIndex.nprobe = opt.nprobe;
200
+ cpuIndex.train(opt.numTrain, trainVecs.data());
201
+
202
+ faiss::gpu::StandardGpuResources res;
203
+ res.noTempMemory();
204
+
205
+ faiss::gpu::GpuIndexIVFPQConfig config;
206
+ config.device = opt.device;
207
+ config.flatConfig.useFloat16 = true;
208
+ config.usePrecomputedTables = opt.usePrecomputed;
209
+ config.indicesOptions = opt.indicesOpt;
210
+ config.useFloat16LookupTables = opt.useFloat16;
211
+
212
+ faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
213
+ gpuIndex.setNumProbes(opt.nprobe);
214
+
215
+ gpuIndex.add(opt.numAdd, addVecs.data());
216
+ cpuIndex.add(opt.numAdd, addVecs.data());
217
+
218
+ faiss::gpu::compareIndices(cpuIndex, gpuIndex,
219
+ opt.numQuery, opt.dim, opt.k, opt.toString(),
220
+ opt.getCompareEpsilon(),
221
+ opt.getPctMaxDiff1(),
222
+ opt.getPctMaxDiffN());
223
+ }
224
+
225
+ TEST(TestGpuIndexIVFPQ, Add_L2) {
155
226
  for (int tries = 0; tries < 2; ++tries) {
156
227
  Options opt;
157
228
 
@@ -187,6 +258,43 @@ TEST(TestGpuIndexIVFPQ, Add) {
187
258
  }
188
259
  }
189
260
 
261
+ TEST(TestGpuIndexIVFPQ, Add_IP) {
262
+ for (int tries = 0; tries < 2; ++tries) {
263
+ Options opt;
264
+
265
+ std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
266
+ std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
267
+
268
+ faiss::IndexFlatIP coarseQuantizer(opt.dim);
269
+ faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
270
+ opt.codes, opt.bitsPerCode);
271
+ cpuIndex.metric_type = faiss::MetricType::METRIC_INNER_PRODUCT;
272
+ cpuIndex.nprobe = opt.nprobe;
273
+ cpuIndex.train(opt.numTrain, trainVecs.data());
274
+
275
+ faiss::gpu::StandardGpuResources res;
276
+ res.noTempMemory();
277
+
278
+ faiss::gpu::GpuIndexIVFPQConfig config;
279
+ config.device = opt.device;
280
+ config.usePrecomputedTables = opt.usePrecomputed;
281
+ config.indicesOptions = opt.indicesOpt;
282
+ config.useFloat16LookupTables = opt.useFloat16;
283
+
284
+ faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
285
+ gpuIndex.setNumProbes(opt.nprobe);
286
+
287
+ gpuIndex.add(opt.numAdd, addVecs.data());
288
+ cpuIndex.add(opt.numAdd, addVecs.data());
289
+
290
+ faiss::gpu::compareIndices(cpuIndex, gpuIndex,
291
+ opt.numQuery, opt.dim, opt.k, opt.toString(),
292
+ opt.getCompareEpsilon(),
293
+ opt.getPctMaxDiff1(),
294
+ opt.getPctMaxDiffN());
295
+ }
296
+ }
297
+
190
298
  TEST(TestGpuIndexIVFPQ, CopyTo) {
191
299
  Options opt;
192
300
  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);