faiss 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +18 -18
  4. data/README.md +1 -1
  5. data/lib/faiss/version.rb +1 -1
  6. data/vendor/faiss/Clustering.cpp +318 -53
  7. data/vendor/faiss/Clustering.h +39 -11
  8. data/vendor/faiss/DirectMap.cpp +267 -0
  9. data/vendor/faiss/DirectMap.h +120 -0
  10. data/vendor/faiss/IVFlib.cpp +24 -4
  11. data/vendor/faiss/IVFlib.h +4 -0
  12. data/vendor/faiss/Index.h +5 -24
  13. data/vendor/faiss/Index2Layer.cpp +0 -1
  14. data/vendor/faiss/IndexBinary.h +7 -3
  15. data/vendor/faiss/IndexBinaryFlat.cpp +5 -0
  16. data/vendor/faiss/IndexBinaryFlat.h +3 -0
  17. data/vendor/faiss/IndexBinaryHash.cpp +492 -0
  18. data/vendor/faiss/IndexBinaryHash.h +116 -0
  19. data/vendor/faiss/IndexBinaryIVF.cpp +160 -107
  20. data/vendor/faiss/IndexBinaryIVF.h +14 -4
  21. data/vendor/faiss/IndexFlat.h +2 -1
  22. data/vendor/faiss/IndexHNSW.cpp +68 -16
  23. data/vendor/faiss/IndexHNSW.h +3 -3
  24. data/vendor/faiss/IndexIVF.cpp +72 -76
  25. data/vendor/faiss/IndexIVF.h +24 -5
  26. data/vendor/faiss/IndexIVFFlat.cpp +19 -54
  27. data/vendor/faiss/IndexIVFFlat.h +1 -11
  28. data/vendor/faiss/IndexIVFPQ.cpp +49 -26
  29. data/vendor/faiss/IndexIVFPQ.h +9 -10
  30. data/vendor/faiss/IndexIVFPQR.cpp +2 -2
  31. data/vendor/faiss/IndexIVFSpectralHash.cpp +2 -2
  32. data/vendor/faiss/IndexLSH.h +4 -1
  33. data/vendor/faiss/IndexPreTransform.cpp +0 -1
  34. data/vendor/faiss/IndexScalarQuantizer.cpp +8 -1
  35. data/vendor/faiss/InvertedLists.cpp +0 -2
  36. data/vendor/faiss/MetaIndexes.cpp +0 -1
  37. data/vendor/faiss/MetricType.h +36 -0
  38. data/vendor/faiss/c_api/Clustering_c.cpp +13 -7
  39. data/vendor/faiss/c_api/Clustering_c.h +11 -5
  40. data/vendor/faiss/c_api/IndexIVF_c.cpp +7 -0
  41. data/vendor/faiss/c_api/IndexIVF_c.h +7 -0
  42. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +21 -0
  43. data/vendor/faiss/c_api/IndexPreTransform_c.h +32 -0
  44. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +185 -0
  45. data/vendor/faiss/gpu/GpuCloner.cpp +4 -0
  46. data/vendor/faiss/gpu/GpuClonerOptions.cpp +1 -1
  47. data/vendor/faiss/gpu/GpuDistance.h +93 -0
  48. data/vendor/faiss/gpu/GpuIndex.h +7 -0
  49. data/vendor/faiss/gpu/GpuIndexFlat.h +0 -10
  50. data/vendor/faiss/gpu/GpuIndexIVF.h +1 -0
  51. data/vendor/faiss/gpu/StandardGpuResources.cpp +8 -0
  52. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +49 -27
  53. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +110 -2
  54. data/vendor/faiss/gpu/utils/DeviceUtils.h +6 -0
  55. data/vendor/faiss/impl/AuxIndexStructures.cpp +17 -0
  56. data/vendor/faiss/impl/AuxIndexStructures.h +14 -3
  57. data/vendor/faiss/impl/HNSW.cpp +0 -1
  58. data/vendor/faiss/impl/PolysemousTraining.h +5 -5
  59. data/vendor/faiss/impl/ProductQuantizer-inl.h +138 -0
  60. data/vendor/faiss/impl/ProductQuantizer.cpp +1 -113
  61. data/vendor/faiss/impl/ProductQuantizer.h +42 -47
  62. data/vendor/faiss/impl/index_read.cpp +103 -7
  63. data/vendor/faiss/impl/index_write.cpp +101 -5
  64. data/vendor/faiss/impl/io.cpp +111 -1
  65. data/vendor/faiss/impl/io.h +38 -0
  66. data/vendor/faiss/index_factory.cpp +0 -1
  67. data/vendor/faiss/tests/test_merge.cpp +0 -1
  68. data/vendor/faiss/tests/test_pq_encoding.cpp +6 -6
  69. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +1 -0
  70. data/vendor/faiss/utils/distances.cpp +4 -5
  71. data/vendor/faiss/utils/distances_simd.cpp +0 -1
  72. data/vendor/faiss/utils/hamming.cpp +85 -3
  73. data/vendor/faiss/utils/hamming.h +20 -0
  74. data/vendor/faiss/utils/utils.cpp +0 -96
  75. data/vendor/faiss/utils/utils.h +0 -15
  76. metadata +11 -3
  77. data/lib/faiss/ext.bundle +0 -0
data/vendor/faiss/gpu/GpuCloner.cpp

@@ -300,6 +300,7 @@ Index * ToGpuClonerMultiple::clone_Index_to_shards (const Index *index)
                 index_ivfflat->quantizer, index->d,
                 index_ivfflat->nlist, index_ivfflat->metric_type);
             idx2.nprobe = index_ivfflat->nprobe;
+            idx2.is_trained = index->is_trained;
             copy_ivf_shard (index_ivfflat, &idx2, n, i);
             shards[i] = sub_cloners[i].clone_Index(&idx2);
         } else if (index_ivfsq) {
@@ -308,7 +309,10 @@ Index * ToGpuClonerMultiple::clone_Index_to_shards (const Index *index)
                 index_ivfsq->sq.qtype,
                 index_ivfsq->metric_type,
                 index_ivfsq->by_residual);
+
             idx2.nprobe = index_ivfsq->nprobe;
+            idx2.is_trained = index->is_trained;
+            idx2.sq = index_ivfsq->sq;
             copy_ivf_shard (index_ivfsq, &idx2, n, i);
             shards[i] = sub_cloners[i].clone_Index(&idx2);
         } else if (index_flat) {
data/vendor/faiss/gpu/GpuClonerOptions.cpp

@@ -13,7 +13,7 @@ GpuClonerOptions::GpuClonerOptions()
     : indicesOptions(INDICES_64_BIT),
       useFloat16CoarseQuantizer(false),
       useFloat16(false),
-      usePrecomputed(true),
+      usePrecomputed(false),
       reserveVecs(0),
       storeTransposed(false),
       verbose(false) {
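This change flips the default of `usePrecomputed` from `true` to `false`, so precomputed IVFPQ tables are no longer enabled automatically when a CPU index is cloned to the GPU; callers who want them must now opt in explicitly. A minimal sketch of doing so, assuming the `index_cpu_to_gpu` helper declared in this vendored copy's `faiss/gpu/GpuCloner.h` and a trained CPU index supplied by the caller; the function name `clone_with_precomputed` is illustrative, not part of the library:

#include <faiss/Index.h>
#include <faiss/gpu/GpuCloner.h>
#include <faiss/gpu/GpuClonerOptions.h>
#include <faiss/gpu/StandardGpuResources.h>

// `res` and `cpu_index` are set up by the caller and must outlive the
// returned GPU index.
faiss::Index* clone_with_precomputed(faiss::gpu::StandardGpuResources* res,
                                     const faiss::Index* cpu_index,
                                     int device) {
  faiss::gpu::GpuClonerOptions options;
  options.usePrecomputed = true;  // opt back in; the default is now false

  return faiss::gpu::index_cpu_to_gpu(res, device, cpu_index, &options);
}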
data/vendor/faiss/gpu/GpuDistance.h

@@ -14,6 +14,96 @@ namespace faiss { namespace gpu {

 class GpuResources;

+// Scalar type of the vector data
+enum class DistanceDataType {
+  F32 = 1,
+  F16,
+};
+
+/// Arguments to brute-force GPU k-nearest neighbor searching
+struct GpuDistanceParams {
+  GpuDistanceParams()
+      : metric(faiss::MetricType::METRIC_L2),
+        metricArg(0),
+        k(0),
+        dims(0),
+        vectors(nullptr),
+        vectorType(DistanceDataType::F32),
+        vectorsRowMajor(true),
+        numVectors(0),
+        vectorNorms(nullptr),
+        queries(nullptr),
+        queryType(DistanceDataType::F32),
+        queriesRowMajor(true),
+        numQueries(0),
+        outDistances(nullptr),
+        ignoreOutDistances(false),
+        outIndices(nullptr) {
+  }
+
+  //
+  // Search parameters
+  //
+
+  // Search parameter: distance metric
+  faiss::MetricType metric;
+
+  // Search parameter: distance metric argument (if applicable)
+  // For metric == METRIC_Lp, this is the p-value
+  float metricArg;
+
+  // Search parameter: return k nearest neighbors
+  int k;
+
+  // Vector dimensionality
+  int dims;
+
+  //
+  // Vectors being queried
+  //
+
+  // If vectorsRowMajor is true, this is
+  // numVectors x dims, with dims innermost; otherwise,
+  // dims x numVectors, with numVectors innermost
+  const void* vectors;
+  DistanceDataType vectorType;
+  bool vectorsRowMajor;
+  int numVectors;
+
+  // Precomputed L2 norms for each vector in `vectors`, which can be optionally
+  // provided in advance to speed computation for METRIC_L2
+  const float* vectorNorms;
+
+  //
+  // The query vectors (i.e., find k-nearest neighbors in `vectors` for each of
+  // the `queries`)
+  //
+
+  // If queriesRowMajor is true, this is
+  // numQueries x dims, with dims innermost; otherwise,
+  // dims x numQueries, with numQueries innermost
+  const void* queries;
+  DistanceDataType queryType;
+  bool queriesRowMajor;
+  int numQueries;
+
+  //
+  // Output results
+  //
+
+  // A region of memory size numQueries x k, with k
+  // innermost (row major)
+  float* outDistances;
+
+  // Do we only care about the indices reported, rather than the output
+  // distances?
+  bool ignoreOutDistances;
+
+  // A region of memory size numQueries x k, with k
+  // innermost (row major)
+  faiss::Index::idx_t* outIndices;
+};
+
 /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
 /// neighbor searches on an externally-provided region of memory (e.g., from a
 /// pytorch tensor).
@@ -26,6 +116,9 @@ class GpuResources;
 ///
 /// For each vector in `queries`, searches all of `vectors` to find its k
 /// nearest neighbors with respect to the given metric
+void bfKnn(GpuResources* resources, const GpuDistanceParams& args);
+
+/// Deprecated legacy implementation
 void bruteForceKnn(GpuResources* resources,
                    faiss::MetricType metric,
                    // If vectorsRowMajor is true, this is
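The new `bfKnn` entry point replaces the long positional argument list of `bruteForceKnn` with a single `GpuDistanceParams` struct. A minimal calling sketch, assuming the caller has already allocated the input and output buffers (row-major float32; depending on the implementation they may need to be GPU-resident) and owns a `GpuResources` instance; the helper name `bf_knn_example` is illustrative, not part of the library:

#include <faiss/Index.h>
#include <faiss/gpu/GpuDistance.h>

// Brute-force k-NN: for each of `nq` queries, find the `k` nearest of the
// `nb` database vectors under L2. Buffers are assumed to be prepared by the
// caller: xb is nb x d, xq is nq x d, distances/labels are nq x k.
void bf_knn_example(faiss::gpu::GpuResources* res,
                    const float* xb, int nb,
                    const float* xq, int nq,
                    int d, int k,
                    float* distances,
                    faiss::Index::idx_t* labels) {
  faiss::gpu::GpuDistanceParams args;
  args.metric = faiss::MetricType::METRIC_L2;
  args.k = k;
  args.dims = d;

  args.vectors = xb;              // database vectors, row-major float32
  args.vectorsRowMajor = true;
  args.numVectors = nb;

  args.queries = xq;              // query vectors, row-major float32
  args.queriesRowMajor = true;
  args.numQueries = nq;

  args.outDistances = distances;  // nq x k, k innermost
  args.outIndices = labels;       // nq x k, k innermost

  faiss::gpu::bfKnn(res, args);
}

Fields not set here (`vectorType`, `queryType`, `vectorNorms`, `metricArg`, `ignoreOutDistances`) keep the defaults established by the constructor shown in the hunk above.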
data/vendor/faiss/gpu/GpuIndex.h

@@ -35,6 +35,7 @@ class GpuIndex : public faiss::Index {
   GpuIndex(GpuResources* resources,
            int dims,
            faiss::MetricType metric,
+           float metricArg,
            GpuIndexConfig config);

   inline int getDevice() const {
@@ -86,6 +87,12 @@ class GpuIndex : public faiss::Index {
                       const Index::idx_t* keys) const override;

  protected:
+  /// Copy what we need from the CPU equivalent
+  void copyFrom(const faiss::Index* index);
+
+  /// Copy what we have to the CPU equivalent
+  void copyTo(faiss::Index* index) const;
+
   /// Does addImpl_ require IDs? If so, and no IDs are provided, we will
   /// generate them sequentially based on the order in which the IDs are added
   virtual bool addImplRequiresIDs_() const = 0;
data/vendor/faiss/gpu/GpuIndexFlat.h

@@ -25,18 +25,12 @@ struct FlatIndex;
 struct GpuIndexFlatConfig : public GpuIndexConfig {
   inline GpuIndexFlatConfig()
       : useFloat16(false),
-        useFloat16Accumulator(false),
         storeTransposed(false) {
   }

   /// Whether or not data is stored as float16
   bool useFloat16;

-  /// Whether or not all math is performed in float16, if useFloat16 is
-  /// specified. If true, we use cublasHgemm, supported only on CC
-  /// 5.3+. Otherwise, we use cublasSgemmEx.
-  bool useFloat16Accumulator;
-
   /// Whether or not data is stored (transparently) in a transposed
   /// layout, enabling use of the NN GEMM call, which is ~10% faster.
   /// This will improve the speed of the flat index, but will
@@ -124,10 +118,6 @@ class GpuIndexFlat : public GpuIndex {
               float* distances,
               faiss::Index::idx_t* labels) const override;

- private:
-  /// Checks user settings for consistency
-  void verifySettings_() const;
-
  protected:
   /// Our config object
   const GpuIndexFlatConfig config_;
data/vendor/faiss/gpu/GpuIndexIVF.h

@@ -37,6 +37,7 @@ class GpuIndexIVF : public GpuIndex {
   GpuIndexIVF(GpuResources* resources,
               int dims,
               faiss::MetricType metric,
+              float metricArg,
               int nlist,
               GpuIndexIVFConfig config = GpuIndexIVFConfig());

data/vendor/faiss/gpu/StandardGpuResources.cpp

@@ -7,6 +7,7 @@


 #include <faiss/gpu/StandardGpuResources.h>
+#include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/gpu/utils/MemorySpace.h>
 #include <faiss/impl/FaissAssert.h>
 #include <limits>
@@ -247,6 +248,13 @@ StandardGpuResources::initializeForDevice(int device) {
   FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
   blasHandles_[device] = blasHandle;

+  // Enable tensor core support if available
+#if CUDA_VERSION >= 9000
+  if (getTensorCoreSupport(device)) {
+    cublasSetMathMode(blasHandle, CUBLAS_TENSOR_OP_MATH);
+  }
+#endif
+
   FAISS_ASSERT(memory_.count(device) == 0);

   auto mem = std::unique_ptr<StackDeviceMemory>(
data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp

@@ -21,7 +21,8 @@ constexpr float kF32MaxRelErr = 6e-3f;

 struct TestFlatOptions {
   TestFlatOptions()
-      : useL2(true),
+      : metric(faiss::MetricType::METRIC_L2),
+        metricArg(0),
         useFloat16(false),
         useTransposed(false),
         numVecsOverride(-1),
@@ -30,7 +31,9 @@ struct TestFlatOptions {
         dimOverride(-1) {
   }

-  bool useL2;
+  faiss::MetricType metric;
+  float metricArg;
+
   bool useFloat16;
   bool useTransposed;
   int numVecsOverride;
@@ -41,7 +44,7 @@

 void testFlat(const TestFlatOptions& opt) {
   int numVecs = opt.numVecsOverride > 0 ?
-    opt.numVecsOverride : faiss::gpu::randVal(1000, 20000);
+    opt.numVecsOverride : faiss::gpu::randVal(1000, 5000);
   int dim = opt.dimOverride > 0 ?
     opt.dimOverride : faiss::gpu::randVal(50, 800);
   int numQuery = opt.numQueriesOverride > 0 ?
@@ -57,12 +60,8 @@ void testFlat(const TestFlatOptions& opt) {
     k = opt.kOverride;
   }

-  faiss::IndexFlatIP cpuIndexIP(dim);
-  faiss::IndexFlatL2 cpuIndexL2(dim);
-
-  faiss::IndexFlat* cpuIndex =
-    opt.useL2 ? (faiss::IndexFlat*) &cpuIndexL2 :
-    (faiss::IndexFlat*) &cpuIndexIP;
+  faiss::IndexFlat cpuIndex(dim, opt.metric);
+  cpuIndex.metric_arg = opt.metricArg;

   // Construct on a random device to test multi-device, if we have
   // multiple devices
@@ -71,25 +70,22 @@ void testFlat(const TestFlatOptions& opt) {
   faiss::gpu::StandardGpuResources res;
   res.noTempMemory();

-
   faiss::gpu::GpuIndexFlatConfig config;
   config.device = device;
   config.useFloat16 = opt.useFloat16;
   config.storeTransposed = opt.useTransposed;

-  faiss::gpu::GpuIndexFlatIP gpuIndexIP(&res, dim, config);
-  faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
-
-  faiss::gpu::GpuIndexFlat* gpuIndex =
-    opt.useL2 ? (faiss::gpu::GpuIndexFlat*) &gpuIndexL2 :
-    (faiss::gpu::GpuIndexFlat*) &gpuIndexIP;
+  faiss::gpu::GpuIndexFlat gpuIndex(&res, dim, opt.metric, config);
+  gpuIndex.metric_arg = opt.metricArg;

   std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
-  cpuIndex->add(numVecs, vecs.data());
-  gpuIndex->add(numVecs, vecs.data());
+  cpuIndex.add(numVecs, vecs.data());
+  gpuIndex.add(numVecs, vecs.data());

   std::stringstream str;
-  str << (opt.useL2 ? "L2" : "IP") << " numVecs " << numVecs
+  str << "metric " << opt.metric
+      << " marg " << opt.metricArg
+      << " numVecs " << numVecs
       << " dim " << dim
       << " useFloat16 " << opt.useFloat16
       << " transposed " << opt.useTransposed
@@ -98,7 +94,7 @@ void testFlat(const TestFlatOptions& opt) {

   // To some extent, we depend upon the relative error for the test
   // for float16
-  faiss::gpu::compareIndices(*cpuIndex, *gpuIndex, numQuery, dim, k, str.str(),
+  faiss::gpu::compareIndices(cpuIndex, gpuIndex, numQuery, dim, k, str.str(),
                              opt.useFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                              // FIXME: the fp16 bounds are
                              // useless when math (the accumulator) is
@@ -110,7 +106,7 @@ void testFlat(const TestFlatOptions& opt) {
 TEST(TestGpuIndexFlat, IP_Float32) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = false;
+    opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
     opt.useFloat16 = false;
     opt.useTransposed = false;

@@ -121,10 +117,36 @@ TEST(TestGpuIndexFlat, IP_Float32) {
   }
 }

+TEST(TestGpuIndexFlat, L1_Float32) {
+  TestFlatOptions opt;
+  opt.metric = faiss::MetricType::METRIC_L1;
+  opt.useFloat16 = false;
+  opt.useTransposed = false;
+
+  testFlat(opt);
+
+  opt.useTransposed = true;
+  testFlat(opt);
+}
+
+TEST(TestGpuIndexFlat, Lp_Float32) {
+  TestFlatOptions opt;
+  opt.metric = faiss::MetricType::METRIC_Lp;
+  opt.metricArg = 5;
+  opt.useFloat16 = false;
+  opt.useTransposed = false;
+
+  testFlat(opt);
+
+  // Don't bother testing the transposed version, the L1 test should be good
+  // enough for that
+}
+
 TEST(TestGpuIndexFlat, L2_Float32) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
+
     opt.useFloat16 = false;
     opt.useTransposed = false;

@@ -139,7 +161,7 @@ TEST(TestGpuIndexFlat, L2_Float32) {
 TEST(TestGpuIndexFlat, L2_Float32_K1) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = false;
     opt.useTransposed = false;
     opt.kOverride = 1;
@@ -151,7 +173,7 @@ TEST(TestGpuIndexFlat, L2_Float32_K1) {
 TEST(TestGpuIndexFlat, IP_Float16) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = false;
+    opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
     opt.useFloat16 = true;
     opt.useTransposed = false;

@@ -165,7 +187,7 @@ TEST(TestGpuIndexFlat, IP_Float16) {
 TEST(TestGpuIndexFlat, L2_Float16) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = true;
     opt.useTransposed = false;

@@ -180,7 +202,7 @@ TEST(TestGpuIndexFlat, L2_Float16) {
 TEST(TestGpuIndexFlat, L2_Float16_K1) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = true;
     opt.useTransposed = false;
     opt.kOverride = 1;
@@ -193,7 +215,7 @@ TEST(TestGpuIndexFlat, L2_Float16_K1) {
 TEST(TestGpuIndexFlat, L2_Tiling) {
   for (int tries = 0; tries < 2; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = false;
     opt.useTransposed = false;
     opt.numVecsOverride = 1000000;
data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp

@@ -117,7 +117,7 @@ struct Options {
   int device;
 };

-TEST(TestGpuIndexIVFPQ, Query) {
+TEST(TestGpuIndexIVFPQ, Query_L2) {
   for (int tries = 0; tries < 2; ++tries) {
     Options opt;

@@ -151,7 +151,78 @@ TEST(TestGpuIndexIVFPQ, Query) {
   }
 }

-TEST(TestGpuIndexIVFPQ, Add) {
+TEST(TestGpuIndexIVFPQ, Query_IP) {
+  for (int tries = 0; tries < 2; ++tries) {
+    Options opt;
+
+    std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+    std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+
+    faiss::IndexFlatIP coarseQuantizer(opt.dim);
+    faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
+                               opt.codes, opt.bitsPerCode);
+    cpuIndex.metric_type = faiss::MetricType::METRIC_INNER_PRODUCT;
+
+    cpuIndex.nprobe = opt.nprobe;
+    cpuIndex.train(opt.numTrain, trainVecs.data());
+    cpuIndex.add(opt.numAdd, addVecs.data());
+
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+
+    faiss::gpu::GpuIndexIVFPQConfig config;
+    config.device = opt.device;
+    config.usePrecomputedTables = false; // not supported/required for IP
+    config.indicesOptions = opt.indicesOpt;
+    config.useFloat16LookupTables = opt.useFloat16;
+
+    faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
+    gpuIndex.setNumProbes(opt.nprobe);
+
+    faiss::gpu::compareIndices(cpuIndex, gpuIndex,
+                               opt.numQuery, opt.dim, opt.k, opt.toString(),
+                               opt.getCompareEpsilon(),
+                               opt.getPctMaxDiff1(),
+                               opt.getPctMaxDiffN());
+  }
+}
+
+TEST(TestGpuIndexIVFPQ, Float16Coarse) {
+  Options opt;
+
+  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+
+  faiss::IndexFlatL2 coarseQuantizer(opt.dim);
+  faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
+                             opt.codes, opt.bitsPerCode);
+  cpuIndex.nprobe = opt.nprobe;
+  cpuIndex.train(opt.numTrain, trainVecs.data());
+
+  faiss::gpu::StandardGpuResources res;
+  res.noTempMemory();
+
+  faiss::gpu::GpuIndexIVFPQConfig config;
+  config.device = opt.device;
+  config.flatConfig.useFloat16 = true;
+  config.usePrecomputedTables = opt.usePrecomputed;
+  config.indicesOptions = opt.indicesOpt;
+  config.useFloat16LookupTables = opt.useFloat16;
+
+  faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
+  gpuIndex.setNumProbes(opt.nprobe);
+
+  gpuIndex.add(opt.numAdd, addVecs.data());
+  cpuIndex.add(opt.numAdd, addVecs.data());
+
+  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
+                             opt.numQuery, opt.dim, opt.k, opt.toString(),
+                             opt.getCompareEpsilon(),
+                             opt.getPctMaxDiff1(),
+                             opt.getPctMaxDiffN());
+}
+
+TEST(TestGpuIndexIVFPQ, Add_L2) {
   for (int tries = 0; tries < 2; ++tries) {
     Options opt;

@@ -187,6 +258,43 @@ TEST(TestGpuIndexIVFPQ, Add) {
   }
 }

+TEST(TestGpuIndexIVFPQ, Add_IP) {
+  for (int tries = 0; tries < 2; ++tries) {
+    Options opt;
+
+    std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+    std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+
+    faiss::IndexFlatIP coarseQuantizer(opt.dim);
+    faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
+                               opt.codes, opt.bitsPerCode);
+    cpuIndex.metric_type = faiss::MetricType::METRIC_INNER_PRODUCT;
+    cpuIndex.nprobe = opt.nprobe;
+    cpuIndex.train(opt.numTrain, trainVecs.data());
+
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+
+    faiss::gpu::GpuIndexIVFPQConfig config;
+    config.device = opt.device;
+    config.usePrecomputedTables = opt.usePrecomputed;
+    config.indicesOptions = opt.indicesOpt;
+    config.useFloat16LookupTables = opt.useFloat16;
+
+    faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
+    gpuIndex.setNumProbes(opt.nprobe);
+
+    gpuIndex.add(opt.numAdd, addVecs.data());
+    cpuIndex.add(opt.numAdd, addVecs.data());
+
+    faiss::gpu::compareIndices(cpuIndex, gpuIndex,
+                               opt.numQuery, opt.dim, opt.k, opt.toString(),
+                               opt.getCompareEpsilon(),
+                               opt.getPctMaxDiff1(),
+                               opt.getPctMaxDiffN());
+  }
+}
+
 TEST(TestGpuIndexIVFPQ, CopyTo) {
   Options opt;
   std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);