RubyGems - faiss - Versions diffs - 0.1.1 → 0.1.2 - Mend

faiss 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/LICENSE.txt +18 -18
data/README.md +1 -1
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/Clustering.cpp +318 -53
data/vendor/faiss/Clustering.h +39 -11
data/vendor/faiss/DirectMap.cpp +267 -0
data/vendor/faiss/DirectMap.h +120 -0
data/vendor/faiss/IVFlib.cpp +24 -4
data/vendor/faiss/IVFlib.h +4 -0
data/vendor/faiss/Index.h +5 -24
data/vendor/faiss/Index2Layer.cpp +0 -1
data/vendor/faiss/IndexBinary.h +7 -3
data/vendor/faiss/IndexBinaryFlat.cpp +5 -0
data/vendor/faiss/IndexBinaryFlat.h +3 -0
data/vendor/faiss/IndexBinaryHash.cpp +492 -0
data/vendor/faiss/IndexBinaryHash.h +116 -0
data/vendor/faiss/IndexBinaryIVF.cpp +160 -107
data/vendor/faiss/IndexBinaryIVF.h +14 -4
data/vendor/faiss/IndexFlat.h +2 -1
data/vendor/faiss/IndexHNSW.cpp +68 -16
data/vendor/faiss/IndexHNSW.h +3 -3
data/vendor/faiss/IndexIVF.cpp +72 -76
data/vendor/faiss/IndexIVF.h +24 -5
data/vendor/faiss/IndexIVFFlat.cpp +19 -54
data/vendor/faiss/IndexIVFFlat.h +1 -11
data/vendor/faiss/IndexIVFPQ.cpp +49 -26
data/vendor/faiss/IndexIVFPQ.h +9 -10
data/vendor/faiss/IndexIVFPQR.cpp +2 -2
data/vendor/faiss/IndexIVFSpectralHash.cpp +2 -2
data/vendor/faiss/IndexLSH.h +4 -1
data/vendor/faiss/IndexPreTransform.cpp +0 -1
data/vendor/faiss/IndexScalarQuantizer.cpp +8 -1
data/vendor/faiss/InvertedLists.cpp +0 -2
data/vendor/faiss/MetaIndexes.cpp +0 -1
data/vendor/faiss/MetricType.h +36 -0
data/vendor/faiss/c_api/Clustering_c.cpp +13 -7
data/vendor/faiss/c_api/Clustering_c.h +11 -5
data/vendor/faiss/c_api/IndexIVF_c.cpp +7 -0
data/vendor/faiss/c_api/IndexIVF_c.h +7 -0
data/vendor/faiss/c_api/IndexPreTransform_c.cpp +21 -0
data/vendor/faiss/c_api/IndexPreTransform_c.h +32 -0
data/vendor/faiss/demos/demo_weighted_kmeans.cpp +185 -0
data/vendor/faiss/gpu/GpuCloner.cpp +4 -0
data/vendor/faiss/gpu/GpuClonerOptions.cpp +1 -1
data/vendor/faiss/gpu/GpuDistance.h +93 -0
data/vendor/faiss/gpu/GpuIndex.h +7 -0
data/vendor/faiss/gpu/GpuIndexFlat.h +0 -10
data/vendor/faiss/gpu/GpuIndexIVF.h +1 -0
data/vendor/faiss/gpu/StandardGpuResources.cpp +8 -0
data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +49 -27
data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +110 -2
data/vendor/faiss/gpu/utils/DeviceUtils.h +6 -0
data/vendor/faiss/impl/AuxIndexStructures.cpp +17 -0
data/vendor/faiss/impl/AuxIndexStructures.h +14 -3
data/vendor/faiss/impl/HNSW.cpp +0 -1
data/vendor/faiss/impl/PolysemousTraining.h +5 -5
data/vendor/faiss/impl/ProductQuantizer-inl.h +138 -0
data/vendor/faiss/impl/ProductQuantizer.cpp +1 -113
data/vendor/faiss/impl/ProductQuantizer.h +42 -47
data/vendor/faiss/impl/index_read.cpp +103 -7
data/vendor/faiss/impl/index_write.cpp +101 -5
data/vendor/faiss/impl/io.cpp +111 -1
data/vendor/faiss/impl/io.h +38 -0
data/vendor/faiss/index_factory.cpp +0 -1
data/vendor/faiss/tests/test_merge.cpp +0 -1
data/vendor/faiss/tests/test_pq_encoding.cpp +6 -6
data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +1 -0
data/vendor/faiss/utils/distances.cpp +4 -5
data/vendor/faiss/utils/distances_simd.cpp +0 -1
data/vendor/faiss/utils/hamming.cpp +85 -3
data/vendor/faiss/utils/hamming.h +20 -0
data/vendor/faiss/utils/utils.cpp +0 -96
data/vendor/faiss/utils/utils.h +0 -15
metadata +11 -3
data/lib/faiss/ext.bundle +0 -0

data/vendor/faiss/gpu/GpuCloner.cpp CHANGED

@@ -300,6 +300,7 @@ Index * ToGpuClonerMultiple::clone_Index_to_shards (const Index *index)
                        index_ivfflat->quantizer, index->d,
                        index_ivfflat->nlist, index_ivfflat->metric_type);
             idx2.nprobe = index_ivfflat->nprobe;
+            idx2.is_trained = index->is_trained;
             copy_ivf_shard (index_ivfflat, &idx2, n, i);
             shards[i] = sub_cloners[i].clone_Index(&idx2);
         } else if (index_ivfsq) {
@@ -308,7 +309,10 @@ Index * ToGpuClonerMultiple::clone_Index_to_shards (const Index *index)
                        index_ivfsq->sq.qtype,
                        index_ivfsq->metric_type,
                        index_ivfsq->by_residual);
             idx2.nprobe = index_ivfsq->nprobe;
+            idx2.is_trained = index->is_trained;
+            idx2.sq = index_ivfsq->sq;
             copy_ivf_shard (index_ivfsq, &idx2, n, i);
             shards[i] = sub_cloners[i].clone_Index(&idx2);
         } else if (index_flat) {

data/vendor/faiss/gpu/GpuClonerOptions.cpp CHANGED

@@ -13,7 +13,7 @@ GpuClonerOptions::GpuClonerOptions()
     : indicesOptions(INDICES_64_BIT),
       useFloat16CoarseQuantizer(false),
       useFloat16(false),
-      usePrecomputed(true),
+      usePrecomputed(false),
       reserveVecs(0),
       storeTransposed(false),
       verbose(false) {

data/vendor/faiss/gpu/GpuDistance.h CHANGED

@@ -14,6 +14,96 @@ namespace faiss { namespace gpu {
 class GpuResources;
+// Scalar type of the vector data
+enum class DistanceDataType {
+  F32 = 1,
+  F16,
+};
+/// Arguments to brute-force GPU k-nearest neighbor searching
+struct GpuDistanceParams {
+  GpuDistanceParams()
+      : metric(faiss::MetricType::METRIC_L2),
+        metricArg(0),
+        k(0),
+        dims(0),
+        vectors(nullptr),
+        vectorType(DistanceDataType::F32),
+        vectorsRowMajor(true),
+        numVectors(0),
+        vectorNorms(nullptr),
+        queries(nullptr),
+        queryType(DistanceDataType::F32),
+        queriesRowMajor(true),
+        numQueries(0),
+        outDistances(nullptr),
+        ignoreOutDistances(false),
+        outIndices(nullptr) {
+  }
+  //
+  // Search parameters
+  //
+  // Search parameter: distance metric
+  faiss::MetricType metric;
+  // Search parameter: distance metric argument (if applicable)
+  // For metric == METRIC_Lp, this is the p-value
+  float metricArg;
+  // Search parameter: return k nearest neighbors
+  int k;
+  // Vector dimensionality
+  int dims;
+  //
+  // Vectors being queried
+  //
+  // If vectorsRowMajor is true, this is
+  // numVectors x dims, with dims innermost; otherwise,
+  // dims x numVectors, with numVectors innermost
+  const void* vectors;
+  DistanceDataType vectorType;
+  bool vectorsRowMajor;
+  int numVectors;
+  // Precomputed L2 norms for each vector in `vectors`, which can be optionally
+  // provided in advance to speed computation for METRIC_L2
+  const float* vectorNorms;
+  //
+  // The query vectors (i.e., find k-nearest neighbors in `vectors` for each of
+  // the `queries`)
+  //
+  // If queriesRowMajor is true, this is
+  // numQueries x dims, with dims innermost; otherwise,
+  // dims x numQueries, with numQueries innermost
+  const void* queries;
+  DistanceDataType queryType;
+  bool queriesRowMajor;
+  int numQueries;
+  //
+  // Output results
+  //
+  // A region of memory size numQueries x k, with k
+  // innermost (row major)
+  float* outDistances;
+  // Do we only care about the indices reported, rather than the output
+  // distances?
+  bool ignoreOutDistances;
+  // A region of memory size numQueries x k, with k
+  // innermost (row major)
+  faiss::Index::idx_t* outIndices;
+};
 /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
 /// neighbor searches on an externally-provided region of memory (e.g., from a
 /// pytorch tensor).
@@ -26,6 +116,9 @@ class GpuResources;
 ///
 /// For each vector in `queries`, searches all of `vectors` to find its k
 /// nearest neighbors with respect to the given metric
+void bfKnn(GpuResources* resources, const GpuDistanceParams& args);
+/// Deprecated legacy implementation
 void bruteForceKnn(GpuResources* resources,
                    faiss::MetricType metric,
                    // If vectorsRowMajor is true, this is

data/vendor/faiss/gpu/GpuIndex.h CHANGED

@@ -35,6 +35,7 @@ class GpuIndex : public faiss::Index {
   GpuIndex(GpuResources* resources,
            int dims,
            faiss::MetricType metric,
+           float metricArg,
            GpuIndexConfig config);
   inline int getDevice() const {
@@ -86,6 +87,12 @@ class GpuIndex : public faiss::Index {
                           const Index::idx_t* keys) const override;
  protected:
+  /// Copy what we need from the CPU equivalent
+  void copyFrom(const faiss::Index* index);
+  /// Copy what we have to the CPU equivalent
+  void copyTo(faiss::Index* index) const;
   /// Does addImpl_ require IDs? If so, and no IDs are provided, we will
   /// generate them sequentially based on the order in which the IDs are added
   virtual bool addImplRequiresIDs_() const = 0;

data/vendor/faiss/gpu/GpuIndexFlat.h CHANGED

@@ -25,18 +25,12 @@ struct FlatIndex;
 struct GpuIndexFlatConfig : public GpuIndexConfig {
   inline GpuIndexFlatConfig()
       : useFloat16(false),
-        useFloat16Accumulator(false),
         storeTransposed(false) {
   }
   /// Whether or not data is stored as float16
   bool useFloat16;
-  /// Whether or not all math is performed in float16, if useFloat16 is
-  /// specified. If true, we use cublasHgemm, supported only on CC
-  /// 5.3+. Otherwise, we use cublasSgemmEx.
-  bool useFloat16Accumulator;
   /// Whether or not data is stored (transparently) in a transposed
   /// layout, enabling use of the NN GEMM call, which is ~10% faster.
   /// This will improve the speed of the flat index, but will
@@ -124,10 +118,6 @@ class GpuIndexFlat : public GpuIndex {
                    float* distances,
                    faiss::Index::idx_t* labels) const override;
- private:
-  /// Checks user settings for consistency
-  void verifySettings_() const;
  protected:
   /// Our config object
   const GpuIndexFlatConfig config_;

data/vendor/faiss/gpu/GpuIndexIVF.h CHANGED

@@ -37,6 +37,7 @@ class GpuIndexIVF : public GpuIndex {
   GpuIndexIVF(GpuResources* resources,
               int dims,
               faiss::MetricType metric,
+              float metricArg,
               int nlist,
               GpuIndexIVFConfig config = GpuIndexIVFConfig());

data/vendor/faiss/gpu/StandardGpuResources.cpp CHANGED

@@ -7,6 +7,7 @@
 #include <faiss/gpu/StandardGpuResources.h>
+#include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/gpu/utils/MemorySpace.h>
 #include <faiss/impl/FaissAssert.h>
 #include <limits>
@@ -247,6 +248,13 @@ StandardGpuResources::initializeForDevice(int device) {
   FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
   blasHandles_[device] = blasHandle;
+  // Enable tensor core support if available
+#if CUDA_VERSION >= 9000
+  if (getTensorCoreSupport(device)) {
+    cublasSetMathMode(blasHandle, CUBLAS_TENSOR_OP_MATH);
+  }
+#endif
   FAISS_ASSERT(memory_.count(device) == 0);
   auto mem = std::unique_ptr<StackDeviceMemory>(

data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp CHANGED

@@ -21,7 +21,8 @@ constexpr float kF32MaxRelErr = 6e-3f;
 struct TestFlatOptions {
   TestFlatOptions()
-      : useL2(true),
+      : metric(faiss::MetricType::METRIC_L2),
+        metricArg(0),
         useFloat16(false),
         useTransposed(false),
         numVecsOverride(-1),
@@ -30,7 +31,9 @@ struct TestFlatOptions {
         dimOverride(-1) {
   }
-  bool useL2;
+  faiss::MetricType metric;
+  float metricArg;
   bool useFloat16;
   bool useTransposed;
   int numVecsOverride;
@@ -41,7 +44,7 @@ struct TestFlatOptions {
 void testFlat(const TestFlatOptions& opt) {
   int numVecs = opt.numVecsOverride > 0 ?
-    opt.numVecsOverride : faiss::gpu::randVal(1000, 20000);
+    opt.numVecsOverride : faiss::gpu::randVal(1000, 5000);
   int dim = opt.dimOverride > 0 ?
     opt.dimOverride : faiss::gpu::randVal(50, 800);
   int numQuery = opt.numQueriesOverride > 0 ?
@@ -57,12 +60,8 @@ void testFlat(const TestFlatOptions& opt) {
     k = opt.kOverride;
   }
-  faiss::IndexFlatIP cpuIndexIP(dim);
-  faiss::IndexFlatL2 cpuIndexL2(dim);
-  faiss::IndexFlat* cpuIndex =
-    opt.useL2 ? (faiss::IndexFlat*) &cpuIndexL2 :
-    (faiss::IndexFlat*) &cpuIndexIP;
+  faiss::IndexFlat cpuIndex(dim, opt.metric);
+  cpuIndex.metric_arg = opt.metricArg;
   // Construct on a random device to test multi-device, if we have
   // multiple devices
@@ -71,25 +70,22 @@ void testFlat(const TestFlatOptions& opt) {
   faiss::gpu::StandardGpuResources res;
   res.noTempMemory();
   faiss::gpu::GpuIndexFlatConfig config;
   config.device = device;
   config.useFloat16 = opt.useFloat16;
   config.storeTransposed = opt.useTransposed;
-  faiss::gpu::GpuIndexFlatIP gpuIndexIP(&res, dim, config);
-  faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
-  faiss::gpu::GpuIndexFlat* gpuIndex =
-    opt.useL2 ? (faiss::gpu::GpuIndexFlat*) &gpuIndexL2 :
-    (faiss::gpu::GpuIndexFlat*) &gpuIndexIP;
+  faiss::gpu::GpuIndexFlat gpuIndex(&res, dim, opt.metric, config);
+  gpuIndex.metric_arg = opt.metricArg;
   std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
-  cpuIndex->add(numVecs, vecs.data());
-  gpuIndex->add(numVecs, vecs.data());
+  cpuIndex.add(numVecs, vecs.data());
+  gpuIndex.add(numVecs, vecs.data());
   std::stringstream str;
-  str << (opt.useL2 ? "L2" : "IP") << " numVecs " << numVecs
+  str << "metric " << opt.metric
+      << " marg " << opt.metricArg
+      << " numVecs " << numVecs
       << " dim " << dim
       << " useFloat16 " << opt.useFloat16
       << " transposed " << opt.useTransposed
@@ -98,7 +94,7 @@ void testFlat(const TestFlatOptions& opt) {
   // To some extent, we depend upon the relative error for the test
   // for float16
-  faiss::gpu::compareIndices(*cpuIndex, *gpuIndex, numQuery, dim, k, str.str(),
+  faiss::gpu::compareIndices(cpuIndex, gpuIndex, numQuery, dim, k, str.str(),
                              opt.useFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
                              // FIXME: the fp16 bounds are
                              // useless when math (the accumulator) is
@@ -110,7 +106,7 @@ void testFlat(const TestFlatOptions& opt) {
 TEST(TestGpuIndexFlat, IP_Float32) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = false;
+    opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
     opt.useFloat16 = false;
     opt.useTransposed = false;
@@ -121,10 +117,36 @@ TEST(TestGpuIndexFlat, IP_Float32) {
   }
 }
+TEST(TestGpuIndexFlat, L1_Float32) {
+  TestFlatOptions opt;
+  opt.metric = faiss::MetricType::METRIC_L1;
+  opt.useFloat16 = false;
+  opt.useTransposed = false;
+  testFlat(opt);
+  opt.useTransposed = true;
+  testFlat(opt);
+}
+TEST(TestGpuIndexFlat, Lp_Float32) {
+  TestFlatOptions opt;
+  opt.metric = faiss::MetricType::METRIC_Lp;
+  opt.metricArg = 5;
+  opt.useFloat16 = false;
+  opt.useTransposed = false;
+  testFlat(opt);
+  // Don't bother testing the transposed version, the L1 test should be good
+  // enough for that
+}
 TEST(TestGpuIndexFlat, L2_Float32) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = false;
     opt.useTransposed = false;
@@ -139,7 +161,7 @@ TEST(TestGpuIndexFlat, L2_Float32) {
 TEST(TestGpuIndexFlat, L2_Float32_K1) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = false;
     opt.useTransposed = false;
     opt.kOverride = 1;
@@ -151,7 +173,7 @@ TEST(TestGpuIndexFlat, L2_Float32_K1) {
 TEST(TestGpuIndexFlat, IP_Float16) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = false;
+    opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
     opt.useFloat16 = true;
     opt.useTransposed = false;
@@ -165,7 +187,7 @@ TEST(TestGpuIndexFlat, IP_Float16) {
 TEST(TestGpuIndexFlat, L2_Float16) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = true;
     opt.useTransposed = false;
@@ -180,7 +202,7 @@ TEST(TestGpuIndexFlat, L2_Float16) {
 TEST(TestGpuIndexFlat, L2_Float16_K1) {
   for (int tries = 0; tries < 3; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = true;
     opt.useTransposed = false;
     opt.kOverride = 1;
@@ -193,7 +215,7 @@ TEST(TestGpuIndexFlat, L2_Float16_K1) {
 TEST(TestGpuIndexFlat, L2_Tiling) {
   for (int tries = 0; tries < 2; ++tries) {
     TestFlatOptions opt;
-    opt.useL2 = true;
+    opt.metric = faiss::MetricType::METRIC_L2;
     opt.useFloat16 = false;
     opt.useTransposed = false;
     opt.numVecsOverride = 1000000;

data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp CHANGED

@@ -117,7 +117,7 @@ struct Options {
   int device;
 };
-TEST(TestGpuIndexIVFPQ, Query) {
+TEST(TestGpuIndexIVFPQ, Query_L2) {
   for (int tries = 0; tries < 2; ++tries) {
     Options opt;
@@ -151,7 +151,78 @@ TEST(TestGpuIndexIVFPQ, Query) {
   }
 }
-TEST(TestGpuIndexIVFPQ, Add) {
+TEST(TestGpuIndexIVFPQ, Query_IP) {
+  for (int tries = 0; tries < 2; ++tries) {
+    Options opt;
+    std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+    std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+    faiss::IndexFlatIP coarseQuantizer(opt.dim);
+    faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
+                               opt.codes, opt.bitsPerCode);
+    cpuIndex.metric_type = faiss::MetricType::METRIC_INNER_PRODUCT;
+    cpuIndex.nprobe = opt.nprobe;
+    cpuIndex.train(opt.numTrain, trainVecs.data());
+    cpuIndex.add(opt.numAdd, addVecs.data());
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    faiss::gpu::GpuIndexIVFPQConfig config;
+    config.device = opt.device;
+    config.usePrecomputedTables = false; // not supported/required for IP
+    config.indicesOptions = opt.indicesOpt;
+    config.useFloat16LookupTables = opt.useFloat16;
+    faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
+    gpuIndex.setNumProbes(opt.nprobe);
+    faiss::gpu::compareIndices(cpuIndex, gpuIndex,
+                               opt.numQuery, opt.dim, opt.k, opt.toString(),
+                               opt.getCompareEpsilon(),
+                               opt.getPctMaxDiff1(),
+                               opt.getPctMaxDiffN());
+  }
+}
+TEST(TestGpuIndexIVFPQ, Float16Coarse) {
+  Options opt;
+  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+  faiss::IndexFlatL2 coarseQuantizer(opt.dim);
+  faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
+                             opt.codes, opt.bitsPerCode);
+  cpuIndex.nprobe = opt.nprobe;
+  cpuIndex.train(opt.numTrain, trainVecs.data());
+  faiss::gpu::StandardGpuResources res;
+  res.noTempMemory();
+  faiss::gpu::GpuIndexIVFPQConfig config;
+  config.device = opt.device;
+  config.flatConfig.useFloat16 = true;
+  config.usePrecomputedTables = opt.usePrecomputed;
+  config.indicesOptions = opt.indicesOpt;
+  config.useFloat16LookupTables = opt.useFloat16;
+  faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
+  gpuIndex.setNumProbes(opt.nprobe);
+  gpuIndex.add(opt.numAdd, addVecs.data());
+  cpuIndex.add(opt.numAdd, addVecs.data());
+  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
+                             opt.numQuery, opt.dim, opt.k, opt.toString(),
+                             opt.getCompareEpsilon(),
+                             opt.getPctMaxDiff1(),
+                             opt.getPctMaxDiffN());
+}
+TEST(TestGpuIndexIVFPQ, Add_L2) {
   for (int tries = 0; tries < 2; ++tries) {
     Options opt;
@@ -187,6 +258,43 @@ TEST(TestGpuIndexIVFPQ, Add) {
   }
 }
+TEST(TestGpuIndexIVFPQ, Add_IP) {
+  for (int tries = 0; tries < 2; ++tries) {
+    Options opt;
+    std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+    std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+    faiss::IndexFlatIP coarseQuantizer(opt.dim);
+    faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
+                               opt.codes, opt.bitsPerCode);
+    cpuIndex.metric_type = faiss::MetricType::METRIC_INNER_PRODUCT;
+    cpuIndex.nprobe = opt.nprobe;
+    cpuIndex.train(opt.numTrain, trainVecs.data());
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    faiss::gpu::GpuIndexIVFPQConfig config;
+    config.device = opt.device;
+    config.usePrecomputedTables = opt.usePrecomputed;
+    config.indicesOptions = opt.indicesOpt;
+    config.useFloat16LookupTables = opt.useFloat16;
+    faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
+    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.add(opt.numAdd, addVecs.data());
+    cpuIndex.add(opt.numAdd, addVecs.data());
+    faiss::gpu::compareIndices(cpuIndex, gpuIndex,
+                               opt.numQuery, opt.dim, opt.k, opt.toString(),
+                               opt.getCompareEpsilon(),
+                               opt.getPctMaxDiff1(),
+                               opt.getPctMaxDiffN());
+  }
+}
 TEST(TestGpuIndexIVFPQ, CopyTo) {
   Options opt;
   std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);