RubyGems - faiss - Versions diffs - 0.1.3 → 0.2.0 - Mend

faiss 0.1.3 → 0.2.0

Files changed (199) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +25 -0
data/LICENSE.txt +1 -1
data/README.md +16 -4
data/ext/faiss/ext.cpp +12 -308
data/ext/faiss/extconf.rb +6 -3
data/ext/faiss/index.cpp +189 -0
data/ext/faiss/index_binary.cpp +75 -0
data/ext/faiss/kmeans.cpp +40 -0
data/ext/faiss/numo.hpp +867 -0
data/ext/faiss/pca_matrix.cpp +33 -0
data/ext/faiss/product_quantizer.cpp +53 -0
data/ext/faiss/utils.cpp +13 -0
data/ext/faiss/utils.h +5 -0
data/lib/faiss.rb +0 -5
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +36 -33
data/vendor/faiss/faiss/AutoTune.h +6 -3
data/vendor/faiss/faiss/Clustering.cpp +16 -12
data/vendor/faiss/faiss/Index.cpp +3 -4
data/vendor/faiss/faiss/Index.h +3 -3
data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
data/vendor/faiss/faiss/IndexBinary.h +1 -1
data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
data/vendor/faiss/faiss/IndexFlat.h +0 -51
data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
data/vendor/faiss/faiss/IndexIVF.h +22 -15
data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
data/vendor/faiss/faiss/IndexRefine.h +73 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
data/vendor/faiss/faiss/impl/io.cpp +33 -2
data/vendor/faiss/faiss/impl/io.h +7 -2
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
data/vendor/faiss/faiss/index_factory.cpp +112 -7
data/vendor/faiss/faiss/index_io.h +1 -48
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
data/vendor/faiss/faiss/utils/Heap.h +61 -50
data/vendor/faiss/faiss/utils/distances.cpp +164 -319
data/vendor/faiss/faiss/utils/distances.h +28 -20
data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
data/vendor/faiss/faiss/utils/hamming.h +2 -7
data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
data/vendor/faiss/faiss/utils/partitioning.h +69 -0
data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
data/vendor/faiss/faiss/utils/simdlib.h +31 -0
data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
metadata +54 -149
data/lib/faiss/index.rb +0 -20
data/lib/faiss/index_binary.rb +0 -20
data/lib/faiss/kmeans.rb +0 -15
data/lib/faiss/pca_matrix.rb +0 -15
data/lib/faiss/product_quantizer.rb +0 -22
data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
data/vendor/faiss/c_api/AutoTune_c.h +0 -66
data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
data/vendor/faiss/c_api/Clustering_c.h +0 -123
data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
data/vendor/faiss/c_api/IndexShards_c.h +0 -39
data/vendor/faiss/c_api/Index_c.cpp +0 -105
data/vendor/faiss/c_api/Index_c.h +0 -183
data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
data/vendor/faiss/c_api/clone_index_c.h +0 -32
data/vendor/faiss/c_api/error_c.h +0 -42
data/vendor/faiss/c_api/error_impl.cpp +0 -27
data/vendor/faiss/c_api/error_impl.h +0 -16
data/vendor/faiss/c_api/faiss_c.h +0 -58
data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
data/vendor/faiss/c_api/index_factory_c.h +0 -30
data/vendor/faiss/c_api/index_io_c.cpp +0 -42
data/vendor/faiss/c_api/index_io_c.h +0 -50
data/vendor/faiss/c_api/macros_impl.h +0 -110
data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
data/vendor/faiss/misc/test_blas.cpp +0 -87
data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
data/vendor/faiss/tests/test_merge.cpp +0 -260
data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
data/vendor/faiss/tests/test_params_override.cpp +0 -236
data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104

data/vendor/faiss/faiss/IndexScalarQuantizer.cpp CHANGED Viewed

@@ -192,7 +192,7 @@ void IndexIVFScalarQuantizer::encode_vectors(idx_t n, const float* x,
     size_t coarse_size = include_listnos ? coarse_code_size () : 0;
     memset(codes, 0, (code_size + coarse_size) * n);
-#pragma omp parallel if(n > 1)
+#pragma omp parallel if(n > 1000)
     {
         std::vector<float> residual (d);
@@ -222,7 +222,7 @@ void IndexIVFScalarQuantizer::sa_decode (idx_t n, const uint8_t *codes,
     std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
     size_t coarse_size = coarse_code_size ();
-#pragma omp parallel if(n > 1)
+#pragma omp parallel if(n > 1000)
     {
         std::vector<float> residual (d);

data/vendor/faiss/faiss/IndexScalarQuantizer.h CHANGED Viewed

@@ -82,7 +82,7 @@ struct IndexScalarQuantizer: Index {
  /** An IVF implementation where the components of the residuals are
- * encoded with a scalar uniform quantizer. All distance computations
+ * encoded with a scalar quantizer. All distance computations
  * are asymmetric, so the encoded vectors are decoded and approximate
  * distances are computed.
  */

data/vendor/faiss/faiss/gpu/GpuDistance.h CHANGED Viewed

@@ -148,6 +148,6 @@ void bruteForceKnn(GpuResourcesProvider* resources,
                    float* outDistances,
                    // A region of memory size numQueries x k, with k
                    // innermost (row major)
-                   faiss::Index::idx_t* outIndices);
+                   Index::idx_t* outIndices);
 } } // namespace

data/vendor/faiss/faiss/gpu/GpuIndex.h CHANGED Viewed

@@ -36,9 +36,12 @@ class GpuIndex : public faiss::Index {
            float metricArg,
            GpuIndexConfig config);
-  inline int getDevice() const {
-    return device_;
-  }
+  /// Returns the device that this index is resident on
+  int getDevice() const;
+  /// Returns a reference to our GpuResources object that manages memory, stream
+  /// and handle resources on the GPU
+  std::shared_ptr<GpuResources> getResources();
   /// Set the minimum data size for searches (in MiB) for which we use
   /// CPU -> GPU paging
@@ -50,7 +53,7 @@ class GpuIndex : public faiss::Index {
   /// `x` can be resident on the CPU or any GPU; copies are performed
   /// as needed
   /// Handles paged adds if the add set is too large; calls addInternal_
-  void add(faiss::Index::idx_t, const float* x) override;
+  void add(Index::idx_t, const float* x) override;
   /// `x` and `ids` can be resident on the CPU or any GPU; copies are
   /// performed as needed
@@ -59,6 +62,13 @@ class GpuIndex : public faiss::Index {
                     const float* x,
                     const Index::idx_t* ids) override;
+  /// `x` and `labels` can be resident on the CPU or any GPU; copies are
+  /// performed as needed
+  void assign(Index::idx_t n,
+              const float* x,
+              Index::idx_t* labels,
+              Index::idx_t k = 1) const override;
   /// `x`, `distances` and `labels` can be resident on the CPU or any
   /// GPU; copies are performed as needed
   void search(Index::idx_t n,
@@ -136,11 +146,8 @@ private:
   /// Manages streams, cuBLAS handles and scratch memory for devices
   std::shared_ptr<GpuResources> resources_;
-  /// The GPU device we are resident on
-  const int device_;
-  /// The memory space of our primary storage on the GPU
-  const MemorySpace memorySpace_;
+  /// Our configuration options
+  const GpuIndexConfig config_;
   /// Size above which we page copies from the CPU to GPU
   size_t minPagedSize_;

data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h CHANGED Viewed

@@ -38,6 +38,13 @@ class GpuIndexBinaryFlat : public IndexBinary {
   ~GpuIndexBinaryFlat() override;
+  /// Returns the device that this index is resident on
+  int getDevice() const;
+  /// Returns a reference to our GpuResources object that manages memory, stream
+  /// and handle resources on the GPU
+  std::shared_ptr<GpuResources> getResources();
   /// Initialize ourselves from the given CPU index; will overwrite
   /// all data in ourselves
   void copyFrom(const faiss::IndexBinaryFlat* index);
@@ -80,7 +87,7 @@ class GpuIndexBinaryFlat : public IndexBinary {
   std::shared_ptr<GpuResources> resources_;
   /// Configuration options
-  GpuIndexBinaryFlatConfig config_;
+  const GpuIndexBinaryFlatConfig binaryFlatConfig_;
   /// Holds our GPU data containing the list of vectors
   std::unique_ptr<BinaryFlatIndex> data_;

data/vendor/faiss/faiss/gpu/GpuIndexFlat.h CHANGED Viewed

@@ -21,7 +21,7 @@ struct IndexFlatIP;
 namespace faiss { namespace gpu {
-struct FlatIndex;
+class FlatIndex;
 struct GpuIndexFlatConfig : public GpuIndexConfig {
   inline GpuIndexFlatConfig()
@@ -87,27 +87,27 @@ class GpuIndexFlat : public GpuIndex {
   void train(Index::idx_t n, const float* x) override;
   /// Overrides to avoid excessive copies
-  void add(faiss::Index::idx_t, const float* x) override;
+  void add(Index::idx_t, const float* x) override;
   /// Reconstruction methods; prefer the batch reconstruct as it will
   /// be more efficient
-  void reconstruct(faiss::Index::idx_t key, float* out) const override;
+  void reconstruct(Index::idx_t key, float* out) const override;
   /// Batch reconstruction method
-  void reconstruct_n(faiss::Index::idx_t i0,
-                     faiss::Index::idx_t num,
+  void reconstruct_n(Index::idx_t i0,
+                     Index::idx_t num,
                      float* out) const override;
   /// Compute residual
   void compute_residual(const float* x,
                         float* residual,
-                        faiss::Index::idx_t key) const override;
+                        Index::idx_t key) const override;
   /// Compute residual (batch mode)
-  void compute_residual_n(faiss::Index::idx_t n,
+  void compute_residual_n(Index::idx_t n,
                           const float* xs,
                           float* residuals,
-                          const faiss::Index::idx_t* keys) const override;
+                          const Index::idx_t* keys) const override;
   /// For internal access
   inline FlatIndex* getGpuData() { return data_.get(); }
@@ -126,11 +126,11 @@ class GpuIndexFlat : public GpuIndex {
                    const float* x,
                    int k,
                    float* distances,
-                   faiss::Index::idx_t* labels) const override;
+                   Index::idx_t* labels) const override;
  protected:
-  /// Our config object
-  const GpuIndexFlatConfig config_;
+  /// Our configuration options
+  const GpuIndexFlatConfig flatConfig_;
   /// Holds our GPU data containing the list of vectors
   std::unique_ptr<FlatIndex> data_;

data/vendor/faiss/faiss/gpu/GpuIndexIVF.h CHANGED Viewed

@@ -56,6 +56,22 @@ class GpuIndexIVF : public GpuIndex {
   /// Returns the number of inverted lists we're managing
   int getNumLists() const;
+  /// Returns the number of vectors present in a particular inverted list
+  virtual int getListLength(int listId) const = 0;
+  /// Return the encoded vector data contained in a particular inverted list,
+  /// for debugging purposes.
+  /// If gpuFormat is true, the data is returned as it is encoded in the
+  /// GPU-side representation.
+  /// Otherwise, it is converted to the CPU-compliant format; the native GPU
+  /// format may differ.
+  virtual std::vector<uint8_t>
+  getListVectorData(int listId, bool gpuFormat = false) const = 0;
+  /// Return the vector indices contained in a particular inverted list, for
+  /// debugging purposes.
+  virtual std::vector<Index::idx_t> getListIndices(int listId) const = 0;
   /// Return the quantizer we're using
   GpuIndexFlat* getQuantizer();
@@ -67,7 +83,7 @@ class GpuIndexIVF : public GpuIndex {
  protected:
   bool addImplRequiresIDs_() const override;
-  void trainQuantizer_(faiss::Index::idx_t n, const float* x);
+  void trainQuantizer_(Index::idx_t n, const float* x);
  public:
   /// Exposing this like the CPU version for manipulation
@@ -83,7 +99,8 @@ class GpuIndexIVF : public GpuIndex {
   GpuIndexFlat* quantizer;
  protected:
-  GpuIndexIVFConfig ivfConfig_;
+  /// Our configuration options
+  const GpuIndexIVFConfig ivfConfig_;
 };
 } } // namespace

data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h CHANGED Viewed

@@ -19,6 +19,13 @@ class IVFFlat;
 class GpuIndexFlat;
 struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig {
+  inline GpuIndexIVFFlatConfig()
+      : interleavedLayout(true) {
+  }
+  /// Use the alternative memory layout for the IVF lists
+  /// (currently the default)
+  bool interleavedLayout;
 };
 /// Wrapper around the GPU implementation that looks like
@@ -56,10 +63,28 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
   /// to exactly the amount needed. Returns space reclaimed in bytes
   size_t reclaimMemory();
+  /// Clears out all inverted lists, but retains the coarse centroid information
   void reset() override;
+  /// Trains the coarse quantizer based on the given vector data
   void train(Index::idx_t n, const float* x) override;
+  /// Returns the number of vectors present in a particular inverted list
+  int getListLength(int listId) const override;
+  /// Return the encoded vector data contained in a particular inverted list,
+  /// for debugging purposes.
+  /// If gpuFormat is true, the data is returned as it is encoded in the
+  /// GPU-side representation.
+  /// Otherwise, it is converted to the CPU-compliant format; the native GPU
+  /// format may differ.
+  std::vector<uint8_t>
+  getListVectorData(int listId, bool gpuFormat = false) const override;
+  /// Return the vector indices contained in a particular inverted list, for
+  /// debugging purposes.
+  std::vector<Index::idx_t> getListIndices(int listId) const override;
  protected:
   /// Called from GpuIndex for add/add_with_ids
   void addImpl_(int n,
@@ -73,8 +98,9 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
                    float* distances,
                    Index::idx_t* labels) const override;
- private:
-  GpuIndexIVFFlatConfig ivfFlatConfig_;
+ protected:
+  /// Our configuration options
+  const GpuIndexIVFFlatConfig ivfFlatConfig_;
   /// Desired inverted list memory reservation
   size_t reserveMemoryVecs_;

data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h CHANGED Viewed

@@ -23,7 +23,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
   inline GpuIndexIVFPQConfig()
       : useFloat16LookupTables(false),
         usePrecomputedTables(false),
-        alternativeLayout(false),
+        interleavedLayout(false),
         useMMCodeDistance(false) {
   }
@@ -38,7 +38,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
   /// Use the alternative memory layout for the IVF lists
   /// WARNING: this is a feature under development, do not use!
-  bool alternativeLayout;
+  bool interleavedLayout;
   /// Use GEMM-backed computation of PQ code distances for the no precomputed
   /// table version of IVFPQ.
@@ -108,19 +108,24 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
   /// product centroid information
   void reset() override;
+  /// Trains the coarse and product quantizer based on the given vector data
   void train(Index::idx_t n, const float* x) override;
-  /// For debugging purposes, return the list length of a particular
-  /// list
-  int getListLength(int listId) const;
+  /// Returns the number of vectors present in a particular inverted list
+  int getListLength(int listId) const override;
-  /// For debugging purposes, return the list codes of a particular
-  /// list
-  std::vector<unsigned char> getListCodes(int listId) const;
+  /// Return the encoded vector data contained in a particular inverted list,
+  /// for debugging purposes.
+  /// If gpuFormat is true, the data is returned as it is encoded in the
+  /// GPU-side representation.
+  /// Otherwise, it is converted to the CPU-compliant format; the native GPU
+  /// format may differ.
+  std::vector<uint8_t>
+  getListVectorData(int listId, bool gpuFormat = false) const override;
-  /// For debugging purposes, return the list indices of a particular
-  /// list
-  std::vector<long> getListIndices(int listId) const;
+  /// Return the vector indices contained in a particular inverted list, for
+  /// debugging purposes.
+  std::vector<Index::idx_t> getListIndices(int listId) const override;
  protected:
   /// Called from GpuIndex for add/add_with_ids
@@ -135,13 +140,18 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
                    float* distances,
                    Index::idx_t* labels) const override;
- private:
+  /// Throws errors if configuration settings are improper
   void verifySettings_() const;
+  /// Trains the PQ quantizer based on the given vector data
   void trainResidualQuantizer_(Index::idx_t n, const float* x);
- private:
-  GpuIndexIVFPQConfig ivfpqConfig_;
+ protected:
+  /// Our configuration options that we were initialized with
+  const GpuIndexIVFPQConfig ivfpqConfig_;
+  /// Runtime override: whether or not we use precomputed tables
+  bool usePrecomputedTables_;
   /// Number of sub-quantizers per encoded vector
   int subQuantizers_;

data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h CHANGED Viewed

@@ -18,6 +18,13 @@ class IVFFlat;
 class GpuIndexFlat;
 struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
+  inline GpuIndexIVFScalarQuantizerConfig()
+      : interleavedLayout(true) {
+  }
+  /// Use the alternative memory layout for the IVF lists
+  /// (currently the default)
+  bool interleavedLayout;
 };
 /// Wrapper around the GPU implementation that looks like
@@ -61,10 +68,29 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
   /// to exactly the amount needed. Returns space reclaimed in bytes
   size_t reclaimMemory();
+  /// Clears out all inverted lists, but retains the coarse and scalar quantizer
+  /// information
   void reset() override;
+  /// Trains the coarse and scalar quantizer based on the given vector data
   void train(Index::idx_t n, const float* x) override;
+  /// Returns the number of vectors present in a particular inverted list
+  int getListLength(int listId) const override;
+  /// Return the encoded vector data contained in a particular inverted list,
+  /// for debugging purposes.
+  /// If gpuFormat is true, the data is returned as it is encoded in the
+  /// GPU-side representation.
+  /// Otherwise, it is converted to the CPU-compliant format; the native GPU
+  /// format may differ.
+  std::vector<uint8_t>
+  getListVectorData(int listId, bool gpuFormat = false) const override;
+  /// Return the vector indices contained in a particular inverted list, for
+  /// debugging purposes.
+  std::vector<Index::idx_t> getListIndices(int listId) const override;
  protected:
   /// Called from GpuIndex for add/add_with_ids
   void addImpl_(int n,
@@ -88,8 +114,9 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
   /// Exposed like the CPU version
   bool by_residual;
- private:
-  GpuIndexIVFScalarQuantizerConfig ivfSQConfig_;
+protected:
+  /// Our configuration options
+  const GpuIndexIVFScalarQuantizerConfig ivfSQConfig_;
   /// Desired inverted list memory reservation
   size_t reserveMemoryVecs_;

data/vendor/faiss/faiss/gpu/GpuResources.h CHANGED Viewed

@@ -198,6 +198,10 @@ class GpuResources {
   /// given device
   virtual cudaStream_t getDefaultStream(int device) = 0;
+  /// Overrides the default stream for a device to the user-supplied stream. The
+  /// resources object does not own this stream (i.e., it will not destroy it).
+  virtual void setDefaultStream(int device, cudaStream_t stream) = 0;
   /// Returns the set of alternative streams that we use for the given device
   virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;

data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp CHANGED Viewed

@@ -101,12 +101,8 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
   for (auto& entry : defaultStreams_) {
     DeviceScope scope(entry.first);
-    auto it = userDefaultStreams_.find(entry.first);
-    if (it == userDefaultStreams_.end()) {
-      // The user did not specify this stream, thus we are the ones
-      // who have created it
-      CUDA_VERIFY(cudaStreamDestroy(entry.second));
-    }
+    // We created these streams, so are responsible for destroying them
+    CUDA_VERIFY(cudaStreamDestroy(entry.second));
   }
   for (auto& entry : alternateStreams_) {
@@ -210,16 +206,47 @@ StandardGpuResourcesImpl::setPinnedMemory(size_t size) {
 void
 StandardGpuResourcesImpl::setDefaultStream(int device, cudaStream_t stream) {
-  auto it = defaultStreams_.find(device);
-  if (it != defaultStreams_.end()) {
-    // Replace this stream with the user stream
-    CUDA_VERIFY(cudaStreamDestroy(it->second));
-    it->second = stream;
+  if (isInitialized(device)) {
+     // A new series of calls may not be ordered with what was the previous
+     // stream, so if the stream being specified is different, then we need to
+     // ensure ordering between the two (new stream waits on old).
+    auto it = userDefaultStreams_.find(device);
+    cudaStream_t prevStream = nullptr;
+    if (it != userDefaultStreams_.end()) {
+      prevStream = it->second;
+    } else {
+      FAISS_ASSERT(defaultStreams_.count(device));
+      prevStream = defaultStreams_[device];
+    }
+    if (prevStream != stream) {
+      streamWait({stream}, {prevStream});
+    }
   }
   userDefaultStreams_[device] = stream;
 }
+void
+StandardGpuResourcesImpl::revertDefaultStream(int device) {
+  if (isInitialized(device)) {
+    auto it = userDefaultStreams_.find(device);
+    if (it != userDefaultStreams_.end()) {
+      // There was a user stream set that we need to synchronize against
+      cudaStream_t prevStream = userDefaultStreams_[device];
+      FAISS_ASSERT(defaultStreams_.count(device));
+      cudaStream_t newStream = defaultStreams_[device];
+      streamWait({newStream}, {prevStream});
+    }
+  }
+  userDefaultStreams_.erase(device);
+}
 void
 StandardGpuResourcesImpl::setDefaultNullStreamAllDevices() {
   for (int dev = 0; dev < getNumDevices(); ++dev) {
@@ -274,14 +301,8 @@ StandardGpuResourcesImpl::initializeForDevice(int device) {
   // Create streams
   cudaStream_t defaultStream = 0;
-  auto it = userDefaultStreams_.find(device);
-  if (it != userDefaultStreams_.end()) {
-    // We already have a stream provided by the user
-    defaultStream = it->second;
-  } else {
-    CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,
-                                          cudaStreamNonBlocking));
-  }
+  CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,
+                                        cudaStreamNonBlocking));
   defaultStreams_[device] = defaultStream;
@@ -308,15 +329,14 @@ StandardGpuResourcesImpl::initializeForDevice(int device) {
   FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
   blasHandles_[device] = blasHandle;
-  // Enable tensor core support if available
-#if CUDA_VERSION >= 9000 && CUDA_VERSION < 11000
-  // This flag was deprecated in CUDA 11
-  if (getTensorCoreSupport(device)) {
-    cublasSetMathMode(blasHandle, CUBLAS_TENSOR_OP_MATH);
-  }
-#endif
+  // For CUDA 10 on V100, enabling tensor core usage would enable automatic
+  // rounding down of inputs to f16 (though accumulate in f32) which results in
+  // unacceptable loss of precision in general.
+  // For CUDA 11 / A100, only enable tensor core support if it doesn't result in
+  // a loss of precision.
 #if CUDA_VERSION >= 11000
-  cublasSetMathMode(blasHandle, CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION);
+  cublasSetMathMode(blasHandle,
+                    CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION);
 #endif
   FAISS_ASSERT(allocs_.count(device) == 0);
@@ -341,6 +361,14 @@ StandardGpuResourcesImpl::getBlasHandle(int device) {
 cudaStream_t
 StandardGpuResourcesImpl::getDefaultStream(int device) {
   initializeForDevice(device);
+  auto it = userDefaultStreams_.find(device);
+  if (it != userDefaultStreams_.end()) {
+    // There is a user override stream set
+    return it->second;
+  }
+  // Otherwise, our base default stream
   return defaultStreams_[device];
 }
@@ -539,6 +567,11 @@ StandardGpuResources::setDefaultStream(int device, cudaStream_t stream) {
   res_->setDefaultStream(device, stream);
 }
+void
+StandardGpuResources::revertDefaultStream(int device) {
+  res_->revertDefaultStream(device);
+}
 void
 StandardGpuResources::setDefaultNullStreamAllDevices() {
   res_->setDefaultNullStreamAllDevices();