faiss 0.1.3 → 0.1.4
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
```diff
--- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp
+++ data/vendor/faiss/faiss/IndexScalarQuantizer.cpp
@@ -192,7 +192,7 @@ void IndexIVFScalarQuantizer::encode_vectors(idx_t n, const float* x,
     size_t coarse_size = include_listnos ? coarse_code_size () : 0;
     memset(codes, 0, (code_size + coarse_size) * n);
 
-    #pragma omp parallel if(n >
+    #pragma omp parallel if(n > 1000)
     {
         std::vector<float> residual (d);
 
@@ -222,7 +222,7 @@ void IndexIVFScalarQuantizer::sa_decode (idx_t n, const uint8_t *codes,
     std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
     size_t coarse_size = coarse_code_size ();
 
-    #pragma omp parallel if(n >
+    #pragma omp parallel if(n > 1000)
     {
         std::vector<float> residual (d);
```
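Both hunks put a batch-size gate on the OpenMP parallel region, so small encode/decode calls no longer pay thread start-up costs. A minimal standalone sketch of the pattern (illustrative only, not faiss code):

```cpp
#include <omp.h>
#include <cstdio>

// The if() clause makes OpenMP execute the region serially when the
// condition is false, mirroring the if(n > 1000) gate in the diff above.
void process_batch(int n) {
    #pragma omp parallel if(n > 1000)
    {
        #pragma omp for
        for (int i = 0; i < n; i++) {
            // per-item encode/decode work would go here
        }
    }
    printf("n=%d done\n", n);
}

int main() {
    process_batch(10);      // below the threshold: runs on one thread
    process_batch(100000);  // above the threshold: uses the thread team
    return 0;
}
```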
```diff
--- data/vendor/faiss/faiss/IndexScalarQuantizer.h
+++ data/vendor/faiss/faiss/IndexScalarQuantizer.h
@@ -82,7 +82,7 @@ struct IndexScalarQuantizer: Index {
 
 
 /** An IVF implementation where the components of the residuals are
- * encoded with a scalar
+ * encoded with a scalar quantizer. All distance computations
  * are asymmetric, so the encoded vectors are decoded and approximate
  * distances are computed.
  */
```
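The completed comment describes asymmetric distance computation: the query stays in full float precision and only the stored vectors go through the quantizer. A rough sketch of the idea, assuming a uniform 8-bit quantizer for illustration (faiss's actual ScalarQuantizer supports several encodings):

```cpp
#include <cstdint>

// Assumed-for-illustration uniform 8-bit scalar quantizer on [vmin, vmax].
struct Uniform8 {
    float vmin;
    float vmax;
    float decode(uint8_t code) const {
        return vmin + (vmax - vmin) * (code + 0.5f) / 256.0f;
    }
};

// Asymmetric squared-L2: x is the raw float query, codes is the stored
// vector; only the database side carries quantization error.
float asymmetric_l2_sqr(const float* x, const uint8_t* codes,
                        const Uniform8& q, int d) {
    float sum = 0.0f;
    for (int i = 0; i < d; i++) {
        float diff = x[i] - q.decode(codes[i]);
        sum += diff * diff;
    }
    return sum;
}
```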
```diff
--- data/vendor/faiss/faiss/gpu/GpuIndex.h
+++ data/vendor/faiss/faiss/gpu/GpuIndex.h
@@ -36,9 +36,12 @@ class GpuIndex : public faiss::Index {
            float metricArg,
            GpuIndexConfig config);
 
-
-
-
+  /// Returns the device that this index is resident on
+  int getDevice() const;
+
+  /// Returns a reference to our GpuResources object that manages memory, stream
+  /// and handle resources on the GPU
+  std::shared_ptr<GpuResources> getResources();
 
   /// Set the minimum data size for searches (in MiB) for which we use
   /// CPU -> GPU paging
@@ -50,7 +53,7 @@ class GpuIndex : public faiss::Index {
   /// `x` can be resident on the CPU or any GPU; copies are performed
   /// as needed
   /// Handles paged adds if the add set is too large; calls addInternal_
-  void add(
+  void add(Index::idx_t, const float* x) override;
 
   /// `x` and `ids` can be resident on the CPU or any GPU; copies are
   /// performed as needed
@@ -59,6 +62,13 @@ class GpuIndex : public faiss::Index {
               const float* x,
               const Index::idx_t* ids) override;
 
+  /// `x` and `labels` can be resident on the CPU or any GPU; copies are
+  /// performed as needed
+  void assign(Index::idx_t n,
+              const float* x,
+              Index::idx_t* labels,
+              Index::idx_t k = 1) const override;
+
   /// `x`, `distances` and `labels` can be resident on the CPU or any
   /// GPU; copies are performed as needed
   void search(Index::idx_t n,
@@ -136,11 +146,8 @@ private:
   /// Manages streams, cuBLAS handles and scratch memory for devices
   std::shared_ptr<GpuResources> resources_;
 
-  ///
-  const
-
-  /// The memory space of our primary storage on the GPU
-  const MemorySpace memorySpace_;
+  /// Our configuration options
+  const GpuIndexConfig config_;
 
   /// Size above which we page copies from the CPU to GPU
   size_t minPagedSize_;
```
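With these hunks, every GpuIndex now reports its device and resources, and assign() is handled on the GPU side. A speculative usage sketch against a GpuIndexFlatL2, assuming a CUDA-enabled faiss build (constructor arguments as in faiss's GPU API):

```cpp
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <vector>

int main() {
    faiss::gpu::StandardGpuResources res;

    int d = 64;
    faiss::gpu::GpuIndexFlatL2 index(&res, d);

    std::vector<float> xb(1000 * d, 0.5f);
    index.add(1000, xb.data());

    // New in this release: query the device and resources of any GpuIndex.
    int device = index.getDevice();
    std::shared_ptr<faiss::gpu::GpuResources> resources = index.getResources();

    // assign() is now overridden GPU-side; per the diff, x and labels can be
    // resident on the CPU or any GPU, and copies are performed as needed.
    std::vector<faiss::Index::idx_t> labels(5);
    index.assign(5, xb.data(), labels.data(), 1);

    (void)device; (void)resources;
    return 0;
}
```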
```diff
--- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h
+++ data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h
@@ -38,6 +38,13 @@ class GpuIndexBinaryFlat : public IndexBinary {
 
   ~GpuIndexBinaryFlat() override;
 
+  /// Returns the device that this index is resident on
+  int getDevice() const;
+
+  /// Returns a reference to our GpuResources object that manages memory, stream
+  /// and handle resources on the GPU
+  std::shared_ptr<GpuResources> getResources();
+
   /// Initialize ourselves from the given CPU index; will overwrite
   /// all data in ourselves
   void copyFrom(const faiss::IndexBinaryFlat* index);
@@ -80,7 +87,7 @@ class GpuIndexBinaryFlat : public IndexBinary {
   std::shared_ptr<GpuResources> resources_;
 
   /// Configuration options
-  GpuIndexBinaryFlatConfig
+  const GpuIndexBinaryFlatConfig binaryFlatConfig_;
 
   /// Holds our GPU data containing the list of vectors
   std::unique_ptr<BinaryFlatIndex> data_;
```
```diff
--- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h
+++ data/vendor/faiss/faiss/gpu/GpuIndexFlat.h
@@ -21,7 +21,7 @@ struct IndexFlatIP;
 
 namespace faiss { namespace gpu {
 
-
+class FlatIndex;
 
 struct GpuIndexFlatConfig : public GpuIndexConfig {
   inline GpuIndexFlatConfig()
@@ -87,27 +87,27 @@ class GpuIndexFlat : public GpuIndex {
   void train(Index::idx_t n, const float* x) override;
 
   /// Overrides to avoid excessive copies
-  void add(
+  void add(Index::idx_t, const float* x) override;
 
   /// Reconstruction methods; prefer the batch reconstruct as it will
   /// be more efficient
-  void reconstruct(
+  void reconstruct(Index::idx_t key, float* out) const override;
 
   /// Batch reconstruction method
-  void reconstruct_n(
-
+  void reconstruct_n(Index::idx_t i0,
+                     Index::idx_t num,
                      float* out) const override;
 
   /// Compute residual
   void compute_residual(const float* x,
                         float* residual,
-
+                        Index::idx_t key) const override;
 
   /// Compute residual (batch mode)
-  void compute_residual_n(
+  void compute_residual_n(Index::idx_t n,
                           const float* xs,
                           float* residuals,
-  const
+                          const Index::idx_t* keys) const override;
 
   /// For internal access
   inline FlatIndex* getGpuData() { return data_.get(); }
@@ -126,11 +126,11 @@ class GpuIndexFlat : public GpuIndex {
              const float* x,
              int k,
              float* distances,
-
+             Index::idx_t* labels) const override;
 
  protected:
-  /// Our
-  const GpuIndexFlatConfig
+  /// Our configuration options
+  const GpuIndexFlatConfig flatConfig_;
 
   /// Holds our GPU data containing the list of vectors
   std::unique_ptr<FlatIndex> data_;
```
```diff
--- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h
+++ data/vendor/faiss/faiss/gpu/GpuIndexIVF.h
@@ -56,6 +56,22 @@ class GpuIndexIVF : public GpuIndex {
   /// Returns the number of inverted lists we're managing
   int getNumLists() const;
 
+  /// Returns the number of vectors present in a particular inverted list
+  virtual int getListLength(int listId) const = 0;
+
+  /// Return the encoded vector data contained in a particular inverted list,
+  /// for debugging purposes.
+  /// If gpuFormat is true, the data is returned as it is encoded in the
+  /// GPU-side representation.
+  /// Otherwise, it is converted to the CPU format.
+  /// compliant format, while the native GPU format may differ.
+  virtual std::vector<uint8_t>
+  getListVectorData(int listId, bool gpuFormat = false) const = 0;
+
+  /// Return the vector indices contained in a particular inverted list, for
+  /// debugging purposes.
+  virtual std::vector<Index::idx_t> getListIndices(int listId) const = 0;
+
   /// Return the quantizer we're using
   GpuIndexFlat* getQuantizer();
 
@@ -67,7 +83,7 @@ class GpuIndexIVF : public GpuIndex {
 
  protected:
   bool addImplRequiresIDs_() const override;
-  void trainQuantizer_(
+  void trainQuantizer_(Index::idx_t n, const float* x);
 
  public:
   /// Exposing this like the CPU version for manipulation
@@ -83,7 +99,8 @@ class GpuIndexIVF : public GpuIndex {
   GpuIndexFlat* quantizer;
 
  protected:
-
+  /// Our configuration options
+  const GpuIndexIVFConfig ivfConfig_;
 };
 
 } } // namespace
```
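These three pure-virtual methods give every GPU IVF index a CPU-visible view of its inverted lists. A speculative debugging sketch using GpuIndexIVFFlat, whose overrides appear in the next hunks (constructor arguments and the random fill are assumptions for illustration):

```cpp
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <cstdio>
#include <cstdlib>
#include <vector>

int main() {
    faiss::gpu::StandardGpuResources res;
    int d = 32, nlist = 16;
    faiss::gpu::GpuIndexIVFFlat index(&res, d, nlist, faiss::METRIC_L2);

    std::vector<float> xb(5000 * d);
    for (size_t i = 0; i < xb.size(); i++) {
        xb[i] = (float)rand() / RAND_MAX;
    }
    index.train(5000, xb.data());
    index.add(5000, xb.data());

    // Walk every inverted list using the new debugging accessors.
    for (int list = 0; list < index.getNumLists(); ++list) {
        int len = index.getListLength(list);

        // Vector ids stored in this list.
        std::vector<faiss::Index::idx_t> ids = index.getListIndices(list);

        // Encoded vectors, converted to the CPU layout by default;
        // pass gpuFormat = true for the raw GPU-side encoding.
        std::vector<uint8_t> codes = index.getListVectorData(list);

        printf("list %d: %d vectors, %zu code bytes\n",
               list, len, codes.size());
        (void)ids;
    }
    return 0;
}
```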
```diff
--- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h
+++ data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h
@@ -19,6 +19,13 @@ class IVFFlat;
 class GpuIndexFlat;
 
 struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig {
+  inline GpuIndexIVFFlatConfig()
+      : interleavedLayout(true) {
+  }
+
+  /// Use the alternative memory layout for the IVF lists
+  /// (currently the default)
+  bool interleavedLayout;
 };
 
 /// Wrapper around the GPU implementation that looks like
@@ -56,10 +63,28 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
   /// to exactly the amount needed. Returns space reclaimed in bytes
   size_t reclaimMemory();
 
+  /// Clears out all inverted lists, but retains the coarse centroid information
   void reset() override;
 
+  /// Trains the coarse quantizer based on the given vector data
   void train(Index::idx_t n, const float* x) override;
 
+  /// Returns the number of vectors present in a particular inverted list
+  int getListLength(int listId) const override;
+
+  /// Return the encoded vector data contained in a particular inverted list,
+  /// for debugging purposes.
+  /// If gpuFormat is true, the data is returned as it is encoded in the
+  /// GPU-side representation.
+  /// Otherwise, it is converted to the CPU format.
+  /// compliant format, while the native GPU format may differ.
+  std::vector<uint8_t>
+  getListVectorData(int listId, bool gpuFormat = false) const override;
+
+  /// Return the vector indices contained in a particular inverted list, for
+  /// debugging purposes.
+  std::vector<Index::idx_t> getListIndices(int listId) const override;
+
  protected:
   /// Called from GpuIndex for add/add_with_ids
   void addImpl_(int n,
@@ -73,8 +98,9 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
                 float* distances,
                 Index::idx_t* labels) const override;
 
-
-
+ protected:
+  /// Our configuration options
+  const GpuIndexIVFFlatConfig ivfFlatConfig_;
 
   /// Desired inverted list memory reservation
   size_t reserveMemoryVecs_;
```
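interleavedLayout is new in GpuIndexIVFFlatConfig and now defaults to true (for IVFPQ, in the next group of hunks, it exists but defaults to false). A short sketch of opting back out of it, assuming the constructor signature from faiss's GPU API:

```cpp
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>

int main() {
    faiss::gpu::StandardGpuResources res;

    faiss::gpu::GpuIndexIVFFlatConfig config;
    // interleavedLayout defaults to true in this release; revert to the
    // previous per-list layout if needed (e.g., to compare performance).
    config.interleavedLayout = false;

    faiss::gpu::GpuIndexIVFFlat index(&res, 64, 1024,
                                      faiss::METRIC_L2, config);
    return 0;
}
```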
```diff
--- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h
+++ data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h
@@ -23,7 +23,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
   inline GpuIndexIVFPQConfig()
       : useFloat16LookupTables(false),
         usePrecomputedTables(false),
-
+        interleavedLayout(false),
         useMMCodeDistance(false) {
   }
 
@@ -38,7 +38,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
 
   /// Use the alternative memory layout for the IVF lists
   /// WARNING: this is a feature under development, do not use!
-  bool
+  bool interleavedLayout;
 
   /// Use GEMM-backed computation of PQ code distances for the no precomputed
   /// table version of IVFPQ.
@@ -108,19 +108,24 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
   /// product centroid information
   void reset() override;
 
+  /// Trains the coarse and product quantizer based on the given vector data
   void train(Index::idx_t n, const float* x) override;
 
-  ///
-
-  int getListLength(int listId) const;
+  /// Returns the number of vectors present in a particular inverted list
+  int getListLength(int listId) const override;
 
-  ///
-  ///
-
+  /// Return the encoded vector data contained in a particular inverted list,
+  /// for debugging purposes.
+  /// If gpuFormat is true, the data is returned as it is encoded in the
+  /// GPU-side representation.
+  /// Otherwise, it is converted to the CPU format.
+  /// compliant format, while the native GPU format may differ.
+  std::vector<uint8_t>
+  getListVectorData(int listId, bool gpuFormat = false) const override;
 
-  ///
-  ///
-  std::vector<
+  /// Return the vector indices contained in a particular inverted list, for
+  /// debugging purposes.
+  std::vector<Index::idx_t> getListIndices(int listId) const override;
 
  protected:
   /// Called from GpuIndex for add/add_with_ids
@@ -135,13 +140,18 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
                 float* distances,
                 Index::idx_t* labels) const override;
 
-
+  /// Throws errors if configuration settings are improper
   void verifySettings_() const;
 
+  /// Trains the PQ quantizer based on the given vector data
   void trainResidualQuantizer_(Index::idx_t n, const float* x);
 
-
-
+ protected:
+  /// Our configuration options that we were initialized with
+  const GpuIndexIVFPQConfig ivfpqConfig_;
+
+  /// Runtime override: whether or not we use precomputed tables
+  bool usePrecomputedTables_;
 
   /// Number of sub-quantizers per encoded vector
   int subQuantizers_;
```
```diff
--- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h
+++ data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h
@@ -18,6 +18,13 @@ class IVFFlat;
 class GpuIndexFlat;
 
 struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
+  inline GpuIndexIVFScalarQuantizerConfig()
+      : interleavedLayout(true) {
+  }
+
+  /// Use the alternative memory layout for the IVF lists
+  /// (currently the default)
+  bool interleavedLayout;
 };
 
 /// Wrapper around the GPU implementation that looks like
@@ -61,10 +68,29 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
   /// to exactly the amount needed. Returns space reclaimed in bytes
   size_t reclaimMemory();
 
+  /// Clears out all inverted lists, but retains the coarse and scalar quantizer
+  /// information
   void reset() override;
 
+  /// Trains the coarse and scalar quantizer based on the given vector data
   void train(Index::idx_t n, const float* x) override;
 
+  /// Returns the number of vectors present in a particular inverted list
+  int getListLength(int listId) const override;
+
+  /// Return the encoded vector data contained in a particular inverted list,
+  /// for debugging purposes.
+  /// If gpuFormat is true, the data is returned as it is encoded in the
+  /// GPU-side representation.
+  /// Otherwise, it is converted to the CPU format.
+  /// compliant format, while the native GPU format may differ.
+  std::vector<uint8_t>
+  getListVectorData(int listId, bool gpuFormat = false) const override;
+
+  /// Return the vector indices contained in a particular inverted list, for
+  /// debugging purposes.
+  std::vector<Index::idx_t> getListIndices(int listId) const override;
+
  protected:
   /// Called from GpuIndex for add/add_with_ids
   void addImpl_(int n,
@@ -88,8 +114,9 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
   /// Exposed like the CPU version
   bool by_residual;
 
-
-
+ protected:
+  /// Our configuration options
+  const GpuIndexIVFScalarQuantizerConfig ivfSQConfig_;
 
   /// Desired inverted list memory reservation
   size_t reserveMemoryVecs_;
```
```diff
--- data/vendor/faiss/faiss/gpu/GpuResources.h
+++ data/vendor/faiss/faiss/gpu/GpuResources.h
@@ -198,6 +198,10 @@ class GpuResources {
   /// given device
   virtual cudaStream_t getDefaultStream(int device) = 0;
 
+  /// Overrides the default stream for a device to the user-supplied stream. The
+  /// resources object does not own this stream (i.e., it will not destroy it).
+  virtual void setDefaultStream(int device, cudaStream_t stream) = 0;
+
   /// Returns the set of alternative streams that we use for the given device
   virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
```
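Because the stream is caller-owned, the typical pattern is create / set / destroy on the application side. A minimal sketch against StandardGpuResources, which implements this interface in the hunks below:

```cpp
#include <faiss/gpu/StandardGpuResources.h>
#include <cuda_runtime.h>

int main() {
    faiss::gpu::StandardGpuResources res;

    // A stream owned by the application, not by faiss.
    cudaStream_t myStream;
    cudaStreamCreate(&myStream);

    // All subsequent faiss work on device 0 is ordered on myStream.
    res.setDefaultStream(0, myStream);

    // ... build and search indexes here ...

    // faiss will not destroy the stream; the application must.
    cudaStreamDestroy(myStream);
    return 0;
}
```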
```diff
--- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp
+++ data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp
@@ -101,12 +101,8 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
   for (auto& entry : defaultStreams_) {
     DeviceScope scope(entry.first);
 
-
-
-    // The user did not specify this stream, thus we are the ones
-    // who have created it
-    CUDA_VERIFY(cudaStreamDestroy(entry.second));
-    }
+    // We created these streams, so are responsible for destroying them
+    CUDA_VERIFY(cudaStreamDestroy(entry.second));
   }
 
   for (auto& entry : alternateStreams_) {
@@ -210,16 +206,47 @@ StandardGpuResourcesImpl::setPinnedMemory(size_t size) {
 
 void
 StandardGpuResourcesImpl::setDefaultStream(int device, cudaStream_t stream) {
-
-
-
-
-  it
+  if (isInitialized(device)) {
+    // A new series of calls may not be ordered with what was the previous
+    // stream, so if the stream being specified is different, then we need to
+    // ensure ordering between the two (new stream waits on old).
+    auto it = userDefaultStreams_.find(device);
+    cudaStream_t prevStream = nullptr;
+
+    if (it != userDefaultStreams_.end()) {
+      prevStream = it->second;
+    } else {
+      FAISS_ASSERT(defaultStreams_.count(device));
+      prevStream = defaultStreams_[device];
+    }
+
+    if (prevStream != stream) {
+      streamWait({stream}, {prevStream});
+    }
   }
 
   userDefaultStreams_[device] = stream;
 }
 
+void
+StandardGpuResourcesImpl::revertDefaultStream(int device) {
+  if (isInitialized(device)) {
+    auto it = userDefaultStreams_.find(device);
+
+    if (it != userDefaultStreams_.end()) {
+      // There was a user stream set that we need to synchronize against
+      cudaStream_t prevStream = userDefaultStreams_[device];
+
+      FAISS_ASSERT(defaultStreams_.count(device));
+      cudaStream_t newStream = defaultStreams_[device];
+
+      streamWait({newStream}, {prevStream});
+    }
+  }
+
+  userDefaultStreams_.erase(device);
+}
+
 void
 StandardGpuResourcesImpl::setDefaultNullStreamAllDevices() {
   for (int dev = 0; dev < getNumDevices(); ++dev) {
@@ -274,14 +301,8 @@ StandardGpuResourcesImpl::initializeForDevice(int device) {
 
   // Create streams
   cudaStream_t defaultStream = 0;
-
-
-    // We already have a stream provided by the user
-    defaultStream = it->second;
-  } else {
-    CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,
-                                          cudaStreamNonBlocking));
-  }
+  CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,
+                                        cudaStreamNonBlocking));
 
   defaultStreams_[device] = defaultStream;
 
@@ -308,15 +329,14 @@ StandardGpuResourcesImpl::initializeForDevice(int device) {
   FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
   blasHandles_[device] = blasHandle;
 
-  //
-
-  //
-  if
-
-  }
-#endif
+  // For CUDA 10 on V100, enabling tensor core usage would enable automatic
+  // rounding down of inputs to f16 (though accumulate in f32) which results in
+  // unacceptable loss of precision in general.
+  // For CUDA 11 / A100, only enable tensor core support if it doesn't result in
+  // a loss of precision.
 #if CUDA_VERSION >= 11000
-  cublasSetMathMode(blasHandle,
+  cublasSetMathMode(blasHandle,
+                    CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION);
 #endif
 
   FAISS_ASSERT(allocs_.count(device) == 0);
@@ -341,6 +361,14 @@ StandardGpuResourcesImpl::getBlasHandle(int device) {
 cudaStream_t
 StandardGpuResourcesImpl::getDefaultStream(int device) {
   initializeForDevice(device);
+
+  auto it = userDefaultStreams_.find(device);
+  if (it != userDefaultStreams_.end()) {
+    // There is a user override stream set
+    return it->second;
+  }
+
+  // Otherwise, our base default stream
   return defaultStreams_[device];
 }
 
@@ -539,6 +567,11 @@ StandardGpuResources::setDefaultStream(int device, cudaStream_t stream) {
   res_->setDefaultStream(device, stream);
 }
 
+void
+StandardGpuResources::revertDefaultStream(int device) {
+  res_->revertDefaultStream(device);
+}
+
 void
 StandardGpuResources::setDefaultNullStreamAllDevices() {
   res_->setDefaultNullStreamAllDevices();
```