RubyGems - faiss - Versions diffs - 0.1.2 → 0.1.3 - Mend

faiss 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (192) hide show

data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h RENAMED

@@ -9,6 +9,7 @@
 #pragma once
 #include <faiss/gpu/GpuIndexIVF.h>
+#include <memory>
 #include <vector>
 namespace faiss { struct IndexIVFPQ; }
@@ -21,7 +22,9 @@ class IVFPQ;
 struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
   inline GpuIndexIVFPQConfig()
       : useFloat16LookupTables(false),
-        usePrecomputedTables(false) {
+        usePrecomputedTables(false),
+        alternativeLayout(false),
+        useMMCodeDistance(false) {
   }
   /// Whether or not float16 residual distance tables are used in the
@@ -32,6 +35,20 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
   /// Whether or not we enable the precomputed table option for
   /// search, which can substantially increase the memory requirement.
   bool usePrecomputedTables;
+  /// Use the alternative memory layout for the IVF lists
+  /// WARNING: this is a feature under development, do not use!
+  bool alternativeLayout;
+  /// Use GEMM-backed computation of PQ code distances for the no precomputed
+  /// table version of IVFPQ.
+  /// This is for debugging purposes; it should not substantially affect the
+  /// results one way or another.
+  ///
+  /// Note that MM code distance is enabled automatically if one uses a number
+  /// of dimensions per sub-quantizer that is not natively specialized (an odd
+  /// number like 7 or so).
+  bool useMMCodeDistance;
 };
 /// IVFPQ index for the GPU
@@ -39,12 +56,12 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
  public:
   /// Construct from a pre-existing faiss::IndexIVFPQ instance, copying
   /// data over to the given GPU, if the input index is trained.
-  GpuIndexIVFPQ(GpuResources* resources,
+  GpuIndexIVFPQ(GpuResourcesProvider* provider,
                 const faiss::IndexIVFPQ* index,
                 GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
   /// Construct an empty index
-  GpuIndexIVFPQ(GpuResources* resources,
+  GpuIndexIVFPQ(GpuResourcesProvider* provider,
                 int dims,
                 int nlist,
                 int subQuantizers,
@@ -137,7 +154,7 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
   /// The product quantizer instance that we own; contains the
   /// inverted lists
-  IVFPQ* index_;
+  std::unique_ptr<IVFPQ> index_;
 };
 } } // namespace

data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h RENAMED

@@ -10,6 +10,7 @@
 #include <faiss/gpu/GpuIndexIVF.h>
 #include <faiss/IndexScalarQuantizer.h>
+#include <memory>
 namespace faiss { namespace gpu {
@@ -26,7 +27,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
   /// Construct from a pre-existing faiss::IndexIVFScalarQuantizer instance,
   /// copying data over to the given GPU, if the input index is trained.
   GpuIndexIVFScalarQuantizer(
-    GpuResources* resources,
+    GpuResourcesProvider* provider,
     const faiss::IndexIVFScalarQuantizer* index,
     GpuIndexIVFScalarQuantizerConfig config =
     GpuIndexIVFScalarQuantizerConfig());
@@ -34,7 +35,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
   /// Constructs a new instance with an empty flat quantizer; the user
   /// provides the number of lists desired.
   GpuIndexIVFScalarQuantizer(
-    GpuResources* resources,
+    GpuResourcesProvider* provider,
     int dims,
     int nlist,
     faiss::ScalarQuantizer::QuantizerType qtype,
@@ -94,7 +95,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
   size_t reserveMemoryVecs_;
   /// Instance that we own; contains the inverted list
-  IVFFlat* index_;
+  std::unique_ptr<IVFFlat> index_;
 };
 } } // namespace

data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h RENAMED

File without changes

data/vendor/faiss/faiss/gpu/GpuResources.cpp ADDED

@@ -0,0 +1,200 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <faiss/gpu/GpuResources.h>
+#include <faiss/gpu/utils/DeviceUtils.h>
+#include <sstream>
+namespace faiss { namespace gpu {
+/// Maps an AllocType value to the name of its enumerator; any value without
+/// a dedicated case yields "Unknown".
+std::string allocTypeToString(AllocType t) {
+  const char* label = "Unknown";
+  switch (t) {
+    case AllocType::Other:
+      label = "Other";
+      break;
+    case AllocType::FlatData:
+      label = "FlatData";
+      break;
+    case AllocType::IVFLists:
+      label = "IVFLists";
+      break;
+    case AllocType::Quantizer:
+      label = "Quantizer";
+      break;
+    case AllocType::QuantizerPrecomputedCodes:
+      label = "QuantizerPrecomputedCodes";
+      break;
+    case AllocType::TemporaryMemoryBuffer:
+      label = "TemporaryMemoryBuffer";
+      break;
+    case AllocType::TemporaryMemoryOverflow:
+      label = "TemporaryMemoryOverflow";
+      break;
+    default:
+      break;
+  }
+  return label;
+}
+/// Maps a MemorySpace value to the name of its enumerator; any value without
+/// a dedicated case yields "Unknown".
+std::string memorySpaceToString(MemorySpace s) {
+  const char* label = "Unknown";
+  switch (s) {
+    case MemorySpace::Temporary:
+      label = "Temporary";
+      break;
+    case MemorySpace::Device:
+      label = "Device";
+      break;
+    case MemorySpace::Unified:
+      label = "Unified";
+      break;
+    default:
+      break;
+  }
+  return label;
+}
+/// Formats this info on one line as
+/// "type <type> dev <device> space <space> stream <stream pointer>".
+std::string
+AllocInfo::toString() const {
+  std::ostringstream out;
+  out << "type " << allocTypeToString(type);
+  out << " dev " << device;
+  out << " space " << memorySpaceToString(space);
+  // Print the stream handle as a raw pointer value
+  out << " stream " << static_cast<void*>(stream);
+  return out.str();
+}
+/// Formats the request as the AllocInfo description followed by
+/// " size <size> bytes".
+std::string
+AllocRequest::toString() const {
+  std::ostringstream out;
+  out << AllocInfo::toString();
+  out << " size " << size << " bytes";
+  return out.str();
+}
+/// Builds an AllocInfo in MemorySpace::Device for the device reported by
+/// getCurrentDevice().
+AllocInfo makeDevAlloc(AllocType at, cudaStream_t st) {
+  const int currentDevice = getCurrentDevice();
+  return AllocInfo(at, currentDevice, MemorySpace::Device, st);
+}
+/// Builds an AllocInfo in MemorySpace::Temporary for the device reported by
+/// getCurrentDevice().
+AllocInfo makeTempAlloc(AllocType at, cudaStream_t st) {
+  const int currentDevice = getCurrentDevice();
+  return AllocInfo(at, currentDevice, MemorySpace::Temporary, st);
+}
+/// Builds an AllocInfo in the caller-chosen memory space for the device
+/// reported by getCurrentDevice().
+AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st) {
+  const int currentDevice = getCurrentDevice();
+  return AllocInfo(at, currentDevice, sp, st);
+}
+//
+// GpuMemoryReservation
+//
+GpuMemoryReservation::GpuMemoryReservation()
+    : res(nullptr),
+      device(0),
+      stream(nullptr),
+      data(nullptr),
+      size(0) {
+}
+GpuMemoryReservation::GpuMemoryReservation(GpuResources* r,
+                                           int dev,
+                                           cudaStream_t str,
+                                           void* p,
+                                           size_t sz)
+    : res(r),
+      device(dev),
+      stream(str),
+      data(p),
+      size(sz) {
+}
+/// Move constructor: takes over every field of `m` and leaves `m` empty.
+GpuMemoryReservation::GpuMemoryReservation(GpuMemoryReservation&& m) noexcept
+    : res(m.res),
+      device(m.device),
+      stream(m.stream),
+      data(m.data),
+      size(m.size) {
+  // Clear the source so that its destructor has nothing to deallocate
+  m.res = nullptr;
+  m.device = 0;
+  m.stream = nullptr;
+  m.data = nullptr;
+  m.size = 0;
+}
+GpuMemoryReservation&
+GpuMemoryReservation::operator=(GpuMemoryReservation&& m) {
+  // Move assignment. Precondition: *this and m must not both refer to the same live allocation (same res, device and data)
+  FAISS_ASSERT(!(res && res == m.res && device == m.device && data == m.data));
+  release();  // give back whatever this object currently holds before taking over m's fields
+  res = m.res; m.res = nullptr;
+  device = m.device; m.device = 0;
+  stream = m.stream; m.stream = nullptr;
+  data = m.data; m.data = nullptr;
+  size = m.size; m.size = 0;
+  return *this;  // m has been reset to the empty state
+}
+/// Hands the held memory back through res->deallocMemory and resets this
+/// object to the empty state; does nothing when `res` is null.
+void
+GpuMemoryReservation::release() {
+  if (!res) {
+    return;
+  }
+  res->deallocMemory(device, data);
+  size = 0;
+  data = nullptr;
+  stream = nullptr;
+  device = 0;
+  res = nullptr;
+}
+/// Returns any memory still held to the resources object it came from.
+GpuMemoryReservation::~GpuMemoryReservation() {
+  // release() performs the same null check and deallocMemory call
+  release();
+}
+//
+// GpuResources
+//
+// Out-of-line definition of the virtual destructor; nothing to clean up here
+GpuResources::~GpuResources() = default;
+/// Forwards to getBlasHandle() for the device reported by getCurrentDevice().
+cublasHandle_t
+GpuResources::getBlasHandleCurrentDevice() {
+  const int currentDevice = getCurrentDevice();
+  return getBlasHandle(currentDevice);
+}
+/// Forwards to getDefaultStream() for the device reported by
+/// getCurrentDevice().
+cudaStream_t
+GpuResources::getDefaultStreamCurrentDevice() {
+  const int currentDevice = getCurrentDevice();
+  return getDefaultStream(currentDevice);
+}
+/// Forwards to getAlternateStreams() for the device reported by
+/// getCurrentDevice().
+std::vector<cudaStream_t>
+GpuResources::getAlternateStreamsCurrentDevice() {
+  const int currentDevice = getCurrentDevice();
+  return getAlternateStreams(currentDevice);
+}
+/// Forwards to getAsyncCopyStream() for the device reported by
+/// getCurrentDevice().
+cudaStream_t
+GpuResources::getAsyncCopyStreamCurrentDevice() {
+  const int currentDevice = getCurrentDevice();
+  return getAsyncCopyStream(currentDevice);
+}
+void
+GpuResources::syncDefaultStream(int device) {
+  CUDA_VERIFY(cudaStreamSynchronize(getDefaultStream(device)));  // synchronizes only this device's default stream; the CUDA status goes through CUDA_VERIFY
+}
+/// Forwards to syncDefaultStream() for the device reported by
+/// getCurrentDevice().
+void
+GpuResources::syncDefaultStreamCurrentDevice() {
+  const int currentDevice = getCurrentDevice();
+  syncDefaultStream(currentDevice);
+}
+/// Calls allocMemory(req) and wraps the returned pointer, together with the
+/// request's device, stream and size, in a GpuMemoryReservation bound to
+/// this resources object.
+GpuMemoryReservation
+GpuResources::allocMemoryHandle(const AllocRequest& req) {
+  void* const mem = allocMemory(req);
+  return GpuMemoryReservation(this, req.device, req.stream, mem, req.size);
+}
+/// Forwards to getTempMemoryAvailable() for the device reported by
+/// getCurrentDevice().
+size_t
+GpuResources::getTempMemoryAvailableCurrentDevice() const {
+  const int currentDevice = getCurrentDevice();
+  return getTempMemoryAvailable(currentDevice);
+}
+//
+// GpuResourcesProvider
+//
+// Out-of-line definition of the virtual destructor; nothing to clean up here
+GpuResourcesProvider::~GpuResourcesProvider() = default;
+} } // namespace

data/vendor/faiss/faiss/gpu/GpuResources.h ADDED

@@ -0,0 +1,264 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <faiss/impl/FaissAssert.h>
+#include <cuda_runtime.h>
+#include <cublas_v2.h>
+#include <memory>
+#include <utility>
+#include <vector>
+namespace faiss { namespace gpu {
+class GpuResources;
+enum AllocType {  // category recorded in AllocInfo::type for each allocation
+  /// Unknown allocation type or miscellaneous (not currently categorized)
+  Other = 0,
+  /// Primary data storage for GpuIndexFlat (the raw matrix of vectors and
+  /// vector norms if needed)
+  FlatData = 1,
+  /// Primary data storage for GpuIndexIVF* (the storage for each individual IVF
+  /// list)
+  IVFLists = 2,
+  /// Quantizer (PQ, SQ) dictionary information
+  Quantizer = 3,
+  /// For GpuIndexIVFPQ, "precomputed codes" for more efficient PQ lookup
+  /// require the use of possibly large tables. These are marked separately from
+  /// Quantizer as these can frequently be 100s - 1000s of MiB in size
+  QuantizerPrecomputedCodes = 4,
+  ///
+  /// StandardGpuResources implementation-specific types
+  ///
+  /// When using StandardGpuResources, temporary memory allocations
+  /// (MemorySpace::Temporary) come out of a stack region of memory that is
+  /// allocated up front for each GPU (e.g., 1.5 GiB upon initialization). This
+  /// allocation by StandardGpuResources is marked with this AllocType.
+  TemporaryMemoryBuffer = 10,
+  /// When using StandardGpuResources, any MemorySpace::Temporary allocations
+  /// that cannot be satisfied within the TemporaryMemoryBuffer region fall back
+  /// to calling cudaMalloc, and are sized to just the request at hand. These
+  /// "overflow" temporary allocations are marked with this AllocType.
+  TemporaryMemoryOverflow = 11,
+};
+/// Convert an AllocType to string
+std::string allocTypeToString(AllocType t);
+/// Memory regions accessible to the GPU; recorded in AllocInfo::space
+enum MemorySpace {
+  /// Temporary device memory (guaranteed to no longer be used upon exit of a
+  /// top-level index call, and where the streams using it have completed GPU
+  /// work). Typically backed by Device memory (cudaMalloc/cudaFree).
+  Temporary = 0,
+  /// Managed using cudaMalloc/cudaFree (typical GPU device memory)
+  Device = 1,
+  /// Managed using cudaMallocManaged/cudaFree (typical unified CPU/GPU memory)
+  Unified = 2,
+};
+/// Convert a MemorySpace to string
+std::string memorySpaceToString(MemorySpace s);
+/// Describes what an allocation is for and where it lives
+struct AllocInfo {
+  /// Defaults to an uncategorized (Other) Device-space allocation on device 0
+  /// with a null stream
+  AllocInfo()
+      : type{AllocType::Other},
+        device{0},
+        space{MemorySpace::Device},
+        stream{nullptr} {}
+  /// Builds an info from an explicit category, device, memory space and stream
+  AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
+      : type{at}, device{dev}, space{sp}, stream{st} {}
+  /// Returns a string representation of this info
+  std::string toString() const;
+  /// The internal category of the allocation
+  AllocType type;
+  /// The device on which the allocation is happening
+  int device;
+  /// The memory space of the allocation
+  MemorySpace space;
+  /// The stream on which new work on the memory will be ordered. For example,
+  /// if a piece of cached memory to be returned for this call was last used
+  /// on stream 3 and the new memory request is for stream 4, the memory
+  /// manager will synchronize stream 4 to wait for the completion of stream 3
+  /// via events or other stream synchronization.
+  ///
+  /// The memory manager guarantees that the returned memory is free to use
+  /// without data races on the stream specified here.
+  cudaStream_t stream;
+};
+/// Create an AllocInfo for the current device with MemorySpace::Device
+AllocInfo makeDevAlloc(AllocType at, cudaStream_t st);
+/// Create an AllocInfo for the current device with MemorySpace::Temporary
+AllocInfo makeTempAlloc(AllocType at, cudaStream_t st);
+/// Create an AllocInfo for the current device
+AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
+/// An AllocInfo (what/where an allocation is) plus the requested size
+struct AllocRequest : public AllocInfo {
+  /// Default AllocInfo with a size of zero bytes
+  AllocRequest() : AllocInfo(), size{0} {}
+  /// Pairs an existing AllocInfo with a size in bytes
+  AllocRequest(const AllocInfo& info, size_t sz) : AllocInfo(info), size{sz} {}
+  /// Builds the request from its individual fields
+  AllocRequest(AllocType at,
+               int dev,
+               MemorySpace sp,
+               cudaStream_t st,
+               size_t sz)
+      : AllocInfo(at, dev, sp, st), size{sz} {}
+  /// Returns a string representation of this request
+  std::string toString() const;
+  /// The size in bytes of the allocation
+  size_t size;
+};
+/// A move-only RAII object holding one allocation; the memory is handed back via res->deallocMemory(device, data) on release() or destruction
+struct GpuMemoryReservation {
+  GpuMemoryReservation();  ///< empty reservation: null res/stream/data, device 0, size 0
+  GpuMemoryReservation(GpuResources* r,
+                       int dev,
+                       cudaStream_t str,
+                       void* p,
+                       size_t sz);  ///< wraps an existing allocation p of sz bytes
+  GpuMemoryReservation(GpuMemoryReservation&& m) noexcept;  ///< takes over m's fields and leaves m empty
+  ~GpuMemoryReservation();  ///< deallocates through res if res is non-null
+  GpuMemoryReservation& operator=(GpuMemoryReservation&& m);  ///< releases the current contents, then takes over m's fields
+  inline void* get() { return data; }  ///< the held pointer (null when empty)
+  void release();  ///< deallocates now (if res is non-null) and resets to the empty state
+  GpuResources* res;  ///< object that deallocMemory is called on; null means nothing is held
+  int device;  ///< device argument passed to deallocMemory
+  cudaStream_t stream;  ///< stream recorded at construction
+  void* data;  ///< pointer passed to deallocMemory
+  size_t size;  ///< size recorded at construction
+};
+/// Base class of GPU-side resource provider; hides provision of
+/// cuBLAS handles, CUDA streams and all device memory allocation performed
+class GpuResources {
+ public:
+  virtual ~GpuResources();
+  /// Call to pre-allocate resources for a particular device. If this is
+  /// not called, then resources will be allocated at the first time
+  /// of demand
+  virtual void initializeForDevice(int device) = 0;
+  /// Returns the cuBLAS handle that we use for the given device
+  virtual cublasHandle_t getBlasHandle(int device) = 0;
+  /// Returns the stream that we order all computation on for the
+  /// given device
+  virtual cudaStream_t getDefaultStream(int device) = 0;
+  /// Returns the set of alternative streams that we use for the given device
+  virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
+  /// Memory management
+  /// Returns an allocation from the given memory space, ordered with respect to
+  /// the given stream (i.e., the first user will be a kernel in this stream).
+  /// All allocations are sized internally to be the next highest multiple of 16
+  /// bytes, and all allocations returned are guaranteed to be 16 byte aligned.
+  virtual void* allocMemory(const AllocRequest& req) = 0;
+  /// Returns a previous allocation
+  virtual void deallocMemory(int device, void* in) = 0;
+  /// For MemorySpace::Temporary, how much space is immediately available
+  /// without cudaMalloc allocation?
+  virtual size_t getTempMemoryAvailable(int device) const = 0;
+  /// Returns the available CPU pinned memory buffer
+  virtual std::pair<void*, size_t> getPinnedMemory() = 0;
+  /// Returns the stream on which we perform async CPU <-> GPU copies
+  virtual cudaStream_t getAsyncCopyStream(int device) = 0;
+  ///
+  /// Functions provided by default
+  ///
+  /// Calls getBlasHandle with the current device
+  cublasHandle_t getBlasHandleCurrentDevice();
+  /// Calls getDefaultStream with the current device
+  cudaStream_t getDefaultStreamCurrentDevice();
+  /// Calls getTempMemoryAvailable with the current device
+  size_t getTempMemoryAvailableCurrentDevice() const;
+  /// Returns a temporary memory allocation via a RAII object
+  GpuMemoryReservation allocMemoryHandle(const AllocRequest& req);
+  /// Synchronizes the CPU with respect to the default stream for the
+  /// given device
+  // equivalent to cudaStreamSynchronize(getDefaultStream(device))
+  void syncDefaultStream(int device);
+  /// Calls syncDefaultStream for the current device
+  void syncDefaultStreamCurrentDevice();
+  /// Calls getAlternateStreams for the current device
+  std::vector<cudaStream_t> getAlternateStreamsCurrentDevice();
+  /// Calls getAsyncCopyStream for the current device
+  cudaStream_t getAsyncCopyStreamCurrentDevice();
+};
+/// Interface for a provider of a shared GpuResources object
+class GpuResourcesProvider {
+ public:
+  virtual ~GpuResourcesProvider();  // virtual: this is a polymorphic interface
+  /// Returns the shared resources object
+  virtual std::shared_ptr<GpuResources> getResources() = 0;
+};
+} } // namespace