RubyGems - faiss - Versions diffs - 0.1.2 → 0.1.3 - Mend

faiss 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (192)

data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp RENAMED

File without changes

data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp RENAMED

@@ -24,6 +24,8 @@ inline float relativeError(float a, float b) {
 // This seed is also used for the faiss float_rand API; in a test it
 // is all within a single thread, so it is ok
 long s_seed = 1;
+std::mt19937 rng(1);
+std::uniform_int_distribution<> distrib;
 void newTestSeed() {
   struct timespec t;
@@ -35,7 +37,7 @@ void newTestSeed() {
 void setTestSeed(long seed) {
   printf("testing with random seed %ld\n", seed);
-  srand48(seed);
+  rng = std::mt19937(seed);
   s_seed = seed;
 }
@@ -43,7 +45,7 @@ int randVal(int a, int b) {
   EXPECT_GE(a, 0);
   EXPECT_LE(a, b);
-  return a + (lrand48() % (b + 1 - a));
+  return a + (distrib(rng) % (b + 1 - a));
 }
 bool randBool() {

data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h RENAMED

File without changes

data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp RENAMED

@@ -5,12 +5,10 @@
  * LICENSE file in the root directory of this source tree.
  */
-// Copyright 2004-present Facebook. All Rights Reserved
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
+#include <random>
 #include <sys/time.h>
@@ -64,13 +62,16 @@ int main ()
     faiss::gpu::GpuIndexIVFPQ index (
       &resources, d, ncentroids, 4, 8, faiss::METRIC_L2, config);
+    std::mt19937 rng;
     { // training
         printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
                 elapsed() - t0, nt, d);
         std::vector <float> trainvecs (nt * d);
+        std::uniform_real_distribution<> distrib;
         for (size_t i = 0; i < nt * d; i++) {
-            trainvecs[i] = drand48();
+            trainvecs[i] = distrib(rng);
         }
         printf ("[%.3f s] Training the index\n",
@@ -100,8 +101,9 @@ int main ()
                 elapsed() - t0, nb);
         std::vector <float> database (nb * d);
+        std::uniform_real_distribution<> distrib;
         for (size_t i = 0; i < nb * d; i++) {
-            database[i] = drand48();
+            database[i] = distrib(rng);
         }
         printf ("[%.3f s] Adding the vectors to the index\n",

data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h RENAMED

@@ -102,7 +102,7 @@ class CublasHandleScope {
 class CudaEvent {
  public:
   /// Creates an event and records it in this stream
-  explicit CudaEvent(cudaStream_t stream);
+  explicit CudaEvent(cudaStream_t stream, bool timer = false);
   CudaEvent(const CudaEvent& event) = delete;
   CudaEvent(CudaEvent&& event) noexcept;
   ~CudaEvent();

data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp ADDED

@@ -0,0 +1,213 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <faiss/gpu/utils/StackDeviceMemory.h>
+#include <faiss/gpu/utils/DeviceUtils.h>
+#include <faiss/gpu/utils/StaticUtils.h>
+#include <faiss/impl/FaissAssert.h>
+#include <sstream>
+namespace faiss { namespace gpu {
+namespace {
+/// Normalizes a requested temporary-memory size. A request of zero is passed
+/// through unchanged; any other size is handed to utils::roundUp with a
+/// 16-byte granularity, since all allocations are bumped up to 16 bytes.
+size_t adjustStackSize(size_t sz) {
+  constexpr size_t kAlignment = 16;
+  return sz == 0 ? size_t{0} : utils::roundUp(sz, kAlignment);
+}
+} // namespace
+StackDeviceMemory::Stack::Stack(GpuResources* res, int d, size_t sz)  // Reserves one temporary-memory region of (adjusted) size sz for device d through res
+    : res_(res),
+      device_(d),
+      alloc_(nullptr),
+      allocSize_(adjustStackSize(sz)),  // 0 stays 0; any other size goes through adjustStackSize
+      start_(nullptr),
+      end_(nullptr),
+      head_(nullptr),
+      highWaterMemoryUsed_(0) {
+  if (allocSize_ == 0) {  // nothing requested: every pointer stays null and no memory is reserved
+    return;
+  }
+  DeviceScope s(device_);  // NOTE(review): presumably makes device_ current while allocating -- confirm in DeviceUtils.h
+  auto req = AllocRequest(AllocType::TemporaryMemoryBuffer,
+                          device_,
+                          MemorySpace::Device,
+                          res_->getDefaultStream(device_),
+                          allocSize_);
+  alloc_ = (char*) res_->allocMemory(req);
+  FAISS_ASSERT_FMT(
+    alloc_,
+    "could not reserve temporary memory region of size %zu", allocSize_);
+  // In order to disambiguate between our entire region of temporary memory
+  // versus the first allocation in the temporary memory region, ensure that the
+  // first address returned is +16 bytes from the beginning
+  start_ = alloc_ + 16;
+  head_ = start_;  // the stack starts out empty
+  end_ = alloc_ + allocSize_;  // usable range [start_, end_) is therefore allocSize_ - 16 bytes
+}
+/// Hands the backing buffer, if one was ever reserved, back to the
+/// GpuResources object it was obtained from.
+StackDeviceMemory::Stack::~Stack() {
+  DeviceScope scope(device_);
+  // FIXME: make sure there are no outstanding memory allocations?
+  if (alloc_ == nullptr) {
+    return;
+  }
+  res_->deallocMemory(device_, alloc_);
+}
+/// Number of bytes between the current stack head and the end of the region.
+size_t StackDeviceMemory::Stack::getSizeAvailable() const {
+  const auto remaining = end_ - head_;
+  return remaining;
+}
+char*  // Returns the start of a `size`-byte range carved from the current stack head
+StackDeviceMemory::Stack::getAlloc(size_t size,
+                                   cudaStream_t stream) {  // `stream` is compared against the streams of previous users of the same bytes
+  // The caller must already have checked (e.g. via getSizeAvailable()) that the request fits; otherwise the assertion below fires
+  auto sizeRemaining = getSizeAvailable();
+  FAISS_ASSERT(size <= sizeRemaining);
+  // We can make the allocation out of our stack
+  // Find all the ranges that we overlap that may have been
+  // previously allocated; our allocation will be [head, endAlloc)
+  char* startAlloc = head_;
+  char* endAlloc = head_ + size;
+  while (lastUsers_.size() > 0) {  // walk previously returned ranges, most recently returned first
+    auto& prevUser = lastUsers_.back();
+    // Because there is a previous user, we must overlap it
+    FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
+    if (stream != prevUser.stream_) {
+      // Synchronization required
+      streamWait({stream}, {prevUser.stream_});  // NOTE(review): presumably makes `stream` wait on work queued in prevUser.stream_ -- confirm in DeviceUtils.h
+    }
+    if (endAlloc < prevUser.end_) {
+      // We cover only the front of the previous range: shrink its record to the uncovered tail and stop
+      prevUser.start_ = endAlloc;
+      break;
+    }
+    // If we're the exact size of the previous request, then we
+    // don't need to continue
+    bool done = (prevUser.end_ == endAlloc);
+    lastUsers_.pop_back();  // the previous range is fully covered by the new allocation, so its record is dropped
+    if (done) {
+      break;
+    }
+  }
+  head_ = endAlloc;
+  FAISS_ASSERT(head_ <= end_);
+  highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
+                                  (size_t) (head_ - start_));  // track the peak number of bytes in use
+  FAISS_ASSERT(startAlloc);
+  return startAlloc;
+}
+/// Returns the range [p, p + size) to the stack and remembers `stream` as the
+/// last user of those bytes, so that a later getAlloc() on a different stream
+/// can synchronize against it.
+///
+/// p must lie inside [start_, end_), size must be a multiple of 16, and the
+/// range must end exactly at the current head: allocations are released in
+/// the reverse order they were made. Any violation trips a FAISS_ASSERT.
+void
+StackDeviceMemory::Stack::returnAlloc(char* p,
+                                      size_t size,
+                                      cudaStream_t stream) {
+  // This allocation should be within ourselves
+  FAISS_ASSERT(p >= start_ && p < end_);
+  // All allocations should have been adjusted to a multiple of 16 bytes
+  FAISS_ASSERT(size % 16 == 0);
+  // Allocations should be freed in the reverse order they are made, so the
+  // returned range has to end at the current head. The assertion is stated
+  // directly; guarding it with a test of its own negation added nothing.
+  FAISS_ASSERT(p + size == head_);
+  head_ = p;
+  lastUsers_.push_back(Range(p, p + size, stream));
+}
+/// Renders a human-readable snapshot of the stack: the device and total
+/// reserved size, the currently available range, the high-water mark of
+/// temporary memory used, and one line per remembered previous-user range
+/// (most recently returned first, numbered from lastUsers_.size() down to 1).
+std::string
+StackDeviceMemory::Stack::toString() const {
+  std::stringstream s;
+  s << "SDM device " << device_ << ": Total memory " << allocSize_ << " ["
+    << (void*) start_ << ", " << (void*) end_ << ")\n";
+  s << "     Available memory " << (size_t) (end_ - head_)
+    << " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
+  s << "     High water temp alloc " << highWaterMemoryUsed_ << "\n";
+  // Keep the counter in size_t, the type size() yields, rather than
+  // narrowing it to int
+  size_t i = lastUsers_.size();
+  for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
+    s << i-- << ": size " << (size_t) (it->end_ - it->start_)
+      << " stream " << it->stream_
+      << " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
+  }
+  return s.str();
+}
+/// Builds a single-device manager: remembers `device` and constructs the
+/// underlying stack from `res`, `device` and `allocPerDevice`.
+StackDeviceMemory::StackDeviceMemory(GpuResources* res,
+                                     int device,
+                                     size_t allocPerDevice)
+    : device_(device), stack_(res, device, allocPerDevice) {}
+/// Nothing to do explicitly; the stack member cleans up after itself.
+StackDeviceMemory::~StackDeviceMemory() = default;
+/// Device this manager was constructed for.
+int StackDeviceMemory::getDevice() const {
+  return device_;
+}
+/// Bytes still available in the underlying stack (forwards to the stack).
+size_t StackDeviceMemory::getSizeAvailable() const {
+  return stack_.getSizeAvailable();
+}
+/// Textual description of the underlying stack's state (forwards to the stack).
+std::string StackDeviceMemory::toString() const {
+  return stack_.toString();
+}
+/// Takes `size` bytes from the stack for use on `stream`. The size must be a
+/// multiple of 16 bytes; this is asserted before the stack is consulted.
+void* StackDeviceMemory::allocMemory(cudaStream_t stream, size_t size) {
+  // Requests are expected to arrive already padded to 16 bytes
+  FAISS_ASSERT(size % 16 == 0);
+  char* const block = stack_.getAlloc(size, stream);
+  return block;
+}
+/// Gives the `size`-byte block at `p` back to the stack, recording `stream` as
+/// its last user. Asserts that `p` is non-null and that `device` is the device
+/// this manager was constructed for.
+void StackDeviceMemory::deallocMemory(int device,
+                                      cudaStream_t stream,
+                                      size_t size,
+                                      void* p) {
+  FAISS_ASSERT(p);
+  FAISS_ASSERT(device == device_);
+  char* const block = static_cast<char*>(p);
+  stack_.returnAlloc(block, size, stream);
+}
+} } // namespace

data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h RENAMED

@@ -8,41 +8,38 @@
 #pragma once
-#include <faiss/gpu/utils/DeviceMemory.h>
+#include <faiss/gpu/GpuResources.h>
+#include <cuda_runtime.h>
 #include <list>
 #include <memory>
 #include <unordered_map>
+#include <tuple>
 namespace faiss { namespace gpu {
 /// Device memory manager that provides temporary memory allocations
-/// out of a region of memory
-class StackDeviceMemory : public DeviceMemory {
+/// out of a region of memory, for a single device
+class StackDeviceMemory {
  public:
   /// Allocate a new region of memory that we manage
-  explicit StackDeviceMemory(int device, size_t allocPerDevice);
+  StackDeviceMemory(GpuResources* res,
+                    int device,
+                    size_t allocPerDevice);
   /// Manage a region of memory for a particular device, with or
   /// without ownership
   StackDeviceMemory(int device, void* p, size_t size, bool isOwner);
-  ~StackDeviceMemory() override;
+  ~StackDeviceMemory();
-  /// Enable or disable the warning about not having enough temporary memory
-  /// when cudaMalloc gets called
-  void setCudaMallocWarning(bool b);
+  int getDevice() const;
-  int getDevice() const override;
+  /// All allocations requested should be a multiple of 16 bytes
+  void* allocMemory(cudaStream_t stream, size_t size);
+  void deallocMemory(int device, cudaStream_t, size_t size, void* p);
-  DeviceMemoryReservation getMemory(cudaStream_t stream,
-                                    size_t size) override;
-  size_t getSizeAvailable() const override;
-  std::string toString() const override;
-  size_t getHighWaterCudaMalloc() const override;
- protected:
-  void returnAllocation(DeviceMemoryReservation& m) override;
+  size_t getSizeAvailable() const;
+  std::string toString() const;
  protected:
   /// Previous allocation ranges and the streams for which
@@ -60,10 +57,8 @@ class StackDeviceMemory : public DeviceMemory {
   struct Stack {
     /// Constructor that allocates memory via cudaMalloc
-    Stack(int device, size_t size);
+    Stack(GpuResources* res, int device, size_t size);
-    /// Constructor that references a pre-allocated region of memory
-    Stack(int device, void* p, size_t size, bool isOwner);
     ~Stack();
     /// Returns how much size is available for an allocation without
@@ -80,23 +75,23 @@ class StackDeviceMemory : public DeviceMemory {
     /// Returns the stack state
     std::string toString() const;
-    /// Returns the high-water mark of cudaMalloc activity
-    size_t getHighWaterCudaMalloc() const;
+    /// Our GpuResources object
+    GpuResources* res_;
     /// Device this allocation is on
     int device_;
-    /// Do we own our region of memory?
-    bool isOwner_;
+    /// Where our temporary memory buffer is allocated; we allocate starting 16
+    /// bytes into this
+    char* alloc_;
+    /// Total size of our allocation
+    size_t allocSize_;
-    /// Where our allocation begins and ends
-    /// [start_, end_) is valid
+    /// Our temporary memory region; [start_, end_) is valid
     char* start_;
     char* end_;
-    /// Total size end_ - start_
-    size_t size_;
     /// Stack head within [start, end)
     char* head_;
@@ -104,19 +99,9 @@ class StackDeviceMemory : public DeviceMemory {
     /// possible synchronization purposes
     std::list<Range> lastUsers_;
-    /// How much cudaMalloc memory is currently outstanding?
-    size_t mallocCurrent_;
     /// What's the high water mark in terms of memory used from the
     /// temporary buffer?
     size_t highWaterMemoryUsed_;
-    /// What's the high water mark in terms of memory allocated via
-    /// cudaMalloc?
-    size_t highWaterMalloc_;
-    /// Whether or not a warning upon cudaMalloc is generated
-    bool cudaMallocWarning_;
   };
   /// Our device

data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h RENAMED

File without changes

data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp RENAMED

File without changes

data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h RENAMED

File without changes

data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp RENAMED

@@ -7,6 +7,7 @@
 // -*- c++ -*-
+#include <algorithm>
 #include <cstring>
 #include <faiss/impl/AuxIndexStructures.h>

data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h RENAMED

@@ -21,6 +21,8 @@
 #include <mutex>
 #include <faiss/Index.h>
+#include <faiss/impl/platform_macros.h>
 namespace faiss {
@@ -218,7 +220,7 @@ struct DistanceComputer {
  * Interrupt callback
  ***********************************************************/
-struct InterruptCallback {
+struct FAISS_API InterruptCallback {
     virtual bool want_interrupt () = 0;
     virtual ~InterruptCallback() {}