RubyGems - faiss - Versions diffs - 0.1.0 → 0.1.1 - Mend

faiss 0.1.0 → 0.1.1

Files changed (226) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/README.md +103 -3
data/ext/faiss/ext.cpp +99 -32
data/ext/faiss/extconf.rb +12 -2
data/lib/faiss/ext.bundle +0 -0
data/lib/faiss/index.rb +3 -3
data/lib/faiss/index_binary.rb +3 -3
data/lib/faiss/kmeans.rb +1 -1
data/lib/faiss/pca_matrix.rb +2 -2
data/lib/faiss/product_quantizer.rb +3 -3
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/AutoTune.cpp +719 -0
data/vendor/faiss/AutoTune.h +212 -0
data/vendor/faiss/Clustering.cpp +261 -0
data/vendor/faiss/Clustering.h +101 -0
data/vendor/faiss/IVFlib.cpp +339 -0
data/vendor/faiss/IVFlib.h +132 -0
data/vendor/faiss/Index.cpp +171 -0
data/vendor/faiss/Index.h +261 -0
data/vendor/faiss/Index2Layer.cpp +437 -0
data/vendor/faiss/Index2Layer.h +85 -0
data/vendor/faiss/IndexBinary.cpp +77 -0
data/vendor/faiss/IndexBinary.h +163 -0
data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
data/vendor/faiss/IndexBinaryFlat.h +54 -0
data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
data/vendor/faiss/IndexBinaryHNSW.h +56 -0
data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
data/vendor/faiss/IndexBinaryIVF.h +211 -0
data/vendor/faiss/IndexFlat.cpp +508 -0
data/vendor/faiss/IndexFlat.h +175 -0
data/vendor/faiss/IndexHNSW.cpp +1090 -0
data/vendor/faiss/IndexHNSW.h +170 -0
data/vendor/faiss/IndexIVF.cpp +909 -0
data/vendor/faiss/IndexIVF.h +353 -0
data/vendor/faiss/IndexIVFFlat.cpp +502 -0
data/vendor/faiss/IndexIVFFlat.h +118 -0
data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
data/vendor/faiss/IndexIVFPQ.h +161 -0
data/vendor/faiss/IndexIVFPQR.cpp +219 -0
data/vendor/faiss/IndexIVFPQR.h +65 -0
data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
data/vendor/faiss/IndexLSH.cpp +225 -0
data/vendor/faiss/IndexLSH.h +87 -0
data/vendor/faiss/IndexLattice.cpp +143 -0
data/vendor/faiss/IndexLattice.h +68 -0
data/vendor/faiss/IndexPQ.cpp +1188 -0
data/vendor/faiss/IndexPQ.h +199 -0
data/vendor/faiss/IndexPreTransform.cpp +288 -0
data/vendor/faiss/IndexPreTransform.h +91 -0
data/vendor/faiss/IndexReplicas.cpp +123 -0
data/vendor/faiss/IndexReplicas.h +76 -0
data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
data/vendor/faiss/IndexScalarQuantizer.h +127 -0
data/vendor/faiss/IndexShards.cpp +317 -0
data/vendor/faiss/IndexShards.h +100 -0
data/vendor/faiss/InvertedLists.cpp +623 -0
data/vendor/faiss/InvertedLists.h +334 -0
data/vendor/faiss/LICENSE +21 -0
data/vendor/faiss/MatrixStats.cpp +252 -0
data/vendor/faiss/MatrixStats.h +62 -0
data/vendor/faiss/MetaIndexes.cpp +351 -0
data/vendor/faiss/MetaIndexes.h +126 -0
data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
data/vendor/faiss/OnDiskInvertedLists.h +127 -0
data/vendor/faiss/VectorTransform.cpp +1157 -0
data/vendor/faiss/VectorTransform.h +322 -0
data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
data/vendor/faiss/c_api/AutoTune_c.h +64 -0
data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
data/vendor/faiss/c_api/Clustering_c.h +117 -0
data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
data/vendor/faiss/c_api/IndexShards_c.h +42 -0
data/vendor/faiss/c_api/Index_c.cpp +105 -0
data/vendor/faiss/c_api/Index_c.h +183 -0
data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
data/vendor/faiss/c_api/clone_index_c.h +32 -0
data/vendor/faiss/c_api/error_c.h +42 -0
data/vendor/faiss/c_api/error_impl.cpp +27 -0
data/vendor/faiss/c_api/error_impl.h +16 -0
data/vendor/faiss/c_api/faiss_c.h +58 -0
data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
data/vendor/faiss/c_api/index_factory_c.h +30 -0
data/vendor/faiss/c_api/index_io_c.cpp +42 -0
data/vendor/faiss/c_api/index_io_c.h +50 -0
data/vendor/faiss/c_api/macros_impl.h +110 -0
data/vendor/faiss/clone_index.cpp +147 -0
data/vendor/faiss/clone_index.h +38 -0
data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
data/vendor/faiss/gpu/GpuCloner.h +82 -0
data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
data/vendor/faiss/gpu/GpuDistance.h +52 -0
data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
data/vendor/faiss/gpu/GpuIndex.h +148 -0
data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
data/vendor/faiss/gpu/GpuResources.cpp +52 -0
data/vendor/faiss/gpu/GpuResources.h +73 -0
data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
data/vendor/faiss/gpu/test/TestUtils.h +93 -0
data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
data/vendor/faiss/gpu/utils/Timer.h +52 -0
data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
data/vendor/faiss/impl/FaissAssert.h +95 -0
data/vendor/faiss/impl/FaissException.cpp +66 -0
data/vendor/faiss/impl/FaissException.h +71 -0
data/vendor/faiss/impl/HNSW.cpp +818 -0
data/vendor/faiss/impl/HNSW.h +275 -0
data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
data/vendor/faiss/impl/PolysemousTraining.h +158 -0
data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
data/vendor/faiss/impl/ProductQuantizer.h +242 -0
data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
data/vendor/faiss/impl/ThreadedIndex.h +80 -0
data/vendor/faiss/impl/index_read.cpp +793 -0
data/vendor/faiss/impl/index_write.cpp +558 -0
data/vendor/faiss/impl/io.cpp +142 -0
data/vendor/faiss/impl/io.h +98 -0
data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
data/vendor/faiss/impl/lattice_Zn.h +199 -0
data/vendor/faiss/index_factory.cpp +392 -0
data/vendor/faiss/index_factory.h +25 -0
data/vendor/faiss/index_io.h +75 -0
data/vendor/faiss/misc/test_blas.cpp +84 -0
data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
data/vendor/faiss/tests/test_merge.cpp +258 -0
data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
data/vendor/faiss/tests/test_params_override.cpp +231 -0
data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
data/vendor/faiss/utils/Heap.cpp +122 -0
data/vendor/faiss/utils/Heap.h +495 -0
data/vendor/faiss/utils/WorkerThread.cpp +126 -0
data/vendor/faiss/utils/WorkerThread.h +61 -0
data/vendor/faiss/utils/distances.cpp +765 -0
data/vendor/faiss/utils/distances.h +243 -0
data/vendor/faiss/utils/distances_simd.cpp +809 -0
data/vendor/faiss/utils/extra_distances.cpp +336 -0
data/vendor/faiss/utils/extra_distances.h +54 -0
data/vendor/faiss/utils/hamming-inl.h +472 -0
data/vendor/faiss/utils/hamming.cpp +792 -0
data/vendor/faiss/utils/hamming.h +220 -0
data/vendor/faiss/utils/random.cpp +192 -0
data/vendor/faiss/utils/random.h +60 -0
data/vendor/faiss/utils/utils.cpp +783 -0
data/vendor/faiss/utils/utils.h +181 -0
metadata +216 -2

data/vendor/faiss/gpu/utils/DeviceMemory.cpp ADDED Viewed

@@ -0,0 +1,77 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <faiss/gpu/utils/DeviceMemory.h>
+#include <faiss/gpu/utils/DeviceUtils.h>
+#include <faiss/impl/FaissAssert.h>
+namespace faiss { namespace gpu {
+/// Creates an empty reservation: no manager, no memory and a size of zero.
+/// Destroying such an object does nothing because data_ is null.
+DeviceMemoryReservation::DeviceMemoryReservation()
+    : state_(nullptr), device_(0), data_(nullptr), size_(0), stream_(0) {
+}
+/// Wraps the allocation `p` of `size` bytes on `device`, associated with
+/// `stream`. `state` is the manager whose returnAllocation() is invoked when
+/// this object is destroyed or move-assigned over while still holding `p`.
+DeviceMemoryReservation::DeviceMemoryReservation(
+  DeviceMemory* state, int device, void* p, size_t size, cudaStream_t stream)
+    : state_(state), device_(device), data_(p), size_(size), stream_(stream) {
+}
+/// Move constructor: copies every field from `m`, then clears m's data
+/// pointer so that m's destructor does not return the allocation as well.
+DeviceMemoryReservation::DeviceMemoryReservation(
+  DeviceMemoryReservation&& m) noexcept
+    : state_(m.state_),
+      device_(m.device_),
+      data_(m.data_),
+      size_(m.size_),
+      stream_(m.stream_) {
+  // Only the data pointer is reset; the other fields of m are left as-is
+  m.data_ = nullptr;
+}
+/// Hands a still-held allocation back to the manager it came from.
+DeviceMemoryReservation::~DeviceMemoryReservation() {
+  // Empty or moved-from reservation: nothing to give back
+  if (data_ == nullptr) {
+    return;
+  }
+  // A reservation that holds memory must know its manager
+  FAISS_ASSERT(state_);
+  state_->returnAllocation(*this);
+  data_ = nullptr;
+}
+/// Move assignment. An allocation currently held by this object is first
+/// returned to its manager; this object then takes over all fields of `m`,
+/// and m's data pointer is cleared so that m's destructor does not return
+/// the allocation a second time. Assigning an object to itself is a no-op.
+DeviceMemoryReservation&
+DeviceMemoryReservation::operator=(DeviceMemoryReservation&& m) {
+  // Guard against `x = std::move(x)`: without it the live allocation would
+  // be returned to the manager and this object silently left empty
+  if (this == &m) {
+    return *this;
+  }
+  if (data_) {
+    // A reservation that holds memory must know its manager
+    FAISS_ASSERT(state_);
+    state_->returnAllocation(*this);
+  }
+  state_ = m.state_;
+  device_ = m.device_;
+  data_ = m.data_;
+  size_ = m.size_;
+  stream_ = m.stream_;
+  // Ownership has moved here; only the data pointer of m is reset
+  m.data_ = nullptr;
+  return *this;
+}
+/// Out-of-line definition of the virtual destructor; it has nothing to do.
+DeviceMemory::~DeviceMemory() = default;
+} } // namespace

data/vendor/faiss/gpu/utils/DeviceMemory.h ADDED Viewed

@@ -0,0 +1,71 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <cuda_runtime.h>
+#include <string>
+namespace faiss { namespace gpu {
+class DeviceMemory;
+/// Handle to a temporary memory allocation handed out by a DeviceMemory
+/// manager. The handle is movable but not copyable. The out-of-line
+/// definitions (DeviceMemory.cpp) return a still-held allocation to the
+/// manager via DeviceMemory::returnAllocation() on destruction and on move
+/// assignment.
+class DeviceMemoryReservation {
+ public:
+  /// Creates an empty handle that holds no memory
+  DeviceMemoryReservation();
+  /// Creates a handle for the allocation `p` of `size` bytes on `device`,
+  /// associated with `stream`; `state` is the manager it is returned to
+  DeviceMemoryReservation(DeviceMemory* state,
+                          int device, void* p, size_t size,
+                          cudaStream_t stream);
+  /// Takes over the allocation of `m`; afterwards `m` holds no memory
+  DeviceMemoryReservation(DeviceMemoryReservation&& m) noexcept;
+  /// Not copyable (already implied by the move members; stated explicitly)
+  DeviceMemoryReservation(const DeviceMemoryReservation&) = delete;
+  ~DeviceMemoryReservation();
+  /// Takes over the allocation of `m`; afterwards `m` holds no memory
+  DeviceMemoryReservation& operator=(DeviceMemoryReservation&& m);
+  /// Not copy-assignable (see the deleted copy constructor)
+  DeviceMemoryReservation& operator=(const DeviceMemoryReservation&) = delete;
+  /// Accessors for the stored fields. They are const so that they can also
+  /// be called through a const reference; get() returns null for an empty
+  /// or moved-from handle.
+  int device() const { return device_; }
+  void* get() const { return data_; }
+  size_t size() const { return size_; }
+  cudaStream_t stream() const { return stream_; }
+ private:
+  /// Manager that the allocation is returned to
+  DeviceMemory* state_;
+  /// Device id passed at construction
+  int device_;
+  /// Pointer to the reserved memory; null when nothing is held
+  void* data_;
+  /// Size in bytes passed at construction
+  size_t size_;
+  /// Stream passed at construction
+  cudaStream_t stream_;
+};
+/// Manages temporary memory allocations on a GPU device.
+/// This is a pure interface: every operation below is pure virtual and has
+/// to be provided by a concrete manager.
+class DeviceMemory {
+ public:
+  /// Virtual so that a manager can be destroyed through a DeviceMemory*
+  virtual ~DeviceMemory();
+  /// Returns the device we are managing memory for
+  virtual int getDevice() const = 0;
+  /// Obtains a temporary memory allocation for our device,
+  /// whose usage is ordered with respect to the given stream.
+  /// The allocation is handed out as a DeviceMemoryReservation handle.
+  virtual DeviceMemoryReservation getMemory(cudaStream_t stream,
+                                            size_t size) = 0;
+  /// Returns the current size available without calling cudaMalloc
+  virtual size_t getSizeAvailable() const = 0;
+  /// Returns a string containing our current memory manager state
+  virtual std::string toString() const = 0;
+  /// Returns the high-water mark of cudaMalloc allocations for our
+  /// device
+  virtual size_t getHighWaterCudaMalloc() const = 0;
+ protected:
+  /// DeviceMemoryReservation needs access to the protected
+  /// returnAllocation() below, hence the friendship
+  friend class DeviceMemoryReservation;
+  /// Receives a reservation whose memory is being given back to the manager
+  virtual void returnAllocation(DeviceMemoryReservation& m) = 0;
+};
+} } // namespace

data/vendor/faiss/gpu/utils/DeviceUtils.h ADDED Viewed

@@ -0,0 +1,185 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <faiss/impl/FaissAssert.h>
+#include <cuda_runtime.h>
+#include <cublas_v2.h>
+#include <vector>
+namespace faiss { namespace gpu {
+/// Returns the current thread-local GPU device
+int getCurrentDevice();
+/// Sets the current thread-local GPU device
+void setCurrentDevice(int device);
+/// Returns the number of available GPU devices
+int getNumDevices();
+/// Starts the CUDA profiler (exposed via SWIG)
+void profilerStart();
+/// Stops the CUDA profiler (exposed via SWIG)
+void profilerStop();
+/// Synchronizes the CPU against all devices (equivalent to
+/// cudaDeviceSynchronize for each device)
+void synchronizeAllDevices();
+/// Returns a cached cudaDeviceProp for the given device
+const cudaDeviceProp& getDeviceProperties(int device);
+/// Returns the cached cudaDeviceProp for the current device
+const cudaDeviceProp& getCurrentDeviceProperties();
+/// Returns the maximum number of threads available for the given GPU
+/// device
+int getMaxThreads(int device);
+/// Equivalent to getMaxThreads(getCurrentDevice())
+int getMaxThreadsCurrentDevice();
+/// Returns the maximum smem available for the given GPU device
+size_t getMaxSharedMemPerBlock(int device);
+/// Equivalent to getMaxSharedMemPerBlock(getCurrentDevice())
+size_t getMaxSharedMemPerBlockCurrentDevice();
+/// For a given pointer, returns whether or not it is located on
+/// a device (deviceId >= 0) or the host (-1).
+int getDeviceForAddress(const void* p);
+/// Does the given device support full unified memory sharing host
+/// memory?
+bool getFullUnifiedMemSupport(int device);
+/// Equivalent to getFullUnifiedMemSupport(getCurrentDevice())
+bool getFullUnifiedMemSupportCurrentDevice();
+/// Returns the maximum k-selection value supported based on the CUDA SDK that
+/// we were compiled with. .cu files can use DeviceDefs.cuh, but this is for
+/// non-CUDA files
+int getMaxKSelection();
+/// RAII object to set the current device, and restore the previous
+/// device upon destruction
+class DeviceScope {
+ public:
+  /// `device` is the device to switch to; explicit so that an int is never
+  /// converted to a DeviceScope implicitly
+  explicit DeviceScope(int device);
+  ~DeviceScope();
+ private:
+  /// Presumably the device that was current at construction and that the
+  /// destructor restores (the definitions are not part of this header)
+  int prevDevice_;
+};
+/// RAII object to manage a cublasHandle_t
+/// (constructor and destructor are defined outside this header)
+class CublasHandleScope {
+ public:
+  CublasHandleScope();
+  ~CublasHandleScope();
+  /// Returns the managed handle; the scope object keeps managing it
+  cublasHandle_t get() { return blasHandle_; }
+ private:
+  /// The handle managed by this scope object
+  cublasHandle_t blasHandle_;
+};
+/// RAII object to manage a cudaEvent_t.
+/// Movable but not copyable.
+class CudaEvent {
+ public:
+  /// Creates an event and records it in this stream
+  explicit CudaEvent(cudaStream_t stream);
+  /// Not copyable
+  CudaEvent(const CudaEvent& event) = delete;
+  CudaEvent(CudaEvent&& event) noexcept;
+  ~CudaEvent();
+  /// Returns the managed event; the CudaEvent object keeps managing it
+  inline cudaEvent_t get() { return event_; }
+  /// Wait on this event in this stream
+  void streamWaitOnEvent(cudaStream_t stream);
+  /// Have the CPU wait for the completion of this event
+  void cpuWaitOnEvent();
+  CudaEvent& operator=(CudaEvent&& event) noexcept;
+  /// Not copy-assignable; declared with the canonical const-reference
+  /// parameter so that it mirrors the deleted copy constructor
+  CudaEvent& operator=(const CudaEvent& event) = delete;
+ private:
+  /// The event managed by this object
+  cudaEvent_t event_;
+};
+/// Wrapper to test return status of CUDA functions.
+/// Evaluates X exactly once and asserts (via FAISS_ASSERT_FMT) that the
+/// result equals cudaSuccess; the failure message carries the numeric error
+/// code and the text from cudaGetErrorString. The do/while(0) wrapper makes
+/// the macro usable as a single statement.
+#define CUDA_VERIFY(X)                                                  \
+  do {                                                                  \
+    auto err__ = (X);                                                   \
+    FAISS_ASSERT_FMT(err__ == cudaSuccess, "CUDA error %d %s",          \
+                     (int) err__, cudaGetErrorString(err__));           \
+  } while (0)
+/// Wrapper to synchronously probe for CUDA errors.
+/// When FAISS_GPU_SYNC_ERROR is defined, CUDA_TEST_ERROR() verifies the
+/// result of cudaDeviceSynchronize(); otherwise it verifies the result of
+/// cudaGetLastError(). Uncomment the define below to enable the former.
+// #define FAISS_GPU_SYNC_ERROR 1
+#ifdef FAISS_GPU_SYNC_ERROR
+#define CUDA_TEST_ERROR()                       \
+  do {                                          \
+    CUDA_VERIFY(cudaDeviceSynchronize());       \
+  } while (0)
+#else
+#define CUDA_TEST_ERROR()                       \
+  do {                                          \
+    CUDA_VERIFY(cudaGetLastError());            \
+  } while (0)
+#endif
+/// Makes a collection of streams wait on another collection of streams.
+/// For every stream in `listWaitOn` an event is created and recorded; every
+/// stream in `listWaiting` is then made to wait on each of those events,
+/// after which the events are destroyed.
+template <typename L1, typename L2>
+void streamWaitBase(const L1& listWaiting, const L2& listWaitOn) {
+  // One event (created with timing disabled) per stream to be waited on
+  std::vector<cudaEvent_t> recorded;
+  for (auto& source : listWaitOn) {
+    cudaEvent_t ev;
+    CUDA_VERIFY(cudaEventCreateWithFlags(&ev, cudaEventDisableTiming));
+    CUDA_VERIFY(cudaEventRecord(ev, source));
+    recorded.push_back(ev);
+  }
+  // Every waiting stream waits on all of the recorded events
+  for (auto& waiter : listWaiting) {
+    for (size_t i = 0; i < recorded.size(); ++i) {
+      CUDA_VERIFY(cudaStreamWaitEvent(waiter, recorded[i], 0));
+    }
+  }
+  // The events are destroyed once all waits have been issued
+  for (size_t i = 0; i < recorded.size(); ++i) {
+    CUDA_VERIFY(cudaEventDestroy(recorded[i]));
+  }
+}
+/// These versions allow usage of initializer_list as arguments, since
+/// otherwise {...} doesn't have a type.
+/// All three overloads forward unchanged to streamWaitBase(a, b): the
+/// streams in `a` wait on the streams in `b`.
+/// Overload: `a` is any iterable collection, `b` is a braced list
+template <typename L1>
+void streamWait(const L1& a,
+                const std::initializer_list<cudaStream_t>& b) {
+  streamWaitBase(a, b);
+}
+/// Overload: `a` is a braced list, `b` is any iterable collection
+template <typename L2>
+void streamWait(const std::initializer_list<cudaStream_t>& a,
+                const L2& b) {
+  streamWaitBase(a, b);
+}
+/// Overload: both arguments are braced lists. Not a template, hence marked
+/// inline because it is defined in a header.
+inline void streamWait(const std::initializer_list<cudaStream_t>& a,
+                       const std::initializer_list<cudaStream_t>& b) {
+  streamWaitBase(a, b);
+}
+} } // namespace

data/vendor/faiss/gpu/utils/MemorySpace.cpp ADDED Viewed

@@ -0,0 +1,89 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <faiss/gpu/utils/MemorySpace.h>
+#include <faiss/impl/FaissAssert.h>
+#include <cuda_runtime.h>
+namespace faiss { namespace gpu {
+/// Allocates `size` bytes in the requested memory space and stores the
+/// resulting pointer through `p`.
+/// Device uses cudaMalloc, Unified uses cudaMallocManaged (only when
+/// FAISS_UNIFIED_MEM is defined) and HostPinned uses cudaHostAlloc.
+/// A failed allocation is reported through FAISS_THROW_IF_NOT_FMT; an
+/// unknown space value trips an assertion.
+void allocMemorySpaceV(MemorySpace space, void** p, size_t size) {
+  switch (space) {
+    case MemorySpace::Device: {
+      auto err = cudaMalloc(p, size);
+      // Allocation failure is reported to the caller, not asserted
+      FAISS_THROW_IF_NOT_FMT(
+        err == cudaSuccess,
+        "failed to cudaMalloc %zu bytes (error %d %s)",
+        size, (int) err, cudaGetErrorString(err));
+      return;
+    }
+    case MemorySpace::Unified: {
+#ifdef FAISS_UNIFIED_MEM
+      auto err = cudaMallocManaged(p, size);
+      // Allocation failure is reported to the caller, not asserted
+      FAISS_THROW_IF_NOT_FMT(
+        err == cudaSuccess,
+        "failed to cudaMallocManaged %zu bytes (error %d %s)",
+        size, (int) err, cudaGetErrorString(err));
+#else
+      // Built without unified memory support: always an error
+      FAISS_THROW_MSG("Attempting to allocate via cudaMallocManaged "
+                      "without CUDA 8+ support");
+#endif
+      return;
+    }
+    case MemorySpace::HostPinned: {
+      auto err = cudaHostAlloc(p, size, cudaHostAllocDefault);
+      // Allocation failure is reported to the caller, not asserted
+      FAISS_THROW_IF_NOT_FMT(
+        err == cudaSuccess,
+        "failed to cudaHostAlloc %zu bytes (error %d %s)",
+        size, (int) err, cudaGetErrorString(err));
+      return;
+    }
+    default:
+      // Not one of the enumerators handled above
+      FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
+      return;
+  }
+}
+/// Releases memory obtained for the given memory space.
+/// Allocation is allowed to fail, but a failing free is treated as fatal:
+/// every error here goes through an assertion rather than an exception.
+/// Device and Unified memory are both released with cudaFree, HostPinned
+/// memory with cudaFreeHost.
+void freeMemorySpace(MemorySpace space, void* p) {
+  switch (space) {
+    case MemorySpace::Device:
+    case MemorySpace::Unified: {
+      // Both spaces share the same release call
+      auto err = cudaFree(p);
+      FAISS_ASSERT_FMT(err == cudaSuccess,
+                       "Failed to cudaFree pointer %p (error %d %s)",
+                       p, (int) err, cudaGetErrorString(err));
+      return;
+    }
+    case MemorySpace::HostPinned: {
+      auto err = cudaFreeHost(p);
+      FAISS_ASSERT_FMT(err == cudaSuccess,
+                       "Failed to cudaFreeHost pointer %p (error %d %s)",
+                       p, (int) err, cudaGetErrorString(err));
+      return;
+    }
+    default:
+      // Not one of the enumerators handled above
+      FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
+      return;
+  }
+}
+} }

data/vendor/faiss/gpu/utils/MemorySpace.h ADDED Viewed

@@ -0,0 +1,44 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <cuda.h>
+#if CUDA_VERSION >= 8000
+// Whether or not we enable usage of CUDA Unified Memory
+#define FAISS_UNIFIED_MEM 1
+#endif
+namespace faiss { namespace gpu {
+/// Selects which CUDA allocation/free pair is used for a block of memory.
+/// The enumerators carry explicit values starting at 1; this is an unscoped
+/// enum, so the names are also visible without the `MemorySpace::` prefix.
+enum MemorySpace {
+  /// Managed using cudaMalloc/cudaFree
+  Device = 1,
+  /// Managed using cudaMallocManaged/cudaFree
+  Unified = 2,
+  /// Managed using cudaHostAlloc/cudaFreeHost
+  HostPinned = 3,
+};
+/// All memory allocations and de-allocations come through these functions
+/// Allocates CUDA memory for a given memory space (void pointer)
+/// Throws a FaissException if we are unable to allocate the memory
+void allocMemorySpaceV(MemorySpace space, void** p, size_t size);
+/// Typed convenience wrapper around allocMemorySpaceV: `p` is converted to
+/// void** (going through void*) and the call is forwarded unchanged.
+/// `size` is passed on as given; it is not scaled by sizeof(T).
+template <typename T>
+inline void allocMemorySpace(MemorySpace space, T** p, size_t size) {
+  void** const untyped = static_cast<void**>(static_cast<void*>(p));
+  allocMemorySpaceV(space, untyped, size);
+}
+/// Frees CUDA memory for a given memory space
+/// Asserts if we are unable to free the region
+void freeMemorySpace(MemorySpace space, void* p);
+} }