RubyGems - faiss - Versions diffs - 0.1.0 → 0.1.1 - Mend

faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (226) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/README.md +103 -3
data/ext/faiss/ext.cpp +99 -32
data/ext/faiss/extconf.rb +12 -2
data/lib/faiss/ext.bundle +0 -0
data/lib/faiss/index.rb +3 -3
data/lib/faiss/index_binary.rb +3 -3
data/lib/faiss/kmeans.rb +1 -1
data/lib/faiss/pca_matrix.rb +2 -2
data/lib/faiss/product_quantizer.rb +3 -3
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/AutoTune.cpp +719 -0
data/vendor/faiss/AutoTune.h +212 -0
data/vendor/faiss/Clustering.cpp +261 -0
data/vendor/faiss/Clustering.h +101 -0
data/vendor/faiss/IVFlib.cpp +339 -0
data/vendor/faiss/IVFlib.h +132 -0
data/vendor/faiss/Index.cpp +171 -0
data/vendor/faiss/Index.h +261 -0
data/vendor/faiss/Index2Layer.cpp +437 -0
data/vendor/faiss/Index2Layer.h +85 -0
data/vendor/faiss/IndexBinary.cpp +77 -0
data/vendor/faiss/IndexBinary.h +163 -0
data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
data/vendor/faiss/IndexBinaryFlat.h +54 -0
data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
data/vendor/faiss/IndexBinaryHNSW.h +56 -0
data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
data/vendor/faiss/IndexBinaryIVF.h +211 -0
data/vendor/faiss/IndexFlat.cpp +508 -0
data/vendor/faiss/IndexFlat.h +175 -0
data/vendor/faiss/IndexHNSW.cpp +1090 -0
data/vendor/faiss/IndexHNSW.h +170 -0
data/vendor/faiss/IndexIVF.cpp +909 -0
data/vendor/faiss/IndexIVF.h +353 -0
data/vendor/faiss/IndexIVFFlat.cpp +502 -0
data/vendor/faiss/IndexIVFFlat.h +118 -0
data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
data/vendor/faiss/IndexIVFPQ.h +161 -0
data/vendor/faiss/IndexIVFPQR.cpp +219 -0
data/vendor/faiss/IndexIVFPQR.h +65 -0
data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
data/vendor/faiss/IndexLSH.cpp +225 -0
data/vendor/faiss/IndexLSH.h +87 -0
data/vendor/faiss/IndexLattice.cpp +143 -0
data/vendor/faiss/IndexLattice.h +68 -0
data/vendor/faiss/IndexPQ.cpp +1188 -0
data/vendor/faiss/IndexPQ.h +199 -0
data/vendor/faiss/IndexPreTransform.cpp +288 -0
data/vendor/faiss/IndexPreTransform.h +91 -0
data/vendor/faiss/IndexReplicas.cpp +123 -0
data/vendor/faiss/IndexReplicas.h +76 -0
data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
data/vendor/faiss/IndexScalarQuantizer.h +127 -0
data/vendor/faiss/IndexShards.cpp +317 -0
data/vendor/faiss/IndexShards.h +100 -0
data/vendor/faiss/InvertedLists.cpp +623 -0
data/vendor/faiss/InvertedLists.h +334 -0
data/vendor/faiss/LICENSE +21 -0
data/vendor/faiss/MatrixStats.cpp +252 -0
data/vendor/faiss/MatrixStats.h +62 -0
data/vendor/faiss/MetaIndexes.cpp +351 -0
data/vendor/faiss/MetaIndexes.h +126 -0
data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
data/vendor/faiss/OnDiskInvertedLists.h +127 -0
data/vendor/faiss/VectorTransform.cpp +1157 -0
data/vendor/faiss/VectorTransform.h +322 -0
data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
data/vendor/faiss/c_api/AutoTune_c.h +64 -0
data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
data/vendor/faiss/c_api/Clustering_c.h +117 -0
data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
data/vendor/faiss/c_api/IndexShards_c.h +42 -0
data/vendor/faiss/c_api/Index_c.cpp +105 -0
data/vendor/faiss/c_api/Index_c.h +183 -0
data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
data/vendor/faiss/c_api/clone_index_c.h +32 -0
data/vendor/faiss/c_api/error_c.h +42 -0
data/vendor/faiss/c_api/error_impl.cpp +27 -0
data/vendor/faiss/c_api/error_impl.h +16 -0
data/vendor/faiss/c_api/faiss_c.h +58 -0
data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
data/vendor/faiss/c_api/index_factory_c.h +30 -0
data/vendor/faiss/c_api/index_io_c.cpp +42 -0
data/vendor/faiss/c_api/index_io_c.h +50 -0
data/vendor/faiss/c_api/macros_impl.h +110 -0
data/vendor/faiss/clone_index.cpp +147 -0
data/vendor/faiss/clone_index.h +38 -0
data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
data/vendor/faiss/gpu/GpuCloner.h +82 -0
data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
data/vendor/faiss/gpu/GpuDistance.h +52 -0
data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
data/vendor/faiss/gpu/GpuIndex.h +148 -0
data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
data/vendor/faiss/gpu/GpuResources.cpp +52 -0
data/vendor/faiss/gpu/GpuResources.h +73 -0
data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
data/vendor/faiss/gpu/test/TestUtils.h +93 -0
data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
data/vendor/faiss/gpu/utils/Timer.h +52 -0
data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
data/vendor/faiss/impl/FaissAssert.h +95 -0
data/vendor/faiss/impl/FaissException.cpp +66 -0
data/vendor/faiss/impl/FaissException.h +71 -0
data/vendor/faiss/impl/HNSW.cpp +818 -0
data/vendor/faiss/impl/HNSW.h +275 -0
data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
data/vendor/faiss/impl/PolysemousTraining.h +158 -0
data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
data/vendor/faiss/impl/ProductQuantizer.h +242 -0
data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
data/vendor/faiss/impl/ThreadedIndex.h +80 -0
data/vendor/faiss/impl/index_read.cpp +793 -0
data/vendor/faiss/impl/index_write.cpp +558 -0
data/vendor/faiss/impl/io.cpp +142 -0
data/vendor/faiss/impl/io.h +98 -0
data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
data/vendor/faiss/impl/lattice_Zn.h +199 -0
data/vendor/faiss/index_factory.cpp +392 -0
data/vendor/faiss/index_factory.h +25 -0
data/vendor/faiss/index_io.h +75 -0
data/vendor/faiss/misc/test_blas.cpp +84 -0
data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
data/vendor/faiss/tests/test_merge.cpp +258 -0
data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
data/vendor/faiss/tests/test_params_override.cpp +231 -0
data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
data/vendor/faiss/utils/Heap.cpp +122 -0
data/vendor/faiss/utils/Heap.h +495 -0
data/vendor/faiss/utils/WorkerThread.cpp +126 -0
data/vendor/faiss/utils/WorkerThread.h +61 -0
data/vendor/faiss/utils/distances.cpp +765 -0
data/vendor/faiss/utils/distances.h +243 -0
data/vendor/faiss/utils/distances_simd.cpp +809 -0
data/vendor/faiss/utils/extra_distances.cpp +336 -0
data/vendor/faiss/utils/extra_distances.h +54 -0
data/vendor/faiss/utils/hamming-inl.h +472 -0
data/vendor/faiss/utils/hamming.cpp +792 -0
data/vendor/faiss/utils/hamming.h +220 -0
data/vendor/faiss/utils/random.cpp +192 -0
data/vendor/faiss/utils/random.h +60 -0
data/vendor/faiss/utils/utils.cpp +783 -0
data/vendor/faiss/utils/utils.h +181 -0
metadata +216 -2

data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h ADDED Viewed

@@ -0,0 +1,100 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <faiss/gpu/GpuIndexIVF.h>
+#include <faiss/IndexScalarQuantizer.h>
+namespace faiss { namespace gpu {
+class IVFFlat;
+class GpuIndexFlat;
+/// Configuration for GpuIndexIVFScalarQuantizer; currently adds no options
+/// beyond those inherited from GpuIndexIVFConfig
+struct GpuIndexIVFScalarQuantizerConfig : GpuIndexIVFConfig {};
+/// GPU index deriving from GpuIndexIVF that mirrors the interface of
+/// faiss::IndexIVFScalarQuantizer and can be copied to/from that CPU type
+class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
+ public:
+  /// Construct from a pre-existing faiss::IndexIVFScalarQuantizer instance,
+  /// copying data over to the given GPU, if the input index is trained.
+  GpuIndexIVFScalarQuantizer(
+    GpuResources* resources,
+    const faiss::IndexIVFScalarQuantizer* index,
+    GpuIndexIVFScalarQuantizerConfig config =
+    GpuIndexIVFScalarQuantizerConfig());
+  /// Constructs a new instance with an empty flat quantizer; the user
+  /// provides the number of lists desired (nlist) and the quantizer type.
+  GpuIndexIVFScalarQuantizer(
+    GpuResources* resources,
+    int dims,
+    int nlist,
+    faiss::ScalarQuantizer::QuantizerType qtype,
+    faiss::MetricType metric = MetricType::METRIC_L2,
+    bool encodeResidual = true,
+    GpuIndexIVFScalarQuantizerConfig config =
+    GpuIndexIVFScalarQuantizerConfig());
+  ~GpuIndexIVFScalarQuantizer() override;
+  /// Reserve GPU memory in our inverted lists for numVecs vectors
+  void reserveMemory(size_t numVecs);
+  /// Initialize ourselves from the given CPU index; will overwrite
+  /// all data in ourselves
+  void copyFrom(const faiss::IndexIVFScalarQuantizer* index);
+  /// Copy ourselves to the given CPU index; will overwrite all data
+  /// in the index instance
+  void copyTo(faiss::IndexIVFScalarQuantizer* index) const;
+  /// After adding vectors, one can call this to reclaim device memory
+  /// to exactly the amount needed. Returns space reclaimed in bytes
+  size_t reclaimMemory();
+  void reset() override;
+  void train(Index::idx_t n, const float* x) override;
+ protected:
+  /// Called from GpuIndex for add/add_with_ids
+  void addImpl_(int n,
+                const float* x,
+                const Index::idx_t* ids) override;
+  /// Called from GpuIndex for search
+  void searchImpl_(int n,
+                   const float* x,
+                   int k,
+                   float* distances,
+                   Index::idx_t* labels) const override;
+  /// Called from train to handle SQ residual training
+  void trainResiduals_(Index::idx_t n, const float* x);
+ public:
+  /// Scalar quantizer; a public member, exposed like the CPU version
+  faiss::ScalarQuantizer sq;
+  /// Public flag, exposed like the CPU version (NOTE(review): presumably set from encodeResidual -- confirm)
+  bool by_residual;
+ private:
+  GpuIndexIVFScalarQuantizerConfig ivfSQConfig_;
+  /// Desired inverted list memory reservation, as a number of vectors
+  size_t reserveMemoryVecs_;
+  /// Instance that we own (held as a raw pointer); contains the inverted list
+  IVFFlat* index_;
+};
+} } // namespace

data/vendor/faiss/gpu/GpuIndicesOptions.h ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+namespace faiss { namespace gpu {
+/// How user vector index data is stored on the GPU
+enum IndicesOptions {
+  /// User indices live on the CPU only. The GPU hands back
+  /// (inverted list, offset) pairs, which the CPU then translates
+  /// into the real user index.
+  INDICES_CPU = 0,
+  /// User indices are stored nowhere, neither on the CPU nor on the
+  /// GPU. The caller receives the raw (inverted list, offset) as the
+  /// index.
+  INDICES_IVF = 1,
+  /// User indices are kept on the GPU as 32 bit integers, but are
+  /// returned as 64 bit integers
+  INDICES_32_BIT = 2,
+  /// User indices are kept on the GPU as 64 bit integers
+  INDICES_64_BIT = 3
+};
+} } // namespace

data/vendor/faiss/gpu/GpuResources.cpp ADDED Viewed

@@ -0,0 +1,52 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <faiss/gpu/GpuResources.h>
+#include <faiss/gpu/utils/DeviceUtils.h>
+namespace faiss { namespace gpu {
+// Out-of-line definition of the virtual destructor; there is nothing to
+// release at this level
+GpuResources::~GpuResources() = default;
+// Forwards to getBlasHandle() using the device reported by
+// getCurrentDevice()
+cublasHandle_t GpuResources::getBlasHandleCurrentDevice() {
+  const int currentDevice = getCurrentDevice();
+  return getBlasHandle(currentDevice);
+}
+// Forwards to getDefaultStream() using the device reported by
+// getCurrentDevice()
+cudaStream_t GpuResources::getDefaultStreamCurrentDevice() {
+  const int currentDevice = getCurrentDevice();
+  return getDefaultStream(currentDevice);
+}
+// Forwards to getAlternateStreams() using the device reported by
+// getCurrentDevice()
+std::vector<cudaStream_t> GpuResources::getAlternateStreamsCurrentDevice() {
+  const int currentDevice = getCurrentDevice();
+  return getAlternateStreams(currentDevice);
+}
+// Forwards to getMemoryManager() using the device reported by
+// getCurrentDevice()
+DeviceMemory& GpuResources::getMemoryManagerCurrentDevice() {
+  const int currentDevice = getCurrentDevice();
+  return getMemoryManager(currentDevice);
+}
+// Forwards to getAsyncCopyStream() using the device reported by
+// getCurrentDevice()
+cudaStream_t GpuResources::getAsyncCopyStreamCurrentDevice() {
+  const int currentDevice = getCurrentDevice();
+  return getAsyncCopyStream(currentDevice);
+}
+// Blocks the calling CPU thread until the default stream of `device`
+// has finished (cudaStreamSynchronize); a CUDA error is reported
+// through CUDA_VERIFY
+void GpuResources::syncDefaultStream(int device) {
+  CUDA_VERIFY(cudaStreamSynchronize(getDefaultStream(device)));
+}
+// Forwards to syncDefaultStream() using the device reported by
+// getCurrentDevice()
+void GpuResources::syncDefaultStreamCurrentDevice() {
+  const int currentDevice = getCurrentDevice();
+  syncDefaultStream(currentDevice);
+}
+} } // namespace

data/vendor/faiss/gpu/GpuResources.h ADDED Viewed

@@ -0,0 +1,73 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <faiss/gpu/utils/DeviceMemory.h>
+#include <cuda_runtime.h>
+#include <cublas_v2.h>
+#include <utility>
+#include <vector>
+namespace faiss { namespace gpu {
+/// Base class of GPU-side resource provider; hides provision of
+/// cuBLAS handles, CUDA streams and a temporary memory manager
+class GpuResources {
+ public:
+  virtual ~GpuResources();
+  /// Call to pre-allocate resources for a particular device. If this is
+  /// not called, then resources will be allocated at the first time
+  /// of demand
+  virtual void initializeForDevice(int device) = 0;
+  /// Returns the cuBLAS handle that we use for the given device
+  virtual cublasHandle_t getBlasHandle(int device) = 0;
+  /// Returns the stream that we order all computation on for the
+  /// given device
+  virtual cudaStream_t getDefaultStream(int device) = 0;
+  /// Returns the set of alternative streams that we use for the given device
+  virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
+  /// Returns the temporary memory manager for the given device
+  virtual DeviceMemory& getMemoryManager(int device) = 0;
+  /// Returns the available CPU pinned memory buffer as (pointer, size)
+  virtual std::pair<void*, size_t> getPinnedMemory() = 0;
+  /// Returns the stream on which we perform async CPU <-> GPU copies
+  virtual cudaStream_t getAsyncCopyStream(int device) = 0;
+  /// Calls getBlasHandle with the current device
+  cublasHandle_t getBlasHandleCurrentDevice();
+  /// Calls getDefaultStream with the current device
+  cudaStream_t getDefaultStreamCurrentDevice();
+  /// Synchronizes the CPU with respect to the default stream for the
+  /// given device;
+  /// equivalent to cudaStreamSynchronize(getDefaultStream(device))
+  void syncDefaultStream(int device);
+  /// Calls syncDefaultStream for the current device
+  void syncDefaultStreamCurrentDevice();
+  /// Calls getAlternateStreams for the current device
+  std::vector<cudaStream_t> getAlternateStreamsCurrentDevice();
+  /// Calls getMemoryManager for the current device
+  DeviceMemory& getMemoryManagerCurrentDevice();
+  /// Calls getAsyncCopyStream for the current device
+  cudaStream_t getAsyncCopyStreamCurrentDevice();
+};
+} } // namespace

data/vendor/faiss/gpu/StandardGpuResources.cpp ADDED Viewed

@@ -0,0 +1,295 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <faiss/gpu/StandardGpuResources.h>
+#include <faiss/gpu/utils/MemorySpace.h>
+#include <faiss/impl/FaissAssert.h>
+#include <limits>
+namespace faiss { namespace gpu {
+namespace {
+// Number of alternate streams created per device (for multi-streaming)
+constexpr int kNumStreams = 2;
+// Pinned host memory used by default for async CPU <-> GPU copies: 256 MiB
+constexpr size_t kDefaultPinnedMemoryAllocation =
+    static_cast<size_t>(256) * 1024 * 1024;
+// Temporary memory default for GPUs with <= 4 GiB of memory: 512 MiB
+constexpr size_t k4GiBTempMem = static_cast<size_t>(512) * 1024 * 1024;
+// Temporary memory default for GPUs with <= 8 GiB of memory: 1 GiB
+constexpr size_t k8GiBTempMem = static_cast<size_t>(1024) * 1024 * 1024;
+// Upper bound on temporary memory for any GPU: 1.5 GiB
+constexpr size_t kMaxTempMem = static_cast<size_t>(1536) * 1024 * 1024;
+} // namespace
+// Default construction: no pinned buffer allocated yet, default pinned
+// size, cudaMalloc warnings enabled. No device is touched here.
+StandardGpuResources::StandardGpuResources()
+    : pinnedMemAlloc_(nullptr),
+      pinnedMemAllocSize_(0),
+      // Ask for the largest possible amount with no specific device (-1);
+      // getDefaultTempMemForGPU clamps it down to the general limit
+      tempMemSize_(
+          getDefaultTempMemForGPU(-1, std::numeric_limits<size_t>::max())),
+      pinnedMemSize_(kDefaultPinnedMemoryAllocation),
+      cudaMallocWarning_(true) {
+}
+// Releases, per device, the streams and cuBLAS handles held by this
+// object, then the pinned host buffer if one was allocated
+StandardGpuResources::~StandardGpuResources() {
+  // Default streams: destroy only those we created ourselves; a stream
+  // registered in userDefaultStreams_ was supplied by the user
+  for (auto& entry : defaultStreams_) {
+    DeviceScope scope(entry.first);
+    const bool userProvided = userDefaultStreams_.count(entry.first) != 0;
+    if (!userProvided) {
+      CUDA_VERIFY(cudaStreamDestroy(entry.second));
+    }
+  }
+
+  // Alternate streams
+  for (auto& entry : alternateStreams_) {
+    DeviceScope scope(entry.first);
+    for (auto stream : entry.second) {
+      CUDA_VERIFY(cudaStreamDestroy(stream));
+    }
+  }
+
+  // Async copy streams
+  for (auto& entry : asyncCopyStreams_) {
+    DeviceScope scope(entry.first);
+    CUDA_VERIFY(cudaStreamDestroy(entry.second));
+  }
+
+  // cuBLAS handles
+  for (auto& entry : blasHandles_) {
+    DeviceScope scope(entry.first);
+    auto blasStatus = cublasDestroy(entry.second);
+    FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
+  }
+
+  // Pinned host memory, when it was ever allocated
+  if (pinnedMemAlloc_) {
+    freeMemorySpace(MemorySpace::HostPinned, pinnedMemAlloc_);
+  }
+}
+// Clamps a requested temporary-memory size to the limit that applies to
+// the given device.
+//   device    : device id, or -1 for "no specific device" (treated as
+//               having unlimited memory, so only the global cap applies)
+//   requested : desired size in bytes
+// Returns `requested` when it is within the limit, otherwise the limit.
+size_t
+StandardGpuResources::getDefaultTempMemForGPU(int device,
+                                              size_t requested) {
+  constexpr size_t kGiB = static_cast<size_t>(1024) * 1024 * 1024;
+
+  const auto totalMem = (device == -1) ?
+    std::numeric_limits<size_t>::max() :
+    getDeviceProperties(device).totalGlobalMem;
+
+  // Pick the cap by total device memory:
+  //   <= 4 GiB -> 512 MiB, <= 8 GiB -> 1 GiB, otherwise 1.5 GiB
+  size_t cap = kMaxTempMem;
+  if (totalMem <= 4 * kGiB) {
+    cap = k4GiBTempMem;
+  } else if (totalMem <= 8 * kGiB) {
+    cap = k8GiBTempMem;
+  }
+
+  // A request below the cap is honored as-is
+  return requested > cap ? cap : requested;
+}
+// Disables the temporary memory reservation (size 0) and turns off the
+// cudaMalloc warning
+void StandardGpuResources::noTempMemory() {
+  // Drop the reservation first; this re-creates the per-device memory
+  // managers of devices that are already initialized
+  setTempMemory(0);
+  // Then silence the warning on every memory manager that now exists
+  setCudaMallocWarning(false);
+}
+// Sets the amount of temporary memory reserved per device.
+//   size : requested size in bytes; clamped by getDefaultTempMemForGPU,
+//          first to the general limit and then per device
+// Devices that are already initialized get their memory manager
+// re-created with the new size. Does nothing when `size` equals the
+// currently stored value.
+void
+StandardGpuResources::setTempMemory(size_t size) {
+  if (tempMemSize_ != size) {
+    // adjust based on general limits
+    tempMemSize_ = getDefaultTempMemForGPU(-1, size);
+    // We need to re-initialize memory resources for all current devices that
+    // have been initialized.
+    // This should be safe to do, even if we are currently running work, because
+    // the cudaFree call that this implies will force-synchronize all GPUs with
+    // the CPU
+    for (auto& p : memory_) {
+      int device = p.first;
+      // Free the existing memory first
+      p.second.reset();
+      // Allocate new
+      auto mem = std::unique_ptr<StackDeviceMemory>(
+        new StackDeviceMemory(device,
+                              // adjust for this specific device
+                              getDefaultTempMemForGPU(device, tempMemSize_)));
+      // Carry over the configured cudaMalloc warning setting, exactly as
+      // initializeForDevice does for a freshly created memory manager;
+      // otherwise the replacement would silently fall back to its default
+      mem->setCudaMallocWarning(cudaMallocWarning_);
+      p.second = std::move(mem);
+    }
+  }
+}
+void
+StandardGpuResources::setPinnedMemory(size_t size) {
+  // Must be called before any device is initialized and before the pinned buffer is allocated; both are asserted, then the size is only recorded
+  FAISS_ASSERT(defaultStreams_.size() == 0);
+  FAISS_ASSERT(!pinnedMemAlloc_);
+  pinnedMemSize_ = size;
+}
+// Makes `stream` the default stream for `device`.
+//   device : device id the stream belongs to
+//   stream : user-owned stream (may be the null stream); this object
+//            never destroys it
+// May be called before or after the device has been initialized. When
+// the device is already initialized, the stream currently in use is
+// replaced immediately.
+void
+StandardGpuResources::setDefaultStream(int device, cudaStream_t stream) {
+  auto it = defaultStreams_.find(device);
+  if (it != defaultStreams_.end()) {
+    // The device is already initialized. Destroy the stream being replaced
+    // only if we created it ourselves: a stream registered in
+    // userDefaultStreams_ (possibly the null stream) belongs to the caller
+    // and must not be destroyed here, matching what the destructor does.
+    if (userDefaultStreams_.count(device) == 0) {
+      CUDA_VERIFY(cudaStreamDestroy(it->second));
+    }
+    it->second = stream;
+  }
+  // Record the stream as user-provided so that it is picked up on
+  // (later) initialization and skipped on destruction
+  userDefaultStreams_[device] = stream;
+}
+// Registers the null stream (nullptr) as the default stream of every
+// device index below getNumDevices()
+void StandardGpuResources::setDefaultNullStreamAllDevices() {
+  int device = 0;
+  while (device < getNumDevices()) {
+    setDefaultStream(device, nullptr);
+    ++device;
+  }
+}
+// Enables or disables the cudaMalloc warning.
+//   b : new setting
+void StandardGpuResources::setCudaMallocWarning(bool b) {
+  // Remember the setting; initializeForDevice applies it to memory
+  // managers created later
+  cudaMallocWarning_ = b;
+
+  // Push it to every memory manager that already exists
+  for (auto& deviceAndMemory : memory_) {
+    deviceAndMemory.second->setCudaMallocWarning(b);
+  }
+}
+// Returns true when resources for `device` have been set up. The
+// presence of an entry in defaultStreams_ serves as the marker.
+bool StandardGpuResources::isInitialized(int device) const {
+  return defaultStreams_.find(device) != defaultStreams_.end();
+}
+// Creates all per-device resources for `device`: default stream (or the
+// user-provided one), async copy stream, alternate streams, cuBLAS
+// handle and temporary memory manager. Returns immediately when the
+// device is already initialized. The pinned host buffer is allocated
+// when the first device is initialized.
+void
+StandardGpuResources::initializeForDevice(int device) {
+  if (isInitialized(device)) {
+    return;
+  }
+
+  // No default stream exists yet, so this is the first device being set
+  // up: allocate the pinned host memory now (unless its size is zero)
+  const bool firstDevice = defaultStreams_.empty();
+  if (firstDevice && pinnedMemSize_ > 0) {
+    allocMemorySpace(MemorySpace::HostPinned, &pinnedMemAlloc_, pinnedMemSize_);
+    pinnedMemAllocSize_ = pinnedMemSize_;
+  }
+
+  FAISS_ASSERT(device < getNumDevices());
+  DeviceScope scope(device);
+
+  // Fetch (and thereby cache) the device properties, then enforce the
+  // minimum supported compute capability of 3.0
+  auto& prop = getDeviceProperties(device);
+  FAISS_ASSERT_FMT(prop.major >= 3,
+                   "Device id %d with CC %d.%d not supported, "
+                   "need 3.0+ compute capability",
+                   device, prop.major, prop.minor);
+
+  // Default stream: create one unless the user registered their own
+  cudaStream_t defaultStream = 0;
+  auto userStream = userDefaultStreams_.find(device);
+  if (userStream == userDefaultStreams_.end()) {
+    CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,
+                                          cudaStreamNonBlocking));
+  } else {
+    defaultStream = userStream->second;
+  }
+  defaultStreams_[device] = defaultStream;
+
+  // Stream for async CPU <-> GPU copies
+  cudaStream_t asyncCopyStream = 0;
+  CUDA_VERIFY(cudaStreamCreateWithFlags(&asyncCopyStream,
+                                        cudaStreamNonBlocking));
+  asyncCopyStreams_[device] = asyncCopyStream;
+
+  // kNumStreams alternate streams
+  std::vector<cudaStream_t> deviceStreams;
+  int created = 0;
+  while (created < kNumStreams) {
+    cudaStream_t stream = 0;
+    CUDA_VERIFY(cudaStreamCreateWithFlags(&stream,
+                                          cudaStreamNonBlocking));
+    deviceStreams.push_back(stream);
+    ++created;
+  }
+  alternateStreams_[device] = std::move(deviceStreams);
+
+  // cuBLAS handle
+  cublasHandle_t blasHandle = 0;
+  auto blasStatus = cublasCreate(&blasHandle);
+  FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
+  blasHandles_[device] = blasHandle;
+
+  // Temporary memory manager, sized for this particular device and
+  // configured with the current cudaMalloc warning setting
+  FAISS_ASSERT(memory_.count(device) == 0);
+  const size_t tempMemForDevice = getDefaultTempMemForGPU(device, tempMemSize_);
+  auto mem = std::unique_ptr<StackDeviceMemory>(
+    new StackDeviceMemory(device, tempMemForDevice));
+  mem->setCudaMallocWarning(cudaMallocWarning_);
+  memory_.emplace(device, std::move(mem));
+}
+// Returns the cuBLAS handle for `device`, initializing the device's
+// resources first if that has not happened yet
+cublasHandle_t StandardGpuResources::getBlasHandle(int device) {
+  initializeForDevice(device);
+  cublasHandle_t handle = blasHandles_[device];
+  return handle;
+}
+// Returns the default stream for `device`, initializing the device's
+// resources first if that has not happened yet
+cudaStream_t StandardGpuResources::getDefaultStream(int device) {
+  initializeForDevice(device);
+  cudaStream_t stream = defaultStreams_[device];
+  return stream;
+}
+// Returns a copy of the list of alternate streams for `device`,
+// initializing the device's resources first if that has not happened yet
+std::vector<cudaStream_t> StandardGpuResources::getAlternateStreams(int device) {
+  initializeForDevice(device);
+  std::vector<cudaStream_t> streams = alternateStreams_[device];
+  return streams;
+}
+// Returns the temporary memory manager for `device`, initializing the
+// device's resources first if that has not happened yet
+DeviceMemory&
+StandardGpuResources::getMemoryManager(int device) {
+  initializeForDevice(device);
+  auto& manager = memory_[device];
+  return *manager;
+}
+// Returns the pinned host buffer as (pointer, size in bytes). No device
+// initialization is triggered here, so the pointer is null and the size
+// zero while no buffer has been allocated.
+std::pair<void*, size_t> StandardGpuResources::getPinnedMemory() {
+  return std::pair<void*, size_t>(pinnedMemAlloc_, pinnedMemAllocSize_);
+}
+// Returns the async CPU <-> GPU copy stream for `device`, initializing
+// the device's resources first if that has not happened yet
+cudaStream_t StandardGpuResources::getAsyncCopyStream(int device) {
+  initializeForDevice(device);
+  cudaStream_t stream = asyncCopyStreams_[device];
+  return stream;
+}
+} } // namespace