faiss 0.2.7 → 0.3.1

Files changed (172)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/lib/faiss.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  12. data/vendor/faiss/faiss/AutoTune.h +0 -1
  13. data/vendor/faiss/faiss/Clustering.cpp +4 -18
  14. data/vendor/faiss/faiss/Clustering.h +31 -21
  15. data/vendor/faiss/faiss/IVFlib.cpp +22 -11
  16. data/vendor/faiss/faiss/Index.cpp +1 -1
  17. data/vendor/faiss/faiss/Index.h +20 -5
  18. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  21. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  22. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  23. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
  28. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  29. data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
  30. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
  34. data/vendor/faiss/faiss/IndexHNSW.h +12 -48
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  36. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  37. data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
  38. data/vendor/faiss/faiss/IndexIVF.h +37 -5
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  43. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
  44. data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
  45. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  46. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  48. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  49. data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
  50. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
  52. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  53. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  54. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  56. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  57. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
  59. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  60. data/vendor/faiss/faiss/IndexNSG.h +10 -10
  61. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  62. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  63. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  64. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  65. data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
  66. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  67. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  68. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
  69. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  70. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  71. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  72. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  73. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  74. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  75. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  76. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  77. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  78. data/vendor/faiss/faiss/clone_index.h +3 -0
  79. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
  80. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  81. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
  82. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  84. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  85. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  89. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  90. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  91. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
  92. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  93. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  95. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  96. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
  97. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  98. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  99. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  100. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  101. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  102. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  103. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
  104. data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
  105. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  106. data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
  107. data/vendor/faiss/faiss/impl/HNSW.h +9 -8
  108. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  109. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
  110. data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
  111. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  113. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  114. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  116. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  117. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  119. data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
  122. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
  123. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  125. data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
  126. data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
  127. data/vendor/faiss/faiss/impl/io.cpp +10 -10
  128. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  129. data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
  130. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
  131. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  132. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  133. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
  134. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  135. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  136. data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
  137. data/vendor/faiss/faiss/index_factory.cpp +10 -7
  138. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  139. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
  140. data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
  141. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  142. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  143. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  144. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  145. data/vendor/faiss/faiss/utils/distances.cpp +128 -74
  146. data/vendor/faiss/faiss/utils/distances.h +81 -4
  147. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  148. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  149. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  150. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  151. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  152. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  153. data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
  154. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  155. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  156. data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
  157. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  158. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  159. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  160. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
  161. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
  162. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  163. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  164. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  165. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  166. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  167. data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
  168. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  169. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  170. data/vendor/faiss/faiss/utils/utils.cpp +112 -6
  171. data/vendor/faiss/faiss/utils/utils.h +57 -20
  172. metadata +11 -4
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h

@@ -23,24 +23,19 @@ class GpuIndexFlat;
 class IVFPQ;
 
 struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
-    inline GpuIndexIVFPQConfig()
-            : useFloat16LookupTables(false),
-              usePrecomputedTables(false),
-              interleavedLayout(false),
-              useMMCodeDistance(false) {}
-
     /// Whether or not float16 residual distance tables are used in the
     /// list scanning kernels. When subQuantizers * 2^bitsPerCode >
     /// 16384, this is required.
-    bool useFloat16LookupTables;
+    bool useFloat16LookupTables = false;
 
     /// Whether or not we enable the precomputed table option for
     /// search, which can substantially increase the memory requirement.
-    bool usePrecomputedTables;
+    bool usePrecomputedTables = false;
 
     /// Use the alternative memory layout for the IVF lists
-    /// WARNING: this is a feature under development, do not use!
-    bool interleavedLayout;
+    /// WARNING: this is a feature under development, and is only supported with
+    /// RAFT enabled for the index. Do not use if RAFT is not enabled.
+    bool interleavedLayout = false;
 
     /// Use GEMM-backed computation of PQ code distances for the no precomputed
     /// table version of IVFPQ.
@@ -50,7 +45,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
     /// Note that MM code distance is enabled automatically if one uses a number
     /// of dimensions per sub-quantizer that is not natively specialized (an odd
     /// number like 7 or so).
-    bool useMMCodeDistance;
+    bool useMMCodeDistance = false;
 };
 
 /// IVFPQ index for the GPU
@@ -139,6 +134,22 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
     ProductQuantizer pq;
 
    protected:
+    /// Initialize appropriate index
+    void setIndex_(
+            GpuResources* resources,
+            int dim,
+            idx_t nlist,
+            faiss::MetricType metric,
+            float metricArg,
+            int numSubQuantizers,
+            int bitsPerSubQuantizer,
+            bool useFloat16LookupTables,
+            bool useMMCodeDistance,
+            bool interleavedLayout,
+            float* pqCentroidData,
+            IndicesOptions indicesOptions,
+            MemorySpace space);
+
     /// Throws errors if configuration settings are improper
     void verifyPQSettings_() const;
 
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h

@@ -18,11 +18,9 @@ class IVFFlat;
 class GpuIndexFlat;
 
 struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
-    inline GpuIndexIVFScalarQuantizerConfig() : interleavedLayout(true) {}
-
     /// Use the alternative memory layout for the IVF lists
     /// (currently the default)
-    bool interleavedLayout;
+    bool interleavedLayout = true;
};
 
 /// Wrapper around the GPU implementation that looks like
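Both config structs above trade hand-written default constructors for in-class member initializers, so a value-initialized config keeps the same defaults with less code. A minimal usage sketch follows; the GpuIndexIVFPQ constructor signature is the usual one from the public faiss GPU API and is assumed here rather than shown in this diff:

#include <faiss/gpu/GpuIndexIVFPQ.h>
#include <faiss/gpu/StandardGpuResources.h>

int main() {
    faiss::gpu::StandardGpuResources res;

    // Every field starts from its in-class default (all false for IVFPQ),
    // so only the options being changed need to be touched.
    faiss::gpu::GpuIndexIVFPQConfig config;
    config.useFloat16LookupTables = true;

    // d=128, nlist=1024, 16 sub-quantizers at 8 bits per code.
    faiss::gpu::GpuIndexIVFPQ index(
            &res, 128, 1024, 16, 8, faiss::METRIC_L2, config);
    return 0;
}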
data/vendor/faiss/faiss/gpu/GpuResources.cpp

@@ -4,6 +4,21 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #include <faiss/gpu/GpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
@@ -143,7 +158,7 @@ GpuMemoryReservation::~GpuMemoryReservation() {
 // GpuResources
 //
 
-GpuResources::~GpuResources() {}
+GpuResources::~GpuResources() = default;
 
 cublasHandle_t GpuResources::getBlasHandleCurrentDevice() {
     return getBlasHandle(getCurrentDevice());
@@ -153,6 +168,12 @@ cudaStream_t GpuResources::getDefaultStreamCurrentDevice() {
     return getDefaultStream(getCurrentDevice());
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& GpuResources::getRaftHandleCurrentDevice() {
+    return getRaftHandle(getCurrentDevice());
+}
+#endif
+
 std::vector<cudaStream_t> GpuResources::getAlternateStreamsCurrentDevice() {
     return getAlternateStreams(getCurrentDevice());
 }
@@ -182,7 +203,7 @@ size_t GpuResources::getTempMemoryAvailableCurrentDevice() const {
 // GpuResourcesProvider
 //
 
-GpuResourcesProvider::~GpuResourcesProvider() {}
+GpuResourcesProvider::~GpuResourcesProvider() = default;
 
 //
 // GpuResourcesProviderFromResourceInstance
@@ -192,7 +213,7 @@ GpuResourcesProviderFromInstance::GpuResourcesProviderFromInstance(
         std::shared_ptr<GpuResources> p)
         : res_(p) {}
 
-GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() {}
+GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() = default;
 
 std::shared_ptr<GpuResources> GpuResourcesProviderFromInstance::getResources() {
     return res_;
data/vendor/faiss/faiss/gpu/GpuResources.h

@@ -4,16 +4,37 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #pragma once
 
 #include <cublas_v2.h>
 #include <cuda_runtime.h>
 #include <faiss/impl/FaissAssert.h>
+
 #include <memory>
 #include <utility>
 #include <vector>
 
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
+#endif
+
 namespace faiss {
 namespace gpu {
 
@@ -82,11 +103,7 @@ std::string memorySpaceToString(MemorySpace s);
 
 /// Information on what/where an allocation is
 struct AllocInfo {
-    inline AllocInfo()
-            : type(AllocType::Other),
-              device(0),
-              space(MemorySpace::Device),
-              stream(nullptr) {}
+    inline AllocInfo() {}
 
     inline AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
             : type(at), device(dev), space(sp), stream(st) {}
@@ -95,13 +112,13 @@ struct AllocInfo {
     std::string toString() const;
 
     /// The internal category of the allocation
-    AllocType type;
+    AllocType type = AllocType::Other;
 
     /// The device on which the allocation is happening
-    int device;
+    int device = 0;
 
     /// The memory space of the allocation
-    MemorySpace space;
+    MemorySpace space = MemorySpace::Device;
 
     /// The stream on which new work on the memory will be ordered (e.g., if a
     /// piece of memory cached and to be returned for this call was last used on
@@ -111,7 +128,7 @@ struct AllocInfo {
     ///
     /// The memory manager guarantees that the returned memory is free to use
     /// without data races on this stream specified.
-    cudaStream_t stream;
+    cudaStream_t stream = nullptr;
 };
 
 /// Create an AllocInfo for the current device with MemorySpace::Device
@@ -125,7 +142,7 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
 
 /// Information on what/where an allocation is, along with how big it should be
 struct AllocRequest : public AllocInfo {
-    inline AllocRequest() : AllocInfo(), size(0) {}
+    inline AllocRequest() {}
 
     inline AllocRequest(const AllocInfo& info, size_t sz)
             : AllocInfo(info), size(sz) {}
@@ -142,7 +159,11 @@ struct AllocRequest : public AllocInfo {
     std::string toString() const;
 
     /// The size in bytes of the allocation
-    size_t size;
+    size_t size = 0;
+
+#if defined USE_NVIDIA_RAFT
+    rmm::mr::device_memory_resource* mr = nullptr;
+#endif
 };
 
 /// A RAII object that manages a temporary memory request
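With the default member initializers above, a default-constructed AllocInfo or AllocRequest is already in a well-defined state, and the USE_NVIDIA_RAFT build adds an mr field so deallocMemory can return a block to the same RMM resource that served it. A small sketch of building a request by hand, using only the constructors shown above (sizes and device illustrative):

#include <faiss/gpu/GpuResources.h>

using namespace faiss::gpu;

// Request 1 MiB of ordinary device memory on device 0, ordered on the
// default (null) stream. Note that AllocRequest{} alone would also be
// valid now: Other / device 0 / Device space / null stream / size 0.
AllocRequest makeExampleRequest() {
    AllocInfo info(
            AllocType::Other, /*dev=*/0, MemorySpace::Device, /*st=*/nullptr);
    return AllocRequest(info, /*sz=*/1 << 20);
}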
@@ -190,6 +211,13 @@ class GpuResources {
     /// given device
     virtual cudaStream_t getDefaultStream(int device) = 0;
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    virtual raft::device_resources& getRaftHandle(int device) = 0;
+    raft::device_resources& getRaftHandleCurrentDevice();
+#endif
+
     /// Overrides the default stream for a device to the user-supplied stream.
     /// The resources object does not own this stream (i.e., it will not destroy
     /// it).
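The new pair of accessors follows the existing pattern in this class (compare getDefaultStream / getDefaultStreamCurrentDevice): a pure virtual per-device getter plus a non-virtual convenience wrapper for the current device. A sketch of how RAFT-enabled caller code might use it; the sync_stream() call is ordinary raft::device_resources usage, not part of this diff:

#if defined USE_NVIDIA_RAFT
#include <faiss/gpu/StandardGpuResources.h>
#include <raft/core/device_resources.hpp>

void waitForRaftWork(faiss::gpu::StandardGpuResources& provider, int device) {
    // Hands back a handle bound to the same stream Faiss uses on `device`.
    raft::device_resources& handle = provider.getRaftHandle(device);

    // Block until work enqueued on that handle's stream has completed.
    handle.sync_stream();
}
#endif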
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp

@@ -4,6 +4,29 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/device/managed_memory_resource.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+#include <rmm/mr/host/pinned_memory_resource.hpp>
+#include <memory>
+#endif
 
 #include <faiss/gpu/StandardGpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
@@ -66,7 +89,12 @@ std::string allocsToString(const std::unordered_map<void*, AllocRequest>& map) {
 //
 
 StandardGpuResourcesImpl::StandardGpuResourcesImpl()
-        : pinnedMemAlloc_(nullptr),
+        :
+#if defined USE_NVIDIA_RAFT
+          mmr_(new rmm::mr::managed_memory_resource),
+          pmr_(new rmm::mr::pinned_memory_resource),
+#endif
+          pinnedMemAlloc_(nullptr),
           pinnedMemAllocSize_(0),
           // let the adjustment function determine the memory size for us by
           // passing in a huge value that will then be adjusted
@@ -74,7 +102,8 @@ StandardGpuResourcesImpl::StandardGpuResourcesImpl()
                   -1,
                   std::numeric_limits<size_t>::max())),
           pinnedMemSize_(kDefaultPinnedMemoryAllocation),
-          allocLogging_(false) {}
+          allocLogging_(false) {
+}
 
 StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
     // The temporary memory allocator has allocated memory through us, so clean
@@ -129,6 +158,9 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
     }
 
     if (pinnedMemAlloc_) {
+#if defined USE_NVIDIA_RAFT
+        pmr_->deallocate(pinnedMemAlloc_, pinnedMemAllocSize_);
+#else
         auto err = cudaFreeHost(pinnedMemAlloc_);
         FAISS_ASSERT_FMT(
                 err == cudaSuccess,
@@ -136,6 +168,7 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
                 pinnedMemAlloc_,
                 (int)err,
                 cudaGetErrorString(err));
+#endif
     }
 }
 
@@ -187,11 +220,11 @@ void StandardGpuResourcesImpl::setTempMemory(size_t size) {
             p.second.reset();
 
             // Allocate new
-            p.second = std::unique_ptr<StackDeviceMemory>(new StackDeviceMemory(
+            p.second = std::make_unique<StackDeviceMemory>(
                     this,
                     p.first,
                     // adjust for this specific device
-                    getDefaultTempMemForGPU(device, tempMemSize_)));
+                    getDefaultTempMemForGPU(device, tempMemSize_));
         }
     }
 }
@@ -274,6 +307,19 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     // If this is the first device that we're initializing, create our
     // pinned memory allocation
     if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
+#if defined USE_NVIDIA_RAFT
+        // If this is the first device that we're initializing, create our
+        // pinned memory allocation
+        if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
+            try {
+                pinnedMemAlloc_ = pmr_->allocate(pinnedMemSize_);
+            } catch (const std::bad_alloc& rmm_ex) {
+                FAISS_THROW_MSG("CUDA memory allocation error");
+            }
+
+            pinnedMemAllocSize_ = pinnedMemSize_;
+        }
+#else
         auto err = cudaHostAlloc(
                 &pinnedMemAlloc_, pinnedMemSize_, cudaHostAllocDefault);
 
@@ -286,6 +332,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
                 cudaGetErrorString(err));
 
         pinnedMemAllocSize_ = pinnedMemSize_;
+#endif
     }
 
     // Make sure that device properties for all devices are cached
@@ -307,12 +354,16 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
             device);
 
     // Create streams
-    cudaStream_t defaultStream = 0;
+    cudaStream_t defaultStream = nullptr;
     CUDA_VERIFY(
             cudaStreamCreateWithFlags(&defaultStream, cudaStreamNonBlocking));
 
     defaultStreams_[device] = defaultStream;
 
+#if defined USE_NVIDIA_RAFT
+    raftHandles_.emplace(std::make_pair(device, defaultStream));
+#endif
+
     cudaStream_t asyncCopyStream = 0;
     CUDA_VERIFY(
             cudaStreamCreateWithFlags(&asyncCopyStream, cudaStreamNonBlocking));
@@ -321,7 +372,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
 
     std::vector<cudaStream_t> deviceStreams;
     for (int j = 0; j < kNumStreams; ++j) {
-        cudaStream_t stream = 0;
+        cudaStream_t stream = nullptr;
         CUDA_VERIFY(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
 
         deviceStreams.push_back(stream);
@@ -330,7 +381,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     alternateStreams_[device] = std::move(deviceStreams);
 
     // Create cuBLAS handle
-    cublasHandle_t blasHandle = 0;
+    cublasHandle_t blasHandle = nullptr;
     auto blasStatus = cublasCreate(&blasHandle);
     FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
     blasHandles_[device] = blasHandle;
@@ -348,11 +399,11 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     allocs_[device] = std::unordered_map<void*, AllocRequest>();
 
     FAISS_ASSERT(tempMemory_.count(device) == 0);
-    auto mem = std::unique_ptr<StackDeviceMemory>(new StackDeviceMemory(
+    auto mem = std::make_unique<StackDeviceMemory>(
            this,
            device,
            // adjust for this specific device
-            getDefaultTempMemForGPU(device, tempMemSize_)));
+            getDefaultTempMemForGPU(device, tempMemSize_));
 
     tempMemory_.emplace(device, std::move(mem));
 }
@@ -375,6 +426,25 @@ cudaStream_t StandardGpuResourcesImpl::getDefaultStream(int device) {
     return defaultStreams_[device];
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& StandardGpuResourcesImpl::getRaftHandle(int device) {
+    initializeForDevice(device);
+
+    auto it = raftHandles_.find(device);
+    if (it == raftHandles_.end()) {
+        // Make sure we are using the stream the user may have already assigned
+        // to the current GpuResources
+        raftHandles_.emplace(device, getDefaultStream(device));
+
+        // Initialize cublas handle
+        raftHandles_[device].get_cublas_handle();
+    }
+
+    // Otherwise, our base default handle
+    return raftHandles_[device];
+}
+#endif
+
 std::vector<cudaStream_t> StandardGpuResourcesImpl::getAlternateStreams(
         int device) {
     initializeForDevice(device);
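getRaftHandle builds each raft::device_resources lazily, keyed by device, and only on a cache miss constructs one bound to that device's default stream. The same find-or-emplace idiom in isolation (a generic sketch, not faiss code):

#include <unordered_map>

// Look the key up first; construct the (possibly expensive) value only on
// a miss, then hand back a reference either way.
template <typename K, typename V, typename MakeV>
V& findOrCreate(std::unordered_map<K, V>& map, const K& key, MakeV make) {
    auto it = map.find(key);
    if (it == map.end()) {
        it = map.emplace(key, make()).first;
    }
    return it->second;
}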
@@ -406,8 +476,6 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
     void* p = nullptr;
 
     if (adjReq.space == MemorySpace::Temporary) {
-        // If we don't have enough space in our temporary memory manager, we
-        // need to allocate this request separately
         auto& tempMem = tempMemory_[adjReq.device];
 
         if (adjReq.size > tempMem->getSizeAvailable()) {
@@ -428,15 +496,25 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
         // Otherwise, we can handle this locally
         p = tempMemory_[adjReq.device]->allocMemory(adjReq.stream, adjReq.size);
-
     } else if (adjReq.space == MemorySpace::Device) {
+#if defined USE_NVIDIA_RAFT
+        try {
+            rmm::mr::device_memory_resource* current_mr =
+                    rmm::mr::get_per_device_resource(
+                            rmm::cuda_device_id{adjReq.device});
+            p = current_mr->allocate_async(adjReq.size, adjReq.stream);
+            adjReq.mr = current_mr;
+        } catch (const std::bad_alloc& rmm_ex) {
+            FAISS_THROW_MSG("CUDA memory allocation error");
+        }
+#else
         auto err = cudaMalloc(&p, adjReq.size);
 
         // Throw if we fail to allocate
         if (err != cudaSuccess) {
             // FIXME: as of CUDA 11, a memory allocation error appears to be
-            // presented via cudaGetLastError as well, and needs to be cleared.
-            // Just call the function to clear it
+            // presented via cudaGetLastError as well, and needs to be
+            // cleared. Just call the function to clear it
             cudaGetLastError();
 
             std::stringstream ss;
@@ -451,7 +529,20 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
             FAISS_THROW_IF_NOT_FMT(err == cudaSuccess, "%s", str.c_str());
         }
+#endif
     } else if (adjReq.space == MemorySpace::Unified) {
+#if defined USE_NVIDIA_RAFT
+        try {
+            // for now, use our own managed MR to do Unified Memory allocations.
+            // TODO: change this to use the current device resource once RMM has
+            // a way to retrieve a "guaranteed" managed memory resource for a
+            // device.
+            p = mmr_->allocate_async(adjReq.size, adjReq.stream);
+            adjReq.mr = mmr_.get();
+        } catch (const std::bad_alloc& rmm_ex) {
+            FAISS_THROW_MSG("CUDA memory allocation error");
+        }
+#else
         auto err = cudaMallocManaged(&p, adjReq.size);
 
         if (err != cudaSuccess) {
@@ -472,6 +563,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
             FAISS_THROW_IF_NOT_FMT(err == cudaSuccess, "%s", str.c_str());
         }
+#endif
     } else {
         FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int)adjReq.space);
     }
@@ -505,10 +597,12 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) {
 
     if (req.space == MemorySpace::Temporary) {
         tempMemory_[device]->deallocMemory(device, req.stream, req.size, p);
-
     } else if (
             req.space == MemorySpace::Device ||
             req.space == MemorySpace::Unified) {
+#if defined USE_NVIDIA_RAFT
+        req.mr->deallocate_async(p, req.size, req.stream);
+#else
         auto err = cudaFree(p);
         FAISS_ASSERT_FMT(
                 err == cudaSuccess,
@@ -516,7 +610,7 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) {
                 p,
                 (int)err,
                 cudaGetErrorString(err));
-
+#endif
     } else {
         FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int)req.space);
     }
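Under USE_NVIDIA_RAFT, device and unified allocations become stream-ordered through RMM: allocMemory fetches the per-device memory resource, allocates on the request's stream, and records the resource in AllocRequest::mr so deallocMemory can free through the same resource on the same stream. The same pairing using RMM directly (a sketch assuming RMM is available; not faiss code):

#include <cuda_runtime.h>
#include <cstddef>
#include <rmm/mr/device/per_device_resource.hpp>

// Stream-ordered round trip mirroring allocMemory/deallocMemory: the block
// must go back to the resource (and stream) that produced it.
void roundTrip(int device, std::size_t bytes, cudaStream_t stream) {
    rmm::mr::device_memory_resource* mr =
            rmm::mr::get_per_device_resource(rmm::cuda_device_id{device});

    void* p = mr->allocate_async(bytes, stream); // ordered on `stream`
    // ... launch kernels that use p on `stream` ...
    mr->deallocate_async(p, bytes, stream);      // ordered on `stream`
}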
@@ -561,7 +655,7 @@ StandardGpuResourcesImpl::getMemoryInfo() const {
 StandardGpuResources::StandardGpuResources()
         : res_(new StandardGpuResourcesImpl) {}
 
-StandardGpuResources::~StandardGpuResources() {}
+StandardGpuResources::~StandardGpuResources() = default;
 
 std::shared_ptr<GpuResources> StandardGpuResources::getResources() {
     return res_;
@@ -600,6 +694,12 @@ cudaStream_t StandardGpuResources::getDefaultStream(int device) {
     return res_->getDefaultStream(device);
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& StandardGpuResources::getRaftHandle(int device) {
+    return res_->getRaftHandle(device);
+}
+#endif
+
 size_t StandardGpuResources::getTempMemoryAvailable(int device) const {
     return res_->getTempMemoryAvailable(device);
 }
data/vendor/faiss/faiss/gpu/StandardGpuResources.h

@@ -4,9 +4,29 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #pragma once
 
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/host/pinned_memory_resource.hpp>
+#endif
+
 #include <faiss/gpu/GpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/gpu/utils/StackDeviceMemory.h>
@@ -15,6 +35,7 @@
 #include <unordered_map>
 #include <vector>
 
+#pragma GCC visibility push(default)
 namespace faiss {
 namespace gpu {
 
@@ -58,6 +79,12 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// this stream upon exit from an index or other Faiss GPU call.
     cudaStream_t getDefaultStream(int device) override;
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    raft::device_resources& getRaftHandle(int device) override;
+#endif
+
     /// Called to change the work ordering streams to the null stream
     /// for all devices
     void setDefaultNullStreamAllDevices();
@@ -92,7 +119,7 @@ class StandardGpuResourcesImpl : public GpuResources {
 
     cudaStream_t getAsyncCopyStream(int device) override;
 
-   private:
+   protected:
     /// Have GPU resources been initialized for this device yet?
     bool isInitialized(int device) const;
 
@@ -100,7 +127,7 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// memory size
     static size_t getDefaultTempMemForGPU(int device, size_t requested);
 
-   private:
+   protected:
     /// Set of currently outstanding memory allocations per device
     /// device -> (alloc request, allocated ptr)
     std::unordered_map<int, std::unordered_map<void*, AllocRequest>> allocs_;
@@ -124,6 +151,27 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// cuBLAS handle for each device
     std::unordered_map<int, cublasHandle_t> blasHandles_;
 
+#if defined USE_NVIDIA_RAFT
+    /// raft handle for each device
+    std::unordered_map<int, raft::device_resources> raftHandles_;
+
+    /**
+     * FIXME: Integrating these in a separate code path for now. Ultimately,
+     * it would be nice if we use a simple memory resource abstraction
+     * in FAISS so we could plug in whether to use RMM's memory resources
+     * or the default.
+     *
+     * There's enough duplicated logic that it doesn't *seem* to make sense
+     * to create a subclass only for the RMM memory resources.
+     */
+
+    // managed_memory_resource
+    std::unique_ptr<rmm::mr::device_memory_resource> mmr_;
+
+    // pinned_memory_resource
+    std::unique_ptr<rmm::mr::host_memory_resource> pmr_;
+#endif
+
     /// Pinned memory allocation for use with this GPU
     void* pinnedMemAlloc_;
     size_t pinnedMemAllocSize_;
@@ -183,10 +231,15 @@ class StandardGpuResources : public GpuResourcesProvider {
     /// Export a description of memory used for Python
     std::map<int, std::map<std::string, std::pair<int, size_t>>> getMemoryInfo()
             const;
-
     /// Returns the current default stream
     cudaStream_t getDefaultStream(int device);
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    raft::device_resources& getRaftHandle(int device);
+#endif
+
     /// Returns the current amount of temp memory available
     size_t getTempMemoryAvailable(int device) const;
 
@@ -203,3 +256,4 @@ class StandardGpuResources : public GpuResourcesProvider {
 
 } // namespace gpu
 } // namespace faiss
+#pragma GCC visibility pop
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp

@@ -42,7 +42,7 @@ int main(int argc, char** argv) {
 
     cudaProfilerStop();
 
-    auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(nullptr);
+    auto seed = FLAGS_seed != -1 ? FLAGS_seed : time(nullptr);
     printf("using seed %ld\n", seed);
 
     std::vector<float> vecs((size_t)FLAGS_num * FLAGS_dim);
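The PerfClustering change is purely cosmetic: FLAGS_seed is a 64-bit gflags value, so the plain int literal -1 is promoted to the flag's type before the comparison and behaves identically to -1L. A self-contained sketch of the pattern (assuming gflags; the flag name mirrors the benchmark's):

#include <gflags/gflags.h>
#include <cstdio>
#include <ctime>

DEFINE_int64(seed, -1, "RNG seed; -1 derives one from the clock");

int main(int argc, char** argv) {
    gflags::ParseCommandLineFlags(&argc, &argv, true);

    // -1 promotes to int64_t here, matching the behavior before the edit.
    auto seed = FLAGS_seed != -1 ? FLAGS_seed : time(nullptr);
    printf("using seed %ld\n", (long)seed);
    return 0;
}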