faiss 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
- data/vendor/faiss/c_api/AutoTune_c.h +2 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
- data/vendor/faiss/c_api/IndexShards_c.h +1 -4
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
- data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
- data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
- data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
- data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
- data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
- data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
- data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
- data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
- data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
- data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
- data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
- data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
- data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
- data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
- data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
- data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
- data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
- data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
- data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
- data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
- data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
- data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
- data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
- data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
- data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
- data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
- data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
- data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
- data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
- data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
- data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
- data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
- data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
- data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
- data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
- data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
- data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
- data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
- data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
- data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
- data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
- data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
- data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
- data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
- data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
- data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
- data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
- data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
- data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
- data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
- data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
- data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
- data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
- data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
- data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
- data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
- data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
- data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
- data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
- data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
- data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
- data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
- data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
- data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
- data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
- data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
- data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
- data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
- data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
- data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
- data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
- data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
- data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
- data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
- data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
- data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
- data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
- data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
- data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
- data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
- data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
- data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
- data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
- data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
- data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
- data/vendor/faiss/faiss/impl/io_macros.h +57 -0
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
- data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
- data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
- data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
- data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
- data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
- data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
- data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
- data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
- data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
- data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
- data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
- data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
- data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
- data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
- data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
- data/vendor/faiss/misc/test_blas.cpp +4 -1
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
- data/vendor/faiss/tests/test_merge.cpp +6 -3
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
- data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
- data/vendor/faiss/tests/test_params_override.cpp +7 -2
- data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
- metadata +154 -153
- data/vendor/faiss/gpu/GpuResources.cpp +0 -52
- data/vendor/faiss/gpu/GpuResources.h +0 -73
- data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
- data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
- data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -1,52 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
#include <faiss/gpu/GpuResources.h>
|
10
|
-
#include <faiss/gpu/utils/DeviceUtils.h>
|
11
|
-
|
12
|
-
namespace faiss { namespace gpu {
|
13
|
-
|
14
|
-
GpuResources::~GpuResources() {
|
15
|
-
}
|
16
|
-
|
17
|
-
cublasHandle_t
|
18
|
-
GpuResources::getBlasHandleCurrentDevice() {
|
19
|
-
return getBlasHandle(getCurrentDevice());
|
20
|
-
}
|
21
|
-
|
22
|
-
cudaStream_t
|
23
|
-
GpuResources::getDefaultStreamCurrentDevice() {
|
24
|
-
return getDefaultStream(getCurrentDevice());
|
25
|
-
}
|
26
|
-
|
27
|
-
std::vector<cudaStream_t>
|
28
|
-
GpuResources::getAlternateStreamsCurrentDevice() {
|
29
|
-
return getAlternateStreams(getCurrentDevice());
|
30
|
-
}
|
31
|
-
|
32
|
-
DeviceMemory&
|
33
|
-
GpuResources::getMemoryManagerCurrentDevice() {
|
34
|
-
return getMemoryManager(getCurrentDevice());
|
35
|
-
}
|
36
|
-
|
37
|
-
cudaStream_t
|
38
|
-
GpuResources::getAsyncCopyStreamCurrentDevice() {
|
39
|
-
return getAsyncCopyStream(getCurrentDevice());
|
40
|
-
}
|
41
|
-
|
42
|
-
void
|
43
|
-
GpuResources::syncDefaultStream(int device) {
|
44
|
-
CUDA_VERIFY(cudaStreamSynchronize(getDefaultStream(device)));
|
45
|
-
}
|
46
|
-
|
47
|
-
void
|
48
|
-
GpuResources::syncDefaultStreamCurrentDevice() {
|
49
|
-
syncDefaultStream(getCurrentDevice());
|
50
|
-
}
|
51
|
-
|
52
|
-
} } // namespace
|
@@ -1,73 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
#pragma once
|
10
|
-
|
11
|
-
#include <faiss/gpu/utils/DeviceMemory.h>
|
12
|
-
#include <cuda_runtime.h>
|
13
|
-
#include <cublas_v2.h>
|
14
|
-
#include <utility>
|
15
|
-
#include <vector>
|
16
|
-
|
17
|
-
namespace faiss { namespace gpu {
|
18
|
-
|
19
|
-
/// Base class of GPU-side resource provider; hides provision of
|
20
|
-
/// cuBLAS handles, CUDA streams and a temporary memory manager
|
21
|
-
class GpuResources {
|
22
|
-
public:
|
23
|
-
virtual ~GpuResources();
|
24
|
-
|
25
|
-
/// Call to pre-allocate resources for a particular device. If this is
|
26
|
-
/// not called, then resources will be allocated at the first time
|
27
|
-
/// of demand
|
28
|
-
virtual void initializeForDevice(int device) = 0;
|
29
|
-
|
30
|
-
/// Returns the cuBLAS handle that we use for the given device
|
31
|
-
virtual cublasHandle_t getBlasHandle(int device) = 0;
|
32
|
-
|
33
|
-
/// Returns the stream that we order all computation on for the
|
34
|
-
/// given device
|
35
|
-
virtual cudaStream_t getDefaultStream(int device) = 0;
|
36
|
-
|
37
|
-
/// Returns the set of alternative streams that we use for the given device
|
38
|
-
virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
|
39
|
-
|
40
|
-
/// Returns the temporary memory manager for the given device
|
41
|
-
virtual DeviceMemory& getMemoryManager(int device) = 0;
|
42
|
-
|
43
|
-
/// Returns the available CPU pinned memory buffer
|
44
|
-
virtual std::pair<void*, size_t> getPinnedMemory() = 0;
|
45
|
-
|
46
|
-
/// Returns the stream on which we perform async CPU <-> GPU copies
|
47
|
-
virtual cudaStream_t getAsyncCopyStream(int device) = 0;
|
48
|
-
|
49
|
-
/// Calls getBlasHandle with the current device
|
50
|
-
cublasHandle_t getBlasHandleCurrentDevice();
|
51
|
-
|
52
|
-
/// Calls getDefaultStream with the current device
|
53
|
-
cudaStream_t getDefaultStreamCurrentDevice();
|
54
|
-
|
55
|
-
/// Synchronizes the CPU with respect to the default stream for the
|
56
|
-
/// given device
|
57
|
-
// equivalent to cudaStreamSynchronize(getDefaultStream(device))
|
58
|
-
void syncDefaultStream(int device);
|
59
|
-
|
60
|
-
/// Calls syncDefaultStream for the current device
|
61
|
-
void syncDefaultStreamCurrentDevice();
|
62
|
-
|
63
|
-
/// Calls getAlternateStreams for the current device
|
64
|
-
std::vector<cudaStream_t> getAlternateStreamsCurrentDevice();
|
65
|
-
|
66
|
-
/// Calls getMemoryManager for the current device
|
67
|
-
DeviceMemory& getMemoryManagerCurrentDevice();
|
68
|
-
|
69
|
-
/// Calls getAsyncCopyStream for the current device
|
70
|
-
cudaStream_t getAsyncCopyStreamCurrentDevice();
|
71
|
-
};
|
72
|
-
|
73
|
-
} } // namespace
|
@@ -1,303 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
#include <faiss/gpu/StandardGpuResources.h>
|
10
|
-
#include <faiss/gpu/utils/DeviceUtils.h>
|
11
|
-
#include <faiss/gpu/utils/MemorySpace.h>
|
12
|
-
#include <faiss/impl/FaissAssert.h>
|
13
|
-
#include <limits>
|
14
|
-
|
15
|
-
namespace faiss { namespace gpu {
|
16
|
-
|
17
|
-
namespace {
|
18
|
-
|
19
|
-
// How many streams per device we allocate by default (for multi-streaming)
|
20
|
-
constexpr int kNumStreams = 2;
|
21
|
-
|
22
|
-
// Use 256 MiB of pinned memory for async CPU <-> GPU copies by default
|
23
|
-
constexpr size_t kDefaultPinnedMemoryAllocation = (size_t) 256 * 1024 * 1024;
|
24
|
-
|
25
|
-
// Default temporary memory allocation for <= 4 GiB memory GPUs
|
26
|
-
constexpr size_t k4GiBTempMem = (size_t) 512 * 1024 * 1024;
|
27
|
-
|
28
|
-
// Default temporary memory allocation for <= 8 GiB memory GPUs
|
29
|
-
constexpr size_t k8GiBTempMem = (size_t) 1024 * 1024 * 1024;
|
30
|
-
|
31
|
-
// Maximum temporary memory allocation for all GPUs
|
32
|
-
constexpr size_t kMaxTempMem = (size_t) 1536 * 1024 * 1024;
|
33
|
-
|
34
|
-
}
|
35
|
-
|
36
|
-
StandardGpuResources::StandardGpuResources() :
|
37
|
-
pinnedMemAlloc_(nullptr),
|
38
|
-
pinnedMemAllocSize_(0),
|
39
|
-
// let the adjustment function determine the memory size for us by passing
|
40
|
-
// in a huge value that will then be adjusted
|
41
|
-
tempMemSize_(getDefaultTempMemForGPU(-1,
|
42
|
-
std::numeric_limits<size_t>::max())),
|
43
|
-
pinnedMemSize_(kDefaultPinnedMemoryAllocation),
|
44
|
-
cudaMallocWarning_(true) {
|
45
|
-
}
|
46
|
-
|
47
|
-
StandardGpuResources::~StandardGpuResources() {
|
48
|
-
for (auto& entry : defaultStreams_) {
|
49
|
-
DeviceScope scope(entry.first);
|
50
|
-
|
51
|
-
auto it = userDefaultStreams_.find(entry.first);
|
52
|
-
if (it == userDefaultStreams_.end()) {
|
53
|
-
// The user did not specify this stream, thus we are the ones
|
54
|
-
// who have created it
|
55
|
-
CUDA_VERIFY(cudaStreamDestroy(entry.second));
|
56
|
-
}
|
57
|
-
}
|
58
|
-
|
59
|
-
for (auto& entry : alternateStreams_) {
|
60
|
-
DeviceScope scope(entry.first);
|
61
|
-
|
62
|
-
for (auto stream : entry.second) {
|
63
|
-
CUDA_VERIFY(cudaStreamDestroy(stream));
|
64
|
-
}
|
65
|
-
}
|
66
|
-
|
67
|
-
for (auto& entry : asyncCopyStreams_) {
|
68
|
-
DeviceScope scope(entry.first);
|
69
|
-
|
70
|
-
CUDA_VERIFY(cudaStreamDestroy(entry.second));
|
71
|
-
}
|
72
|
-
|
73
|
-
for (auto& entry : blasHandles_) {
|
74
|
-
DeviceScope scope(entry.first);
|
75
|
-
|
76
|
-
auto blasStatus = cublasDestroy(entry.second);
|
77
|
-
FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
|
78
|
-
}
|
79
|
-
|
80
|
-
if (pinnedMemAlloc_) {
|
81
|
-
freeMemorySpace(MemorySpace::HostPinned, pinnedMemAlloc_);
|
82
|
-
}
|
83
|
-
}
|
84
|
-
|
85
|
-
size_t
|
86
|
-
StandardGpuResources::getDefaultTempMemForGPU(int device,
|
87
|
-
size_t requested) {
|
88
|
-
auto totalMem = device != -1 ?
|
89
|
-
getDeviceProperties(device).totalGlobalMem :
|
90
|
-
std::numeric_limits<size_t>::max();
|
91
|
-
|
92
|
-
if (totalMem <= (size_t) 4 * 1024 * 1024 * 1024) {
|
93
|
-
// If the GPU has <= 4 GiB of memory, reserve 512 MiB
|
94
|
-
|
95
|
-
if (requested > k4GiBTempMem) {
|
96
|
-
return k4GiBTempMem;
|
97
|
-
}
|
98
|
-
} else if (totalMem <= (size_t) 8 * 1024 * 1024 * 1024) {
|
99
|
-
// If the GPU has <= 8 GiB of memory, reserve 1 GiB
|
100
|
-
|
101
|
-
if (requested > k8GiBTempMem) {
|
102
|
-
return k8GiBTempMem;
|
103
|
-
}
|
104
|
-
} else {
|
105
|
-
// Never use more than 1.5 GiB
|
106
|
-
if (requested > kMaxTempMem) {
|
107
|
-
return kMaxTempMem;
|
108
|
-
}
|
109
|
-
}
|
110
|
-
|
111
|
-
// use whatever lower limit the user requested
|
112
|
-
return requested;
|
113
|
-
}
|
114
|
-
|
115
|
-
void
|
116
|
-
StandardGpuResources::noTempMemory() {
|
117
|
-
setTempMemory(0);
|
118
|
-
setCudaMallocWarning(false);
|
119
|
-
}
|
120
|
-
|
121
|
-
void
|
122
|
-
StandardGpuResources::setTempMemory(size_t size) {
|
123
|
-
if (tempMemSize_ != size) {
|
124
|
-
// adjust based on general limits
|
125
|
-
tempMemSize_ = getDefaultTempMemForGPU(-1, size);
|
126
|
-
|
127
|
-
// We need to re-initialize memory resources for all current devices that
|
128
|
-
// have been initialized.
|
129
|
-
// This should be safe to do, even if we are currently running work, because
|
130
|
-
// the cudaFree call that this implies will force-synchronize all GPUs with
|
131
|
-
// the CPU
|
132
|
-
for (auto& p : memory_) {
|
133
|
-
int device = p.first;
|
134
|
-
// Free the existing memory first
|
135
|
-
p.second.reset();
|
136
|
-
|
137
|
-
// Allocate new
|
138
|
-
p.second = std::unique_ptr<StackDeviceMemory>(
|
139
|
-
new StackDeviceMemory(p.first,
|
140
|
-
// adjust for this specific device
|
141
|
-
getDefaultTempMemForGPU(device, tempMemSize_)));
|
142
|
-
}
|
143
|
-
}
|
144
|
-
}
|
145
|
-
|
146
|
-
void
|
147
|
-
StandardGpuResources::setPinnedMemory(size_t size) {
|
148
|
-
// Should not call this after devices have been initialized
|
149
|
-
FAISS_ASSERT(defaultStreams_.size() == 0);
|
150
|
-
FAISS_ASSERT(!pinnedMemAlloc_);
|
151
|
-
|
152
|
-
pinnedMemSize_ = size;
|
153
|
-
}
|
154
|
-
|
155
|
-
void
|
156
|
-
StandardGpuResources::setDefaultStream(int device, cudaStream_t stream) {
|
157
|
-
auto it = defaultStreams_.find(device);
|
158
|
-
if (it != defaultStreams_.end()) {
|
159
|
-
// Replace this stream with the user stream
|
160
|
-
CUDA_VERIFY(cudaStreamDestroy(it->second));
|
161
|
-
it->second = stream;
|
162
|
-
}
|
163
|
-
|
164
|
-
userDefaultStreams_[device] = stream;
|
165
|
-
}
|
166
|
-
|
167
|
-
void
|
168
|
-
StandardGpuResources::setDefaultNullStreamAllDevices() {
|
169
|
-
for (int dev = 0; dev < getNumDevices(); ++dev) {
|
170
|
-
setDefaultStream(dev, nullptr);
|
171
|
-
}
|
172
|
-
}
|
173
|
-
|
174
|
-
void
|
175
|
-
StandardGpuResources::setCudaMallocWarning(bool b) {
|
176
|
-
cudaMallocWarning_ = b;
|
177
|
-
|
178
|
-
for (auto& v : memory_) {
|
179
|
-
v.second->setCudaMallocWarning(b);
|
180
|
-
}
|
181
|
-
}
|
182
|
-
|
183
|
-
bool
|
184
|
-
StandardGpuResources::isInitialized(int device) const {
|
185
|
-
// Use default streams as a marker for whether or not a certain
|
186
|
-
// device has been initialized
|
187
|
-
return defaultStreams_.count(device) != 0;
|
188
|
-
}
|
189
|
-
|
190
|
-
void
|
191
|
-
StandardGpuResources::initializeForDevice(int device) {
|
192
|
-
if (isInitialized(device)) {
|
193
|
-
return;
|
194
|
-
}
|
195
|
-
|
196
|
-
// If this is the first device that we're initializing, create our
|
197
|
-
// pinned memory allocation
|
198
|
-
if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
|
199
|
-
allocMemorySpace(MemorySpace::HostPinned, &pinnedMemAlloc_, pinnedMemSize_);
|
200
|
-
pinnedMemAllocSize_ = pinnedMemSize_;
|
201
|
-
}
|
202
|
-
|
203
|
-
FAISS_ASSERT(device < getNumDevices());
|
204
|
-
DeviceScope scope(device);
|
205
|
-
|
206
|
-
// Make sure that device properties for all devices are cached
|
207
|
-
auto& prop = getDeviceProperties(device);
|
208
|
-
|
209
|
-
// Also check to make sure we meet our minimum compute capability (3.0)
|
210
|
-
FAISS_ASSERT_FMT(prop.major >= 3,
|
211
|
-
"Device id %d with CC %d.%d not supported, "
|
212
|
-
"need 3.0+ compute capability",
|
213
|
-
device, prop.major, prop.minor);
|
214
|
-
|
215
|
-
// Create streams
|
216
|
-
cudaStream_t defaultStream = 0;
|
217
|
-
auto it = userDefaultStreams_.find(device);
|
218
|
-
if (it != userDefaultStreams_.end()) {
|
219
|
-
// We already have a stream provided by the user
|
220
|
-
defaultStream = it->second;
|
221
|
-
} else {
|
222
|
-
CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,
|
223
|
-
cudaStreamNonBlocking));
|
224
|
-
}
|
225
|
-
|
226
|
-
defaultStreams_[device] = defaultStream;
|
227
|
-
|
228
|
-
cudaStream_t asyncCopyStream = 0;
|
229
|
-
CUDA_VERIFY(cudaStreamCreateWithFlags(&asyncCopyStream,
|
230
|
-
cudaStreamNonBlocking));
|
231
|
-
|
232
|
-
asyncCopyStreams_[device] = asyncCopyStream;
|
233
|
-
|
234
|
-
std::vector<cudaStream_t> deviceStreams;
|
235
|
-
for (int j = 0; j < kNumStreams; ++j) {
|
236
|
-
cudaStream_t stream = 0;
|
237
|
-
CUDA_VERIFY(cudaStreamCreateWithFlags(&stream,
|
238
|
-
cudaStreamNonBlocking));
|
239
|
-
|
240
|
-
deviceStreams.push_back(stream);
|
241
|
-
}
|
242
|
-
|
243
|
-
alternateStreams_[device] = std::move(deviceStreams);
|
244
|
-
|
245
|
-
// Create cuBLAS handle
|
246
|
-
cublasHandle_t blasHandle = 0;
|
247
|
-
auto blasStatus = cublasCreate(&blasHandle);
|
248
|
-
FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
|
249
|
-
blasHandles_[device] = blasHandle;
|
250
|
-
|
251
|
-
// Enable tensor core support if available
|
252
|
-
#if CUDA_VERSION >= 9000
|
253
|
-
if (getTensorCoreSupport(device)) {
|
254
|
-
cublasSetMathMode(blasHandle, CUBLAS_TENSOR_OP_MATH);
|
255
|
-
}
|
256
|
-
#endif
|
257
|
-
|
258
|
-
FAISS_ASSERT(memory_.count(device) == 0);
|
259
|
-
|
260
|
-
auto mem = std::unique_ptr<StackDeviceMemory>(
|
261
|
-
new StackDeviceMemory(device,
|
262
|
-
// adjust for this specific device
|
263
|
-
getDefaultTempMemForGPU(device, tempMemSize_)));
|
264
|
-
mem->setCudaMallocWarning(cudaMallocWarning_);
|
265
|
-
|
266
|
-
memory_.emplace(device, std::move(mem));
|
267
|
-
}
|
268
|
-
|
269
|
-
cublasHandle_t
|
270
|
-
StandardGpuResources::getBlasHandle(int device) {
|
271
|
-
initializeForDevice(device);
|
272
|
-
return blasHandles_[device];
|
273
|
-
}
|
274
|
-
|
275
|
-
cudaStream_t
|
276
|
-
StandardGpuResources::getDefaultStream(int device) {
|
277
|
-
initializeForDevice(device);
|
278
|
-
return defaultStreams_[device];
|
279
|
-
}
|
280
|
-
|
281
|
-
std::vector<cudaStream_t>
|
282
|
-
StandardGpuResources::getAlternateStreams(int device) {
|
283
|
-
initializeForDevice(device);
|
284
|
-
return alternateStreams_[device];
|
285
|
-
}
|
286
|
-
|
287
|
-
DeviceMemory& StandardGpuResources::getMemoryManager(int device) {
|
288
|
-
initializeForDevice(device);
|
289
|
-
return *memory_[device];
|
290
|
-
}
|
291
|
-
|
292
|
-
std::pair<void*, size_t>
|
293
|
-
StandardGpuResources::getPinnedMemory() {
|
294
|
-
return std::make_pair(pinnedMemAlloc_, pinnedMemAllocSize_);
|
295
|
-
}
|
296
|
-
|
297
|
-
cudaStream_t
|
298
|
-
StandardGpuResources::getAsyncCopyStream(int device) {
|
299
|
-
initializeForDevice(device);
|
300
|
-
return asyncCopyStreams_[device];
|
301
|
-
}
|
302
|
-
|
303
|
-
} } // namespace
|
@@ -1,77 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
-
*
|
4
|
-
* This source code is licensed under the MIT license found in the
|
5
|
-
* LICENSE file in the root directory of this source tree.
|
6
|
-
*/
|
7
|
-
|
8
|
-
|
9
|
-
#include <faiss/gpu/utils/DeviceMemory.h>
|
10
|
-
#include <faiss/gpu/utils/DeviceUtils.h>
|
11
|
-
#include <faiss/impl/FaissAssert.h>
|
12
|
-
|
13
|
-
namespace faiss { namespace gpu {
|
14
|
-
|
15
|
-
DeviceMemoryReservation::DeviceMemoryReservation()
|
16
|
-
: state_(NULL),
|
17
|
-
device_(0),
|
18
|
-
data_(NULL),
|
19
|
-
size_(0),
|
20
|
-
stream_(0) {
|
21
|
-
}
|
22
|
-
|
23
|
-
DeviceMemoryReservation::DeviceMemoryReservation(DeviceMemory* state,
|
24
|
-
int device,
|
25
|
-
void* p,
|
26
|
-
size_t size,
|
27
|
-
cudaStream_t stream)
|
28
|
-
: state_(state),
|
29
|
-
device_(device),
|
30
|
-
data_(p),
|
31
|
-
size_(size),
|
32
|
-
stream_(stream) {
|
33
|
-
}
|
34
|
-
|
35
|
-
DeviceMemoryReservation::DeviceMemoryReservation(
|
36
|
-
DeviceMemoryReservation&& m) noexcept {
|
37
|
-
|
38
|
-
state_ = m.state_;
|
39
|
-
device_ = m.device_;
|
40
|
-
data_ = m.data_;
|
41
|
-
size_ = m.size_;
|
42
|
-
stream_ = m.stream_;
|
43
|
-
|
44
|
-
m.data_ = NULL;
|
45
|
-
}
|
46
|
-
|
47
|
-
DeviceMemoryReservation::~DeviceMemoryReservation() {
|
48
|
-
if (data_) {
|
49
|
-
FAISS_ASSERT(state_);
|
50
|
-
state_->returnAllocation(*this);
|
51
|
-
}
|
52
|
-
|
53
|
-
data_ = NULL;
|
54
|
-
}
|
55
|
-
|
56
|
-
DeviceMemoryReservation&
|
57
|
-
DeviceMemoryReservation::operator=(DeviceMemoryReservation&& m) {
|
58
|
-
if (data_) {
|
59
|
-
FAISS_ASSERT(state_);
|
60
|
-
state_->returnAllocation(*this);
|
61
|
-
}
|
62
|
-
|
63
|
-
state_ = m.state_;
|
64
|
-
device_ = m.device_;
|
65
|
-
data_ = m.data_;
|
66
|
-
size_ = m.size_;
|
67
|
-
stream_ = m.stream_;
|
68
|
-
|
69
|
-
m.data_ = NULL;
|
70
|
-
|
71
|
-
return *this;
|
72
|
-
}
|
73
|
-
|
74
|
-
DeviceMemory::~DeviceMemory() {
|
75
|
-
}
|
76
|
-
|
77
|
-
} } // namespace
|