faiss 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
  6. data/vendor/faiss/c_api/AutoTune_c.h +2 -0
  7. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
  8. data/vendor/faiss/c_api/IndexShards_c.h +1 -4
  9. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
  10. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
  11. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
  12. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
  13. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
  14. data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
  15. data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
  16. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
  17. data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
  18. data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
  19. data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
  20. data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
  21. data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
  22. data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
  23. data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
  24. data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
  25. data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
  26. data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
  27. data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
  28. data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
  29. data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
  30. data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
  31. data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
  32. data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
  33. data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
  34. data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
  35. data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
  36. data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
  37. data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
  38. data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
  39. data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
  40. data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
  41. data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
  42. data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
  43. data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
  44. data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
  45. data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
  46. data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
  47. data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
  48. data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
  49. data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
  50. data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
  51. data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
  52. data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
  53. data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
  54. data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
  55. data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
  56. data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
  57. data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
  58. data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
  59. data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
  60. data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
  61. data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
  62. data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
  63. data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
  64. data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
  65. data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
  66. data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
  67. data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
  68. data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
  69. data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
  70. data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
  71. data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
  72. data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
  73. data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
  74. data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
  75. data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
  76. data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
  77. data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
  78. data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
  79. data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
  80. data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
  81. data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
  82. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
  83. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
  84. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
  85. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
  86. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
  87. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
  88. data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
  89. data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
  90. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
  91. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
  92. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
  93. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
  94. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
  95. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
  96. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
  97. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
  98. data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
  99. data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
  100. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
  101. data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
  102. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
  103. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
  104. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
  105. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
  106. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
  107. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
  108. data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
  109. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
  110. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
  111. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
  112. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
  113. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
  114. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
  115. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
  116. data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
  117. data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
  118. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
  119. data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
  120. data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
  121. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
  122. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
  123. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
  124. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
  125. data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
  126. data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
  127. data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
  128. data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
  129. data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
  130. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
  131. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
  132. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
  133. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
  134. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
  135. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
  136. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
  137. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
  138. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
  139. data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
  140. data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
  141. data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
  142. data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
  143. data/vendor/faiss/faiss/impl/io_macros.h +57 -0
  144. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
  145. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
  146. data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
  147. data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
  148. data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
  149. data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
  150. data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
  151. data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
  152. data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
  153. data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
  154. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
  155. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
  156. data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
  157. data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
  158. data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
  159. data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
  160. data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
  161. data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
  162. data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
  163. data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
  164. data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
  165. data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
  166. data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
  167. data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
  168. data/vendor/faiss/misc/test_blas.cpp +4 -1
  169. data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
  170. data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
  171. data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
  172. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
  173. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
  174. data/vendor/faiss/tests/test_merge.cpp +6 -3
  175. data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
  176. data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
  177. data/vendor/faiss/tests/test_params_override.cpp +7 -2
  178. data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
  179. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
  180. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
  181. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
  182. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
  183. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
  184. metadata +154 -153
  185. data/vendor/faiss/gpu/GpuResources.cpp +0 -52
  186. data/vendor/faiss/gpu/GpuResources.h +0 -73
  187. data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
  188. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
  189. data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
  190. data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
  191. data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
  192. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -1,52 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
- #include <faiss/gpu/GpuResources.h>
10
- #include <faiss/gpu/utils/DeviceUtils.h>
11
-
12
- namespace faiss { namespace gpu {
13
-
14
- GpuResources::~GpuResources() {
15
- }
16
-
17
- cublasHandle_t
18
- GpuResources::getBlasHandleCurrentDevice() {
19
- return getBlasHandle(getCurrentDevice());
20
- }
21
-
22
- cudaStream_t
23
- GpuResources::getDefaultStreamCurrentDevice() {
24
- return getDefaultStream(getCurrentDevice());
25
- }
26
-
27
- std::vector<cudaStream_t>
28
- GpuResources::getAlternateStreamsCurrentDevice() {
29
- return getAlternateStreams(getCurrentDevice());
30
- }
31
-
32
- DeviceMemory&
33
- GpuResources::getMemoryManagerCurrentDevice() {
34
- return getMemoryManager(getCurrentDevice());
35
- }
36
-
37
- cudaStream_t
38
- GpuResources::getAsyncCopyStreamCurrentDevice() {
39
- return getAsyncCopyStream(getCurrentDevice());
40
- }
41
-
42
- void
43
- GpuResources::syncDefaultStream(int device) {
44
- CUDA_VERIFY(cudaStreamSynchronize(getDefaultStream(device)));
45
- }
46
-
47
- void
48
- GpuResources::syncDefaultStreamCurrentDevice() {
49
- syncDefaultStream(getCurrentDevice());
50
- }
51
-
52
- } } // namespace
@@ -1,73 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
- #pragma once
10
-
11
- #include <faiss/gpu/utils/DeviceMemory.h>
12
- #include <cuda_runtime.h>
13
- #include <cublas_v2.h>
14
- #include <utility>
15
- #include <vector>
16
-
17
- namespace faiss { namespace gpu {
18
-
19
- /// Base class of GPU-side resource provider; hides provision of
20
- /// cuBLAS handles, CUDA streams and a temporary memory manager
21
- class GpuResources {
22
- public:
23
- virtual ~GpuResources();
24
-
25
- /// Call to pre-allocate resources for a particular device. If this is
26
- /// not called, then resources will be allocated at the first time
27
- /// of demand
28
- virtual void initializeForDevice(int device) = 0;
29
-
30
- /// Returns the cuBLAS handle that we use for the given device
31
- virtual cublasHandle_t getBlasHandle(int device) = 0;
32
-
33
- /// Returns the stream that we order all computation on for the
34
- /// given device
35
- virtual cudaStream_t getDefaultStream(int device) = 0;
36
-
37
- /// Returns the set of alternative streams that we use for the given device
38
- virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
39
-
40
- /// Returns the temporary memory manager for the given device
41
- virtual DeviceMemory& getMemoryManager(int device) = 0;
42
-
43
- /// Returns the available CPU pinned memory buffer
44
- virtual std::pair<void*, size_t> getPinnedMemory() = 0;
45
-
46
- /// Returns the stream on which we perform async CPU <-> GPU copies
47
- virtual cudaStream_t getAsyncCopyStream(int device) = 0;
48
-
49
- /// Calls getBlasHandle with the current device
50
- cublasHandle_t getBlasHandleCurrentDevice();
51
-
52
- /// Calls getDefaultStream with the current device
53
- cudaStream_t getDefaultStreamCurrentDevice();
54
-
55
- /// Synchronizes the CPU with respect to the default stream for the
56
- /// given device
57
- // equivalent to cudaDeviceSynchronize(getDefaultStream(device))
58
- void syncDefaultStream(int device);
59
-
60
- /// Calls syncDefaultStream for the current device
61
- void syncDefaultStreamCurrentDevice();
62
-
63
- /// Calls getAlternateStreams for the current device
64
- std::vector<cudaStream_t> getAlternateStreamsCurrentDevice();
65
-
66
- /// Calls getMemoryManager for the current device
67
- DeviceMemory& getMemoryManagerCurrentDevice();
68
-
69
- /// Calls getAsyncCopyStream for the current device
70
- cudaStream_t getAsyncCopyStreamCurrentDevice();
71
- };
72
-
73
- } } // namespace
@@ -1,303 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
- #include <faiss/gpu/StandardGpuResources.h>
10
- #include <faiss/gpu/utils/DeviceUtils.h>
11
- #include <faiss/gpu/utils/MemorySpace.h>
12
- #include <faiss/impl/FaissAssert.h>
13
- #include <limits>
14
-
15
- namespace faiss { namespace gpu {
16
-
17
- namespace {
18
-
19
- // How many streams per device we allocate by default (for multi-streaming)
20
- constexpr int kNumStreams = 2;
21
-
22
- // Use 256 MiB of pinned memory for async CPU <-> GPU copies by default
23
- constexpr size_t kDefaultPinnedMemoryAllocation = (size_t) 256 * 1024 * 1024;
24
-
25
- // Default temporary memory allocation for <= 4 GiB memory GPUs
26
- constexpr size_t k4GiBTempMem = (size_t) 512 * 1024 * 1024;
27
-
28
- // Default temporary memory allocation for <= 8 GiB memory GPUs
29
- constexpr size_t k8GiBTempMem = (size_t) 1024 * 1024 * 1024;
30
-
31
- // Maximum temporary memory allocation for all GPUs
32
- constexpr size_t kMaxTempMem = (size_t) 1536 * 1024 * 1024;
33
-
34
- }
35
-
36
- StandardGpuResources::StandardGpuResources() :
37
- pinnedMemAlloc_(nullptr),
38
- pinnedMemAllocSize_(0),
39
- // let the adjustment function determine the memory size for us by passing
40
- // in a huge value that will then be adjusted
41
- tempMemSize_(getDefaultTempMemForGPU(-1,
42
- std::numeric_limits<size_t>::max())),
43
- pinnedMemSize_(kDefaultPinnedMemoryAllocation),
44
- cudaMallocWarning_(true) {
45
- }
46
-
47
- StandardGpuResources::~StandardGpuResources() {
48
- for (auto& entry : defaultStreams_) {
49
- DeviceScope scope(entry.first);
50
-
51
- auto it = userDefaultStreams_.find(entry.first);
52
- if (it == userDefaultStreams_.end()) {
53
- // The user did not specify this stream, thus we are the ones
54
- // who have created it
55
- CUDA_VERIFY(cudaStreamDestroy(entry.second));
56
- }
57
- }
58
-
59
- for (auto& entry : alternateStreams_) {
60
- DeviceScope scope(entry.first);
61
-
62
- for (auto stream : entry.second) {
63
- CUDA_VERIFY(cudaStreamDestroy(stream));
64
- }
65
- }
66
-
67
- for (auto& entry : asyncCopyStreams_) {
68
- DeviceScope scope(entry.first);
69
-
70
- CUDA_VERIFY(cudaStreamDestroy(entry.second));
71
- }
72
-
73
- for (auto& entry : blasHandles_) {
74
- DeviceScope scope(entry.first);
75
-
76
- auto blasStatus = cublasDestroy(entry.second);
77
- FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
78
- }
79
-
80
- if (pinnedMemAlloc_) {
81
- freeMemorySpace(MemorySpace::HostPinned, pinnedMemAlloc_);
82
- }
83
- }
84
-
85
- size_t
86
- StandardGpuResources::getDefaultTempMemForGPU(int device,
87
- size_t requested) {
88
- auto totalMem = device != -1 ?
89
- getDeviceProperties(device).totalGlobalMem :
90
- std::numeric_limits<size_t>::max();
91
-
92
- if (totalMem <= (size_t) 4 * 1024 * 1024 * 1024) {
93
- // If the GPU has <= 4 GiB of memory, reserve 512 MiB
94
-
95
- if (requested > k4GiBTempMem) {
96
- return k4GiBTempMem;
97
- }
98
- } else if (totalMem <= (size_t) 8 * 1024 * 1024 * 1024) {
99
- // If the GPU has <= 8 GiB of memory, reserve 1 GiB
100
-
101
- if (requested > k8GiBTempMem) {
102
- return k8GiBTempMem;
103
- }
104
- } else {
105
- // Never use more than 1.5 GiB
106
- if (requested > kMaxTempMem) {
107
- return kMaxTempMem;
108
- }
109
- }
110
-
111
- // use whatever lower limit the user requested
112
- return requested;
113
- }
114
-
115
- void
116
- StandardGpuResources::noTempMemory() {
117
- setTempMemory(0);
118
- setCudaMallocWarning(false);
119
- }
120
-
121
- void
122
- StandardGpuResources::setTempMemory(size_t size) {
123
- if (tempMemSize_ != size) {
124
- // adjust based on general limits
125
- tempMemSize_ = getDefaultTempMemForGPU(-1, size);
126
-
127
- // We need to re-initialize memory resources for all current devices that
128
- // have been initialized.
129
- // This should be safe to do, even if we are currently running work, because
130
- // the cudaFree call that this implies will force-synchronize all GPUs with
131
- // the CPU
132
- for (auto& p : memory_) {
133
- int device = p.first;
134
- // Free the existing memory first
135
- p.second.reset();
136
-
137
- // Allocate new
138
- p.second = std::unique_ptr<StackDeviceMemory>(
139
- new StackDeviceMemory(p.first,
140
- // adjust for this specific device
141
- getDefaultTempMemForGPU(device, tempMemSize_)));
142
- }
143
- }
144
- }
145
-
146
- void
147
- StandardGpuResources::setPinnedMemory(size_t size) {
148
- // Should not call this after devices have been initialized
149
- FAISS_ASSERT(defaultStreams_.size() == 0);
150
- FAISS_ASSERT(!pinnedMemAlloc_);
151
-
152
- pinnedMemSize_ = size;
153
- }
154
-
155
- void
156
- StandardGpuResources::setDefaultStream(int device, cudaStream_t stream) {
157
- auto it = defaultStreams_.find(device);
158
- if (it != defaultStreams_.end()) {
159
- // Replace this stream with the user stream
160
- CUDA_VERIFY(cudaStreamDestroy(it->second));
161
- it->second = stream;
162
- }
163
-
164
- userDefaultStreams_[device] = stream;
165
- }
166
-
167
- void
168
- StandardGpuResources::setDefaultNullStreamAllDevices() {
169
- for (int dev = 0; dev < getNumDevices(); ++dev) {
170
- setDefaultStream(dev, nullptr);
171
- }
172
- }
173
-
174
- void
175
- StandardGpuResources::setCudaMallocWarning(bool b) {
176
- cudaMallocWarning_ = b;
177
-
178
- for (auto& v : memory_) {
179
- v.second->setCudaMallocWarning(b);
180
- }
181
- }
182
-
183
- bool
184
- StandardGpuResources::isInitialized(int device) const {
185
- // Use default streams as a marker for whether or not a certain
186
- // device has been initialized
187
- return defaultStreams_.count(device) != 0;
188
- }
189
-
190
- void
191
- StandardGpuResources::initializeForDevice(int device) {
192
- if (isInitialized(device)) {
193
- return;
194
- }
195
-
196
- // If this is the first device that we're initializing, create our
197
- // pinned memory allocation
198
- if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
199
- allocMemorySpace(MemorySpace::HostPinned, &pinnedMemAlloc_, pinnedMemSize_);
200
- pinnedMemAllocSize_ = pinnedMemSize_;
201
- }
202
-
203
- FAISS_ASSERT(device < getNumDevices());
204
- DeviceScope scope(device);
205
-
206
- // Make sure that device properties for all devices are cached
207
- auto& prop = getDeviceProperties(device);
208
-
209
- // Also check to make sure we meet our minimum compute capability (3.0)
210
- FAISS_ASSERT_FMT(prop.major >= 3,
211
- "Device id %d with CC %d.%d not supported, "
212
- "need 3.0+ compute capability",
213
- device, prop.major, prop.minor);
214
-
215
- // Create streams
216
- cudaStream_t defaultStream = 0;
217
- auto it = userDefaultStreams_.find(device);
218
- if (it != userDefaultStreams_.end()) {
219
- // We already have a stream provided by the user
220
- defaultStream = it->second;
221
- } else {
222
- CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,
223
- cudaStreamNonBlocking));
224
- }
225
-
226
- defaultStreams_[device] = defaultStream;
227
-
228
- cudaStream_t asyncCopyStream = 0;
229
- CUDA_VERIFY(cudaStreamCreateWithFlags(&asyncCopyStream,
230
- cudaStreamNonBlocking));
231
-
232
- asyncCopyStreams_[device] = asyncCopyStream;
233
-
234
- std::vector<cudaStream_t> deviceStreams;
235
- for (int j = 0; j < kNumStreams; ++j) {
236
- cudaStream_t stream = 0;
237
- CUDA_VERIFY(cudaStreamCreateWithFlags(&stream,
238
- cudaStreamNonBlocking));
239
-
240
- deviceStreams.push_back(stream);
241
- }
242
-
243
- alternateStreams_[device] = std::move(deviceStreams);
244
-
245
- // Create cuBLAS handle
246
- cublasHandle_t blasHandle = 0;
247
- auto blasStatus = cublasCreate(&blasHandle);
248
- FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
249
- blasHandles_[device] = blasHandle;
250
-
251
- // Enable tensor core support if available
252
- #if CUDA_VERSION >= 9000
253
- if (getTensorCoreSupport(device)) {
254
- cublasSetMathMode(blasHandle, CUBLAS_TENSOR_OP_MATH);
255
- }
256
- #endif
257
-
258
- FAISS_ASSERT(memory_.count(device) == 0);
259
-
260
- auto mem = std::unique_ptr<StackDeviceMemory>(
261
- new StackDeviceMemory(device,
262
- // adjust for this specific device
263
- getDefaultTempMemForGPU(device, tempMemSize_)));
264
- mem->setCudaMallocWarning(cudaMallocWarning_);
265
-
266
- memory_.emplace(device, std::move(mem));
267
- }
268
-
269
- cublasHandle_t
270
- StandardGpuResources::getBlasHandle(int device) {
271
- initializeForDevice(device);
272
- return blasHandles_[device];
273
- }
274
-
275
- cudaStream_t
276
- StandardGpuResources::getDefaultStream(int device) {
277
- initializeForDevice(device);
278
- return defaultStreams_[device];
279
- }
280
-
281
- std::vector<cudaStream_t>
282
- StandardGpuResources::getAlternateStreams(int device) {
283
- initializeForDevice(device);
284
- return alternateStreams_[device];
285
- }
286
-
287
- DeviceMemory& StandardGpuResources::getMemoryManager(int device) {
288
- initializeForDevice(device);
289
- return *memory_[device];
290
- }
291
-
292
- std::pair<void*, size_t>
293
- StandardGpuResources::getPinnedMemory() {
294
- return std::make_pair(pinnedMemAlloc_, pinnedMemAllocSize_);
295
- }
296
-
297
- cudaStream_t
298
- StandardGpuResources::getAsyncCopyStream(int device) {
299
- initializeForDevice(device);
300
- return asyncCopyStreams_[device];
301
- }
302
-
303
- } } // namespace
@@ -1,77 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
- #include <faiss/gpu/utils/DeviceMemory.h>
10
- #include <faiss/gpu/utils/DeviceUtils.h>
11
- #include <faiss/impl/FaissAssert.h>
12
-
13
- namespace faiss { namespace gpu {
14
-
15
- DeviceMemoryReservation::DeviceMemoryReservation()
16
- : state_(NULL),
17
- device_(0),
18
- data_(NULL),
19
- size_(0),
20
- stream_(0) {
21
- }
22
-
23
- DeviceMemoryReservation::DeviceMemoryReservation(DeviceMemory* state,
24
- int device,
25
- void* p,
26
- size_t size,
27
- cudaStream_t stream)
28
- : state_(state),
29
- device_(device),
30
- data_(p),
31
- size_(size),
32
- stream_(stream) {
33
- }
34
-
35
- DeviceMemoryReservation::DeviceMemoryReservation(
36
- DeviceMemoryReservation&& m) noexcept {
37
-
38
- state_ = m.state_;
39
- device_ = m.device_;
40
- data_ = m.data_;
41
- size_ = m.size_;
42
- stream_ = m.stream_;
43
-
44
- m.data_ = NULL;
45
- }
46
-
47
- DeviceMemoryReservation::~DeviceMemoryReservation() {
48
- if (data_) {
49
- FAISS_ASSERT(state_);
50
- state_->returnAllocation(*this);
51
- }
52
-
53
- data_ = NULL;
54
- }
55
-
56
- DeviceMemoryReservation&
57
- DeviceMemoryReservation::operator=(DeviceMemoryReservation&& m) {
58
- if (data_) {
59
- FAISS_ASSERT(state_);
60
- state_->returnAllocation(*this);
61
- }
62
-
63
- state_ = m.state_;
64
- device_ = m.device_;
65
- data_ = m.data_;
66
- size_ = m.size_;
67
- stream_ = m.stream_;
68
-
69
- m.data_ = NULL;
70
-
71
- return *this;
72
- }
73
-
74
- DeviceMemory::~DeviceMemory() {
75
- }
76
-
77
- } } // namespace