faiss 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/utils/DeviceMemory.h>
10
+ #include <faiss/gpu/utils/DeviceUtils.h>
11
+ #include <faiss/impl/FaissAssert.h>
12
+
13
+ namespace faiss { namespace gpu {
14
+
15
+ DeviceMemoryReservation::DeviceMemoryReservation()
16
+ : state_(NULL),
17
+ device_(0),
18
+ data_(NULL),
19
+ size_(0),
20
+ stream_(0) {
21
+ }
22
+
23
+ DeviceMemoryReservation::DeviceMemoryReservation(DeviceMemory* state,
24
+ int device,
25
+ void* p,
26
+ size_t size,
27
+ cudaStream_t stream)
28
+ : state_(state),
29
+ device_(device),
30
+ data_(p),
31
+ size_(size),
32
+ stream_(stream) {
33
+ }
34
+
35
+ DeviceMemoryReservation::DeviceMemoryReservation(
36
+ DeviceMemoryReservation&& m) noexcept {
37
+
38
+ state_ = m.state_;
39
+ device_ = m.device_;
40
+ data_ = m.data_;
41
+ size_ = m.size_;
42
+ stream_ = m.stream_;
43
+
44
+ m.data_ = NULL;
45
+ }
46
+
47
+ DeviceMemoryReservation::~DeviceMemoryReservation() {
48
+ if (data_) {
49
+ FAISS_ASSERT(state_);
50
+ state_->returnAllocation(*this);
51
+ }
52
+
53
+ data_ = NULL;
54
+ }
55
+
56
+ DeviceMemoryReservation&
57
+ DeviceMemoryReservation::operator=(DeviceMemoryReservation&& m) {
58
+ if (data_) {
59
+ FAISS_ASSERT(state_);
60
+ state_->returnAllocation(*this);
61
+ }
62
+
63
+ state_ = m.state_;
64
+ device_ = m.device_;
65
+ data_ = m.data_;
66
+ size_ = m.size_;
67
+ stream_ = m.stream_;
68
+
69
+ m.data_ = NULL;
70
+
71
+ return *this;
72
+ }
73
+
74
+ DeviceMemory::~DeviceMemory() {
75
+ }
76
+
77
+ } } // namespace
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <cuda_runtime.h>
12
+ #include <string>
13
+
14
+ namespace faiss { namespace gpu {
15
+
16
+ class DeviceMemory;
17
+
18
+ class DeviceMemoryReservation {
19
+ public:
20
+ DeviceMemoryReservation();
21
+ DeviceMemoryReservation(DeviceMemory* state,
22
+ int device, void* p, size_t size,
23
+ cudaStream_t stream);
24
+ DeviceMemoryReservation(DeviceMemoryReservation&& m) noexcept;
25
+ ~DeviceMemoryReservation();
26
+
27
+ DeviceMemoryReservation& operator=(DeviceMemoryReservation&& m);
28
+
29
+ int device() { return device_; }
30
+ void* get() { return data_; }
31
+ size_t size() { return size_; }
32
+ cudaStream_t stream() { return stream_; }
33
+
34
+ private:
35
+ DeviceMemory* state_;
36
+
37
+ int device_;
38
+ void* data_;
39
+ size_t size_;
40
+ cudaStream_t stream_;
41
+ };
42
+
43
+ /// Manages temporary memory allocations on a GPU device
44
+ class DeviceMemory {
45
+ public:
46
+ virtual ~DeviceMemory();
47
+
48
+ /// Returns the device we are managing memory for
49
+ virtual int getDevice() const = 0;
50
+
51
+ /// Obtains a temporary memory allocation for our device,
52
+ /// whose usage is ordered with respect to the given stream.
53
+ virtual DeviceMemoryReservation getMemory(cudaStream_t stream,
54
+ size_t size) = 0;
55
+
56
+ /// Returns the current size available without calling cudaMalloc
57
+ virtual size_t getSizeAvailable() const = 0;
58
+
59
+ /// Returns a string containing our current memory manager state
60
+ virtual std::string toString() const = 0;
61
+
62
+ /// Returns the high-water mark of cudaMalloc allocations for our
63
+ /// device
64
+ virtual size_t getHighWaterCudaMalloc() const = 0;
65
+
66
+ protected:
67
+ friend class DeviceMemoryReservation;
68
+ virtual void returnAllocation(DeviceMemoryReservation& m) = 0;
69
+ };
70
+
71
+ } } // namespace
@@ -0,0 +1,185 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <faiss/impl/FaissAssert.h>
12
+ #include <cuda_runtime.h>
13
+ #include <cublas_v2.h>
14
+ #include <vector>
15
+
16
+ namespace faiss { namespace gpu {
17
+
18
+ /// Returns the current thread-local GPU device
19
+ int getCurrentDevice();
20
+
21
+ /// Sets the current thread-local GPU device
22
+ void setCurrentDevice(int device);
23
+
24
+ /// Returns the number of available GPU devices
25
+ int getNumDevices();
26
+
27
+ /// Starts the CUDA profiler (exposed via SWIG)
28
+ void profilerStart();
29
+
30
+ /// Stops the CUDA profiler (exposed via SWIG)
31
+ void profilerStop();
32
+
33
+ /// Synchronizes the CPU against all devices (equivalent to
34
+ /// cudaDeviceSynchronize for each device)
35
+ void synchronizeAllDevices();
36
+
37
+ /// Returns a cached cudaDeviceProp for the given device
38
+ const cudaDeviceProp& getDeviceProperties(int device);
39
+
40
+ /// Returns the cached cudaDeviceProp for the current device
41
+ const cudaDeviceProp& getCurrentDeviceProperties();
42
+
43
+ /// Returns the maximum number of threads available for the given GPU
44
+ /// device
45
+ int getMaxThreads(int device);
46
+
47
+ /// Equivalent to getMaxThreads(getCurrentDevice())
48
+ int getMaxThreadsCurrentDevice();
49
+
50
+ /// Returns the maximum smem available for the given GPU device
51
+ size_t getMaxSharedMemPerBlock(int device);
52
+
53
+ /// Equivalent to getMaxSharedMemPerBlock(getCurrentDevice())
54
+ size_t getMaxSharedMemPerBlockCurrentDevice();
55
+
56
+ /// For a given pointer, returns whether or not it is located on
57
+ /// a device (deviceId >= 0) or the host (-1).
58
+ int getDeviceForAddress(const void* p);
59
+
60
+ /// Does the given device support full unified memory sharing host
61
+ /// memory?
62
+ bool getFullUnifiedMemSupport(int device);
63
+
64
+ /// Equivalent to getFullUnifiedMemSupport(getCurrentDevice())
65
+ bool getFullUnifiedMemSupportCurrentDevice();
66
+
67
+ /// Returns the maximum k-selection value supported based on the CUDA SDK that
68
+ /// we were compiled with. .cu files can use DeviceDefs.cuh, but this is for
69
+ /// non-CUDA files
70
+ int getMaxKSelection();
71
+
72
/// Scope guard for the current GPU device: the constructor switches to
/// `device`, the destructor restores the device that was current before
class DeviceScope {
 public:
  explicit DeviceScope(int device);
  ~DeviceScope();

 private:
  /// Device to restore on destruction
  int prevDevice_;
};
82
+
83
+ /// RAII object to manage a cublasHandle_t
84
+ class CublasHandleScope {
85
+ public:
86
+ CublasHandleScope();
87
+ ~CublasHandleScope();
88
+
89
+ cublasHandle_t get() { return blasHandle_; }
90
+
91
+ private:
92
+ cublasHandle_t blasHandle_;
93
+ };
94
+
95
+ // RAII object to manage a cudaEvent_t
96
+ class CudaEvent {
97
+ public:
98
+ /// Creates an event and records it in this stream
99
+ explicit CudaEvent(cudaStream_t stream);
100
+ CudaEvent(const CudaEvent& event) = delete;
101
+ CudaEvent(CudaEvent&& event) noexcept;
102
+ ~CudaEvent();
103
+
104
+ inline cudaEvent_t get() { return event_; }
105
+
106
+ /// Wait on this event in this stream
107
+ void streamWaitOnEvent(cudaStream_t stream);
108
+
109
+ /// Have the CPU wait for the completion of this event
110
+ void cpuWaitOnEvent();
111
+
112
+ CudaEvent& operator=(CudaEvent&& event) noexcept;
113
+ CudaEvent& operator=(CudaEvent& event) = delete;
114
+
115
+ private:
116
+ cudaEvent_t event_;
117
+ };
118
+
119
/// Evaluates the CUDA runtime expression X exactly once and asserts, through
/// FAISS_ASSERT_FMT, that it yielded cudaSuccess. On failure the numeric
/// error code and its cudaGetErrorString() text are reported.
/// The do/while(0) wrapper makes the macro usable as a single statement.
#define CUDA_VERIFY(X)                                          \
  do {                                                          \
    auto err__ = (X);                                           \
    FAISS_ASSERT_FMT(err__ == cudaSuccess, "CUDA error %d %s",  \
                     (int) err__, cudaGetErrorString(err__));   \
  } while (0)
126
+
127
/// Probe for pending CUDA errors.
///
/// With FAISS_GPU_SYNC_ERROR defined, the probe is synchronous: it verifies
/// the result of cudaDeviceSynchronize(). Otherwise it only verifies
/// cudaGetLastError(). Uncomment the line below to enable the synchronous
/// variant.
// #define FAISS_GPU_SYNC_ERROR 1

// CUDA_VERIFY already expands to a single do/while(0) statement, so it is
// used directly rather than being wrapped a second time.
#ifdef FAISS_GPU_SYNC_ERROR
#define CUDA_TEST_ERROR() CUDA_VERIFY(cudaDeviceSynchronize())
#else
#define CUDA_TEST_ERROR() CUDA_VERIFY(cudaGetLastError())
#endif
141
+
142
+ /// Call for a collection of streams to wait on
143
+ template <typename L1, typename L2>
144
+ void streamWaitBase(const L1& listWaiting, const L2& listWaitOn) {
145
+ // For all the streams we are waiting on, create an event
146
+ std::vector<cudaEvent_t> events;
147
+ for (auto& stream : listWaitOn) {
148
+ cudaEvent_t event;
149
+ CUDA_VERIFY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
150
+ CUDA_VERIFY(cudaEventRecord(event, stream));
151
+ events.push_back(event);
152
+ }
153
+
154
+ // For all the streams that are waiting, issue a wait
155
+ for (auto& stream : listWaiting) {
156
+ for (auto& event : events) {
157
+ CUDA_VERIFY(cudaStreamWaitEvent(stream, event, 0));
158
+ }
159
+ }
160
+
161
+ for (auto& event : events) {
162
+ CUDA_VERIFY(cudaEventDestroy(event));
163
+ }
164
+ }
165
+
166
+ /// These versions allow usage of initializer_list as arguments, since
167
+ /// otherwise {...} doesn't have a type
168
+ template <typename L1>
169
+ void streamWait(const L1& a,
170
+ const std::initializer_list<cudaStream_t>& b) {
171
+ streamWaitBase(a, b);
172
+ }
173
+
174
+ template <typename L2>
175
+ void streamWait(const std::initializer_list<cudaStream_t>& a,
176
+ const L2& b) {
177
+ streamWaitBase(a, b);
178
+ }
179
+
180
+ inline void streamWait(const std::initializer_list<cudaStream_t>& a,
181
+ const std::initializer_list<cudaStream_t>& b) {
182
+ streamWaitBase(a, b);
183
+ }
184
+
185
+ } } // namespace
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/utils/MemorySpace.h>
10
+ #include <faiss/impl/FaissAssert.h>
11
+ #include <cuda_runtime.h>
12
+
13
+ namespace faiss { namespace gpu {
14
+
15
+ /// Allocates CUDA memory for a given memory space
16
+ void allocMemorySpaceV(MemorySpace space, void** p, size_t size) {
17
+ switch (space) {
18
+ case MemorySpace::Device:
19
+ {
20
+ auto err = cudaMalloc(p, size);
21
+
22
+ // Throw if we fail to allocate
23
+ FAISS_THROW_IF_NOT_FMT(
24
+ err == cudaSuccess,
25
+ "failed to cudaMalloc %zu bytes (error %d %s)",
26
+ size, (int) err, cudaGetErrorString(err));
27
+ }
28
+ break;
29
+ case MemorySpace::Unified:
30
+ {
31
+ #ifdef FAISS_UNIFIED_MEM
32
+ auto err = cudaMallocManaged(p, size);
33
+
34
+ // Throw if we fail to allocate
35
+ FAISS_THROW_IF_NOT_FMT(
36
+ err == cudaSuccess,
37
+ "failed to cudaMallocManaged %zu bytes (error %d %s)",
38
+ size, (int) err, cudaGetErrorString(err));
39
+ #else
40
+ FAISS_THROW_MSG("Attempting to allocate via cudaMallocManaged "
41
+ "without CUDA 8+ support");
42
+ #endif
43
+ }
44
+ break;
45
+ case MemorySpace::HostPinned:
46
+ {
47
+ auto err = cudaHostAlloc(p, size, cudaHostAllocDefault);
48
+
49
+ // Throw if we fail to allocate
50
+ FAISS_THROW_IF_NOT_FMT(
51
+ err == cudaSuccess,
52
+ "failed to cudaHostAlloc %zu bytes (error %d %s)",
53
+ size, (int) err, cudaGetErrorString(err));
54
+ }
55
+ break;
56
+ default:
57
+ FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
58
+ break;
59
+ }
60
+ }
61
+
62
+ // We'll allow allocation to fail, but free should always succeed and be a
63
+ // fatal error if it doesn't free
64
+ void freeMemorySpace(MemorySpace space, void* p) {
65
+ switch (space) {
66
+ case MemorySpace::Device:
67
+ case MemorySpace::Unified:
68
+ {
69
+ auto err = cudaFree(p);
70
+ FAISS_ASSERT_FMT(err == cudaSuccess,
71
+ "Failed to cudaFree pointer %p (error %d %s)",
72
+ p, (int) err, cudaGetErrorString(err));
73
+ }
74
+ break;
75
+ case MemorySpace::HostPinned:
76
+ {
77
+ auto err = cudaFreeHost(p);
78
+ FAISS_ASSERT_FMT(err == cudaSuccess,
79
+ "Failed to cudaFreeHost pointer %p (error %d %s)",
80
+ p, (int) err, cudaGetErrorString(err));
81
+ }
82
+ break;
83
+ default:
84
+ FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
85
+ break;
86
+ }
87
+ }
88
+
89
+ } }
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <cuda.h>
12
+
13
+ #if CUDA_VERSION >= 8000
14
+ // Whether or not we enable usage of CUDA Unified Memory
15
+ #define FAISS_UNIFIED_MEM 1
16
+ #endif
17
+
18
+ namespace faiss { namespace gpu {
19
+
20
/// Kind of memory an allocation lives in; each enumerator names the
/// allocate/free pair that manages it.
enum MemorySpace {
  Device = 1,      ///< cudaMalloc / cudaFree
  Unified = 2,     ///< cudaMallocManaged / cudaFree
  HostPinned = 3,  ///< cudaHostAlloc / cudaFreeHost
};
28
+
29
+ /// All memory allocations and de-allocations come through these functions
30
+
31
+ /// Allocates CUDA memory for a given memory space (void pointer)
32
+ /// Throws a FaissException if we are unable to allocate the memory
33
+ void allocMemorySpaceV(MemorySpace space, void** p, size_t size);
34
+
35
+ template <typename T>
36
+ inline void allocMemorySpace(MemorySpace space, T** p, size_t size) {
37
+ allocMemorySpaceV(space, (void**)(void*) p, size);
38
+ }
39
+
40
+ /// Frees CUDA memory for a given memory space
41
+ /// Asserts if we are unable to free the region
42
+ void freeMemorySpace(MemorySpace space, void* p);
43
+
44
+ } }