faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/utils/DeviceMemory.h>
10
+ #include <faiss/gpu/utils/DeviceUtils.h>
11
+ #include <faiss/impl/FaissAssert.h>
12
+
13
+ namespace faiss { namespace gpu {
14
+
15
+ DeviceMemoryReservation::DeviceMemoryReservation()
16
+ : state_(NULL),
17
+ device_(0),
18
+ data_(NULL),
19
+ size_(0),
20
+ stream_(0) {
21
+ }
22
+
23
+ DeviceMemoryReservation::DeviceMemoryReservation(DeviceMemory* state,
24
+ int device,
25
+ void* p,
26
+ size_t size,
27
+ cudaStream_t stream)
28
+ : state_(state),
29
+ device_(device),
30
+ data_(p),
31
+ size_(size),
32
+ stream_(stream) {
33
+ }
34
+
35
+ DeviceMemoryReservation::DeviceMemoryReservation(
36
+ DeviceMemoryReservation&& m) noexcept {
37
+
38
+ state_ = m.state_;
39
+ device_ = m.device_;
40
+ data_ = m.data_;
41
+ size_ = m.size_;
42
+ stream_ = m.stream_;
43
+
44
+ m.data_ = NULL;
45
+ }
46
+
47
+ DeviceMemoryReservation::~DeviceMemoryReservation() {
48
+ if (data_) {
49
+ FAISS_ASSERT(state_);
50
+ state_->returnAllocation(*this);
51
+ }
52
+
53
+ data_ = NULL;
54
+ }
55
+
56
+ DeviceMemoryReservation&
57
+ DeviceMemoryReservation::operator=(DeviceMemoryReservation&& m) {
58
+ if (data_) {
59
+ FAISS_ASSERT(state_);
60
+ state_->returnAllocation(*this);
61
+ }
62
+
63
+ state_ = m.state_;
64
+ device_ = m.device_;
65
+ data_ = m.data_;
66
+ size_ = m.size_;
67
+ stream_ = m.stream_;
68
+
69
+ m.data_ = NULL;
70
+
71
+ return *this;
72
+ }
73
+
74
+ DeviceMemory::~DeviceMemory() {
75
+ }
76
+
77
+ } } // namespace
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <cuda_runtime.h>
12
+ #include <string>
13
+
14
+ namespace faiss { namespace gpu {
15
+
16
+ class DeviceMemory;
17
+
18
+ class DeviceMemoryReservation {
19
+ public:
20
+ DeviceMemoryReservation();
21
+ DeviceMemoryReservation(DeviceMemory* state,
22
+ int device, void* p, size_t size,
23
+ cudaStream_t stream);
24
+ DeviceMemoryReservation(DeviceMemoryReservation&& m) noexcept;
25
+ ~DeviceMemoryReservation();
26
+
27
+ DeviceMemoryReservation& operator=(DeviceMemoryReservation&& m);
28
+
29
+ int device() { return device_; }
30
+ void* get() { return data_; }
31
+ size_t size() { return size_; }
32
+ cudaStream_t stream() { return stream_; }
33
+
34
+ private:
35
+ DeviceMemory* state_;
36
+
37
+ int device_;
38
+ void* data_;
39
+ size_t size_;
40
+ cudaStream_t stream_;
41
+ };
42
+
43
+ /// Manages temporary memory allocations on a GPU device
44
+ class DeviceMemory {
45
+ public:
46
+ virtual ~DeviceMemory();
47
+
48
+ /// Returns the device we are managing memory for
49
+ virtual int getDevice() const = 0;
50
+
51
+ /// Obtains a temporary memory allocation for our device,
52
+ /// whose usage is ordered with respect to the given stream.
53
+ virtual DeviceMemoryReservation getMemory(cudaStream_t stream,
54
+ size_t size) = 0;
55
+
56
+ /// Returns the current size available without calling cudaMalloc
57
+ virtual size_t getSizeAvailable() const = 0;
58
+
59
+ /// Returns a string containing our current memory manager state
60
+ virtual std::string toString() const = 0;
61
+
62
+ /// Returns the high-water mark of cudaMalloc allocations for our
63
+ /// device
64
+ virtual size_t getHighWaterCudaMalloc() const = 0;
65
+
66
+ protected:
67
+ friend class DeviceMemoryReservation;
68
+ virtual void returnAllocation(DeviceMemoryReservation& m) = 0;
69
+ };
70
+
71
+ } } // namespace
@@ -0,0 +1,185 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <faiss/impl/FaissAssert.h>
12
+ #include <cuda_runtime.h>
13
+ #include <cublas_v2.h>
14
+ #include <vector>
15
+
16
+ namespace faiss { namespace gpu {
17
+
18
+ /// Returns the current thread-local GPU device
19
+ int getCurrentDevice();
20
+
21
+ /// Sets the current thread-local GPU device
22
+ void setCurrentDevice(int device);
23
+
24
+ /// Returns the number of available GPU devices
25
+ int getNumDevices();
26
+
27
+ /// Starts the CUDA profiler (exposed via SWIG)
28
+ void profilerStart();
29
+
30
+ /// Stops the CUDA profiler (exposed via SWIG)
31
+ void profilerStop();
32
+
33
+ /// Synchronizes the CPU against all devices (equivalent to
34
+ /// cudaDeviceSynchronize for each device)
35
+ void synchronizeAllDevices();
36
+
37
+ /// Returns a cached cudaDeviceProp for the given device
38
+ const cudaDeviceProp& getDeviceProperties(int device);
39
+
40
+ /// Returns the cached cudaDeviceProp for the current device
41
+ const cudaDeviceProp& getCurrentDeviceProperties();
42
+
43
+ /// Returns the maximum number of threads available for the given GPU
44
+ /// device
45
+ int getMaxThreads(int device);
46
+
47
+ /// Equivalent to getMaxThreads(getCurrentDevice())
48
+ int getMaxThreadsCurrentDevice();
49
+
50
+ /// Returns the maximum smem available for the given GPU device
51
+ size_t getMaxSharedMemPerBlock(int device);
52
+
53
+ /// Equivalent to getMaxSharedMemPerBlock(getCurrentDevice())
54
+ size_t getMaxSharedMemPerBlockCurrentDevice();
55
+
56
+ /// For a given pointer, returns whether or not it is located on
57
+ /// a device (deviceId >= 0) or the host (-1).
58
+ int getDeviceForAddress(const void* p);
59
+
60
+ /// Does the given device support full unified memory sharing host
61
+ /// memory?
62
+ bool getFullUnifiedMemSupport(int device);
63
+
64
+ /// Equivalent to getFullUnifiedMemSupport(getCurrentDevice())
65
+ bool getFullUnifiedMemSupportCurrentDevice();
66
+
67
+ /// Returns the maximum k-selection value supported based on the CUDA SDK that
68
+ /// we were compiled with. .cu files can use DeviceDefs.cuh, but this is for
69
+ /// non-CUDA files
70
+ int getMaxKSelection();
71
+
72
/// Scoped device switch: the constructor makes `device` the current device
/// and the destructor restores the device that was current beforehand.
class DeviceScope {
 public:
  /// @param device  device to make current for the lifetime of this object
  explicit DeviceScope(int device);

  ~DeviceScope();

 private:
  /// Device to restore on destruction
  int prevDevice_;
};
82
+
83
+ /// RAII object to manage a cublasHandle_t
84
+ class CublasHandleScope {
85
+ public:
86
+ CublasHandleScope();
87
+ ~CublasHandleScope();
88
+
89
+ cublasHandle_t get() { return blasHandle_; }
90
+
91
+ private:
92
+ cublasHandle_t blasHandle_;
93
+ };
94
+
95
+ // RAII object to manage a cudaEvent_t
96
+ class CudaEvent {
97
+ public:
98
+ /// Creates an event and records it in this stream
99
+ explicit CudaEvent(cudaStream_t stream);
100
+ CudaEvent(const CudaEvent& event) = delete;
101
+ CudaEvent(CudaEvent&& event) noexcept;
102
+ ~CudaEvent();
103
+
104
+ inline cudaEvent_t get() { return event_; }
105
+
106
+ /// Wait on this event in this stream
107
+ void streamWaitOnEvent(cudaStream_t stream);
108
+
109
+ /// Have the CPU wait for the completion of this event
110
+ void cpuWaitOnEvent();
111
+
112
+ CudaEvent& operator=(CudaEvent&& event) noexcept;
113
+ CudaEvent& operator=(CudaEvent& event) = delete;
114
+
115
+ private:
116
+ cudaEvent_t event_;
117
+ };
118
+
119
/// Evaluates the CUDA runtime call X exactly once and asserts (through
/// FAISS_ASSERT_FMT) that it returned cudaSuccess, reporting the numeric
/// code and cudaGetErrorString text otherwise.
#define CUDA_VERIFY(X)                                              \
  do {                                                              \
    cudaError_t err__ = (X);                                        \
    FAISS_ASSERT_FMT(err__ == cudaSuccess, "CUDA error %d %s",      \
                     static_cast<int>(err__),                       \
                     cudaGetErrorString(err__));                    \
  } while (0)
126
+
127
/// Probe for a pending CUDA error.
///
/// By default this only checks cudaGetLastError(). Defining
/// FAISS_GPU_SYNC_ERROR (uncomment the line below) switches the probe to
/// cudaDeviceSynchronize(), so the check is made synchronously.
// #define FAISS_GPU_SYNC_ERROR 1

#ifndef FAISS_GPU_SYNC_ERROR
#define CUDA_TEST_ERROR()                       \
  do {                                          \
    CUDA_VERIFY(cudaGetLastError());            \
  } while (0)
#else
#define CUDA_TEST_ERROR()                       \
  do {                                          \
    CUDA_VERIFY(cudaDeviceSynchronize());       \
  } while (0)
#endif
141
+
142
+ /// Call for a collection of streams to wait on
143
+ template <typename L1, typename L2>
144
+ void streamWaitBase(const L1& listWaiting, const L2& listWaitOn) {
145
+ // For all the streams we are waiting on, create an event
146
+ std::vector<cudaEvent_t> events;
147
+ for (auto& stream : listWaitOn) {
148
+ cudaEvent_t event;
149
+ CUDA_VERIFY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
150
+ CUDA_VERIFY(cudaEventRecord(event, stream));
151
+ events.push_back(event);
152
+ }
153
+
154
+ // For all the streams that are waiting, issue a wait
155
+ for (auto& stream : listWaiting) {
156
+ for (auto& event : events) {
157
+ CUDA_VERIFY(cudaStreamWaitEvent(stream, event, 0));
158
+ }
159
+ }
160
+
161
+ for (auto& event : events) {
162
+ CUDA_VERIFY(cudaEventDestroy(event));
163
+ }
164
+ }
165
+
166
+ /// These versions allow usage of initializer_list as arguments, since
167
+ /// otherwise {...} doesn't have a type
168
+ template <typename L1>
169
+ void streamWait(const L1& a,
170
+ const std::initializer_list<cudaStream_t>& b) {
171
+ streamWaitBase(a, b);
172
+ }
173
+
174
+ template <typename L2>
175
+ void streamWait(const std::initializer_list<cudaStream_t>& a,
176
+ const L2& b) {
177
+ streamWaitBase(a, b);
178
+ }
179
+
180
+ inline void streamWait(const std::initializer_list<cudaStream_t>& a,
181
+ const std::initializer_list<cudaStream_t>& b) {
182
+ streamWaitBase(a, b);
183
+ }
184
+
185
+ } } // namespace
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/utils/MemorySpace.h>
10
+ #include <faiss/impl/FaissAssert.h>
11
+ #include <cuda_runtime.h>
12
+
13
+ namespace faiss { namespace gpu {
14
+
15
+ /// Allocates CUDA memory for a given memory space
16
+ void allocMemorySpaceV(MemorySpace space, void** p, size_t size) {
17
+ switch (space) {
18
+ case MemorySpace::Device:
19
+ {
20
+ auto err = cudaMalloc(p, size);
21
+
22
+ // Throw if we fail to allocate
23
+ FAISS_THROW_IF_NOT_FMT(
24
+ err == cudaSuccess,
25
+ "failed to cudaMalloc %zu bytes (error %d %s)",
26
+ size, (int) err, cudaGetErrorString(err));
27
+ }
28
+ break;
29
+ case MemorySpace::Unified:
30
+ {
31
+ #ifdef FAISS_UNIFIED_MEM
32
+ auto err = cudaMallocManaged(p, size);
33
+
34
+ // Throw if we fail to allocate
35
+ FAISS_THROW_IF_NOT_FMT(
36
+ err == cudaSuccess,
37
+ "failed to cudaMallocManaged %zu bytes (error %d %s)",
38
+ size, (int) err, cudaGetErrorString(err));
39
+ #else
40
+ FAISS_THROW_MSG("Attempting to allocate via cudaMallocManaged "
41
+ "without CUDA 8+ support");
42
+ #endif
43
+ }
44
+ break;
45
+ case MemorySpace::HostPinned:
46
+ {
47
+ auto err = cudaHostAlloc(p, size, cudaHostAllocDefault);
48
+
49
+ // Throw if we fail to allocate
50
+ FAISS_THROW_IF_NOT_FMT(
51
+ err == cudaSuccess,
52
+ "failed to cudaHostAlloc %zu bytes (error %d %s)",
53
+ size, (int) err, cudaGetErrorString(err));
54
+ }
55
+ break;
56
+ default:
57
+ FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
58
+ break;
59
+ }
60
+ }
61
+
62
+ // We'll allow allocation to fail, but free should always succeed and be a
63
+ // fatal error if it doesn't free
64
+ void freeMemorySpace(MemorySpace space, void* p) {
65
+ switch (space) {
66
+ case MemorySpace::Device:
67
+ case MemorySpace::Unified:
68
+ {
69
+ auto err = cudaFree(p);
70
+ FAISS_ASSERT_FMT(err == cudaSuccess,
71
+ "Failed to cudaFree pointer %p (error %d %s)",
72
+ p, (int) err, cudaGetErrorString(err));
73
+ }
74
+ break;
75
+ case MemorySpace::HostPinned:
76
+ {
77
+ auto err = cudaFreeHost(p);
78
+ FAISS_ASSERT_FMT(err == cudaSuccess,
79
+ "Failed to cudaFreeHost pointer %p (error %d %s)",
80
+ p, (int) err, cudaGetErrorString(err));
81
+ }
82
+ break;
83
+ default:
84
+ FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
85
+ break;
86
+ }
87
+ }
88
+
89
+ } }
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <cuda.h>
12
+
13
// FAISS_UNIFIED_MEM is defined when the CUDA toolkit we are compiled
// against reports CUDA_VERSION 8000 or newer; it enables usage of CUDA
// Unified Memory.
#if defined(CUDA_VERSION) && (CUDA_VERSION >= 8000)
#define FAISS_UNIFIED_MEM 1
#endif
17
+
18
+ namespace faiss { namespace gpu {
19
+
20
/// Identifies which allocator/deallocator pair owns a region of memory.
/// The numeric values are spelled out explicitly.
enum MemorySpace {
  Device = 1,     ///< cudaMalloc / cudaFree
  Unified = 2,    ///< cudaMallocManaged / cudaFree
  HostPinned = 3  ///< cudaHostAlloc / cudaFreeHost
};
28
+
29
+ /// All memory allocations and de-allocations come through these functions
30
+
31
+ /// Allocates CUDA memory for a given memory space (void pointer)
32
+ /// Throws a FaissException if we are unable to allocate the memory
33
+ void allocMemorySpaceV(MemorySpace space, void** p, size_t size);
34
+
35
+ template <typename T>
36
+ inline void allocMemorySpace(MemorySpace space, T** p, size_t size) {
37
+ allocMemorySpaceV(space, (void**)(void*) p, size);
38
+ }
39
+
40
+ /// Frees CUDA memory for a given memory space
41
+ /// Asserts if we are unable to free the region
42
+ void freeMemorySpace(MemorySpace space, void* p);
43
+
44
+ } }