faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,239 @@ data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/utils/StackDeviceMemory.h>
10
+ #include <faiss/gpu/utils/DeviceUtils.h>
11
+ #include <faiss/gpu/utils/MemorySpace.h>
12
+ #include <faiss/gpu/utils/StaticUtils.h>
13
+ #include <faiss/impl/FaissAssert.h>
14
+ #include <stdio.h>
15
+ #include <sstream>
16
+
17
+ namespace faiss { namespace gpu {
18
+ // Owning constructor: allocates a region of `sz` bytes via allocMemorySpace(MemorySpace::Device, ...) and starts with an empty stack (head_ == start_).
19
+ StackDeviceMemory::Stack::Stack(int d, size_t sz)
20
+ : device_(d),
21
+ isOwner_(true), // this Stack allocated the region, so ~Stack() frees it
22
+ start_(nullptr),
23
+ end_(nullptr),
24
+ size_(sz),
25
+ head_(nullptr),
26
+ mallocCurrent_(0),
27
+ highWaterMemoryUsed_(0),
28
+ highWaterMalloc_(0),
29
+ cudaMallocWarning_(true) {
30
+ DeviceScope s(device_); // presumably makes device_ current for this scope (DeviceScope is declared elsewhere) -- TODO confirm
31
+
32
+ allocMemorySpace(MemorySpace::Device, &start_, size_);
33
+
34
+ head_ = start_; // nothing handed out yet
35
+ end_ = start_ + size_; // one past the last byte of the region
36
+ }
37
+ // Wrapping constructor: manages the caller-supplied region [p, p + sz) without allocating; `isOwner` decides whether ~Stack() frees it.
38
+ StackDeviceMemory::Stack::Stack(int d, void* p, size_t sz, bool isOwner)
39
+ : device_(d),
40
+ isOwner_(isOwner),
41
+ start_((char*) p),
42
+ end_(((char*) p) + sz),
43
+ size_(sz),
44
+ head_((char*) p), // stack starts empty: head_ == start_
45
+ mallocCurrent_(0),
46
+ highWaterMemoryUsed_(0),
47
+ highWaterMalloc_(0),
48
+ cudaMallocWarning_(true) {
49
+ }
50
+ // Releases the managed region with freeMemorySpace, but only when this Stack owns it (isOwner_).
51
+ StackDeviceMemory::Stack::~Stack() {
52
+ if (isOwner_) {
53
+ DeviceScope s(device_); // presumably selects device_ for the free below -- TODO confirm
54
+
55
+ freeMemorySpace(MemorySpace::Device, start_);
56
+ }
57
+ }
58
+ // Returns the number of bytes left between the stack head and the end of the region.
59
+ size_t
60
+ StackDeviceMemory::Stack::getSizeAvailable() const {
61
+ return (end_ - head_);
62
+ }
63
+ // Hands out `size` bytes: from the stack region when it fits, otherwise from a separate allocMemorySpace call. Returns the start of the allocation.
64
+ char*
65
+ StackDeviceMemory::Stack::getAlloc(size_t size,
66
+ cudaStream_t stream) {
67
+ if (size > (end_ - head_)) {
68
+ // Request exceeds the space left on the stack; fall back to a one-off allocation
69
+ DeviceScope s(device_);
70
+
71
+ if (cudaMallocWarning_) {
72
+ // Print our requested size before we attempt the allocation; highWaterMalloc_ here does not yet include this request
73
+ fprintf(stderr, "WARN: increase temp memory to avoid cudaMalloc, "
74
+ "or decrease query/add size (alloc %zu B, highwater %zu B)\n",
75
+ size, highWaterMalloc_);
76
+ }
77
+
78
+ char* p = nullptr;
79
+ allocMemorySpace(MemorySpace::Device, &p, size);
80
+
81
+ mallocCurrent_ += size; // bytes of one-off allocations currently outstanding
82
+ highWaterMalloc_ = std::max(highWaterMalloc_, mallocCurrent_);
83
+
84
+ return p;
85
+ } else {
86
+ // We can make the allocation out of our stack
87
+ // Find all the ranges that we overlap that may have been
88
+ // previously allocated; our allocation will be [head, endAlloc)
89
+ char* startAlloc = head_;
90
+ char* endAlloc = head_ + size;
91
+
92
+ while (lastUsers_.size() > 0) {
93
+ auto& prevUser = lastUsers_.back(); // most recently returned range
94
+
95
+ // Because there is a previous user, we must overlap it
96
+ FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
97
+
98
+ if (stream != prevUser.stream_) {
99
+ // Synchronization required
100
+ // FIXME: reuse by a different stream is not implemented; the assert below fires unconditionally on this path
101
+ FAISS_ASSERT(false);
102
+ }
103
+
104
+ if (endAlloc < prevUser.end_) {
105
+ // We cover only the front of the previous range: shrink it to the untouched tail and stop
106
+ prevUser.start_ = endAlloc;
107
+
108
+ break;
109
+ }
110
+
111
+ // If we're the exact size of the previous request, then we
112
+ // don't need to continue
113
+ bool done = (prevUser.end_ == endAlloc);
114
+
115
+ lastUsers_.pop_back(); // previous range is fully covered by this allocation
116
+
117
+ if (done) {
118
+ break;
119
+ }
120
+ }
121
+
122
+ head_ = endAlloc;
123
+ FAISS_ASSERT(head_ <= end_);
124
+
125
+ highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
126
+ (size_t) (head_ - start_));
127
+ return startAlloc;
128
+ }
129
+ }
130
+ // Gives back an allocation of `size` bytes at `p`: frees it when it lies outside [start_, end_), otherwise pops it off the stack and records (range, stream) in lastUsers_.
131
+ void
132
+ StackDeviceMemory::Stack::returnAlloc(char* p,
133
+ size_t size,
134
+ cudaStream_t stream) {
135
+ if (p < start_ || p >= end_) { // NOTE(review): getAlloc(0) on a full stack returns end_, which lands in this branch and is passed to freeMemorySpace -- confirm zero-size requests cannot occur
136
+ // This is not on our stack; it was a one-off allocation
137
+ DeviceScope s(device_);
138
+
139
+ freeMemorySpace(MemorySpace::Device, p);
140
+
141
+ FAISS_ASSERT(mallocCurrent_ >= size);
142
+ mallocCurrent_ -= size;
143
+ } else {
144
+ // This is on our stack
145
+ // Allocations should be freed in the reverse order they are made, so `p` must be the topmost allocation
146
+ FAISS_ASSERT(p + size == head_);
147
+
148
+ head_ = p;
149
+ lastUsers_.push_back(Range(p, p + size, stream)); // remembered so getAlloc can compare streams when this memory is reused
150
+ }
151
+ }
152
+ // Builds a multi-line, human-readable dump of the stack: region bounds, available bytes, both high-water marks, and every entry of lastUsers_.
153
+ std::string
154
+ StackDeviceMemory::Stack::toString() const {
155
+ std::stringstream s;
156
+ // Summary lines: total region, remaining region, high-water marks
157
+ s << "SDM device " << device_ << ": Total memory " << size_ << " ["
158
+ << (void*) start_ << ", " << (void*) end_ << ")\n";
159
+ s << " Available memory " << (size_t) (end_ - head_)
160
+ << " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
161
+ s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
162
+ s << " High water cudaMalloc " << highWaterMalloc_ << "\n";
163
+ // One line per previous user, most recently returned first
164
+ int i = lastUsers_.size(); // entries are labelled from size() down to 1
165
+ for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
166
+ s << i-- << ": size " << (size_t) (it->end_ - it->start_)
167
+ << " stream " << it->stream_
168
+ << " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
169
+ }
170
+
171
+ return s.str();
172
+ }
173
+ // Returns highWaterMalloc_, the largest value mallocCurrent_ has reached in getAlloc's off-stack path.
174
+ size_t
175
+ StackDeviceMemory::Stack::getHighWaterCudaMalloc() const {
176
+ return highWaterMalloc_;
177
+ }
178
+ // Creates a manager for `device` whose Stack allocates and owns `allocPerDevice` bytes.
179
+ StackDeviceMemory::StackDeviceMemory(int device, size_t allocPerDevice)
180
+ : device_(device),
181
+ stack_(device, allocPerDevice) {
182
+ }
183
+ // Creates a manager for `device` over the caller-supplied region [p, p + size); `isOwner` is forwarded to the Stack.
184
+ StackDeviceMemory::StackDeviceMemory(int device,
185
+ void* p, size_t size, bool isOwner)
186
+ : device_(device),
187
+ stack_(device, p, size, isOwner) {
188
+ }
189
+ // Nothing to do here; the stack_ member's own destructor handles the region.
190
+ StackDeviceMemory::~StackDeviceMemory() {
191
+ }
192
+ // Turns on (true) or off (false) the stderr warning that Stack::getAlloc prints before an off-stack allocation.
193
+ void
194
+ StackDeviceMemory::setCudaMallocWarning(bool b) {
195
+ stack_.cudaMallocWarning_ = b;
196
+ }
197
+ // Returns the device id given at construction.
198
+ int
199
+ StackDeviceMemory::getDevice() const {
200
+ return device_;
201
+ }
202
+ // Reserves memory for `stream`: rounds `size` up to a multiple of 16, obtains it from the stack, and wraps it in a DeviceMemoryReservation tied to this manager.
203
+ DeviceMemoryReservation
204
+ StackDeviceMemory::getMemory(cudaStream_t stream, size_t size) {
205
+ // We guarantee 16 byte alignment for allocations, so round `size`
206
+ // up to a multiple of 16 (a size that already is one is unchanged)
207
+ size = utils::roundUp(size, (size_t) 16);
208
+
209
+ return DeviceMemoryReservation(this,
210
+ device_,
211
+ stack_.getAlloc(size, stream),
212
+ size, // the rounded size, so returnAllocation hands the same size back
213
+ stream);
214
+ }
215
+ // Returns the bytes still available on the stack (forwards to Stack::getSizeAvailable).
216
+ size_t
217
+ StackDeviceMemory::getSizeAvailable() const {
218
+ return stack_.getSizeAvailable();
219
+ }
220
+ // Returns the textual dump of the stack state (forwards to Stack::toString).
221
+ std::string
222
+ StackDeviceMemory::toString() const {
223
+ return stack_.toString();
224
+ }
225
+ // Returns the high-water mark of off-stack allocations (forwards to Stack::getHighWaterCudaMalloc).
226
+ size_t
227
+ StackDeviceMemory::getHighWaterCudaMalloc() const {
228
+ return stack_.getHighWaterCudaMalloc();
229
+ }
230
+ // Takes a reservation back: asserts it is non-null and belongs to this device, then returns its pointer, size and stream to the stack.
231
+ void
232
+ StackDeviceMemory::returnAllocation(DeviceMemoryReservation& m) {
233
+ FAISS_ASSERT(m.get());
234
+ FAISS_ASSERT(device_ == m.device());
235
+
236
+ stack_.returnAlloc((char*) m.get(), m.size(), m.stream());
237
+ }
238
+
239
+ } } // namespace
@@ -0,0 +1,129 @@ data/vendor/faiss/gpu/utils/StackDeviceMemory.h
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <faiss/gpu/utils/DeviceMemory.h>
12
+ #include <list>
13
+ #include <memory>
14
+ #include <unordered_map>
15
+
16
+ namespace faiss { namespace gpu {
17
+
18
+ /// Device memory manager that provides temporary memory allocations
19
+ /// out of a single region of memory managed as a stack (see struct Stack below)
20
+ class StackDeviceMemory : public DeviceMemory {
21
+ public:
22
+ /// Allocate a new region of `allocPerDevice` bytes that we manage and own
23
+ explicit StackDeviceMemory(int device, size_t allocPerDevice);
24
+
25
+ /// Manage an existing region of `size` bytes at `p` for a particular device, with or
26
+ /// without ownership (`isOwner`)
27
+ StackDeviceMemory(int device, void* p, size_t size, bool isOwner);
28
+
29
+ ~StackDeviceMemory() override;
30
+
31
+ /// Enable or disable the warning about not having enough temporary memory
32
+ /// when cudaMalloc gets called
33
+ void setCudaMallocWarning(bool b);
34
+ /// Returns the device this manager was constructed for
35
+ int getDevice() const override;
36
+ /// Reserves `size` bytes of temporary memory for use on `stream`
37
+ DeviceMemoryReservation getMemory(cudaStream_t stream,
38
+ size_t size) override;
39
+
40
+ size_t getSizeAvailable() const override; // bytes left on the stack
41
+ std::string toString() const override; // textual dump of the stack state
42
+ size_t getHighWaterCudaMalloc() const override; // high-water mark of off-stack allocations
43
+
44
+ protected:
45
+ void returnAllocation(DeviceMemoryReservation& m) override; // gives a reservation's memory back to the stack
46
+
47
+ protected:
48
+ /// Previous allocation ranges and the streams for which
49
+ /// synchronization is required
50
+ struct Range {
51
+ inline Range(char* s, char* e, cudaStream_t str) :
52
+ start_(s), end_(e), stream_(str) {
53
+ }
54
+
55
+ // References a memory range [start_, end_) and the stream that last used it
56
+ char* start_;
57
+ char* end_;
58
+ cudaStream_t stream_;
59
+ };
60
+
61
+ struct Stack {
62
+ /// Constructor that allocates memory via cudaMalloc
63
+ Stack(int device, size_t size);
64
+
65
+ /// Constructor that references a pre-allocated region of memory
66
+ Stack(int device, void* p, size_t size, bool isOwner);
67
+ ~Stack();
68
+
69
+ /// Returns how much size is available for an allocation without
70
+ /// calling cudaMalloc
71
+ size_t getSizeAvailable() const;
72
+
73
+ /// Obtains an allocation; all allocations are guaranteed to be 16
74
+ /// byte aligned (the rounding of `size` is done by the caller, StackDeviceMemory::getMemory)
75
+ char* getAlloc(size_t size, cudaStream_t stream);
76
+
77
+ /// Returns an allocation; stack allocations must come back in reverse order of allocation
78
+ void returnAlloc(char* p, size_t size, cudaStream_t stream);
79
+
80
+ /// Returns the stack state as human-readable text
81
+ std::string toString() const;
82
+
83
+ /// Returns the high-water mark of cudaMalloc activity
84
+ size_t getHighWaterCudaMalloc() const;
85
+
86
+ /// Device this allocation is on
87
+ int device_;
88
+
89
+ /// Do we own our region of memory?
90
+ bool isOwner_;
91
+
92
+ /// Where our allocation begins and ends
93
+ /// [start_, end_) is valid
94
+ char* start_;
95
+ char* end_;
96
+
97
+ /// Total size end_ - start_
98
+ size_t size_;
99
+
100
+ /// Stack head within [start_, end_]; [head_, end_) is the unused part
101
+ char* head_;
102
+
103
+ /// List of previous last users of allocations on our stack, for
104
+ /// possible synchronization purposes
105
+ std::list<Range> lastUsers_;
106
+
107
+ /// How much cudaMalloc memory is currently outstanding?
108
+ size_t mallocCurrent_;
109
+
110
+ /// What's the high water mark in terms of memory used from the
111
+ /// temporary buffer?
112
+ size_t highWaterMemoryUsed_;
113
+
114
+ /// What's the high water mark in terms of memory allocated via
115
+ /// cudaMalloc?
116
+ size_t highWaterMalloc_;
117
+
118
+ /// Whether or not a warning upon cudaMalloc is generated
119
+ bool cudaMallocWarning_;
120
+ };
121
+
122
+ /// Our device
123
+ int device_;
124
+
125
+ /// Memory stack
126
+ Stack stack_;
127
+ };
128
+
129
+ } } // namespace
@@ -0,0 +1,83 @@ data/vendor/faiss/gpu/utils/StaticUtils.h
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <cuda.h>
12
+
13
+ namespace faiss { namespace gpu { namespace utils {
14
+ /// Returns a / b in the type of a + b; for non-negative integers this is the quotient rounded down.
15
+ template <typename U, typename V>
16
+ constexpr __host__ __device__ auto divDown(U a, V b) -> decltype(a + b) {
17
+ return (a / b);
18
+ }
19
+ /// Returns (a + b - 1) / b in the type of a + b; for non-negative a and positive integer b this is the quotient rounded up.
20
+ template <typename U, typename V>
21
+ constexpr __host__ __device__ auto divUp(U a, V b) -> decltype(a + b) {
22
+ return (a + b - 1) / b; // NOTE(review): a + b - 1 can exceed the type's range for very large a
23
+ }
24
+ /// Returns divDown(a, b) * b: for non-negative integers, the largest multiple of b that is <= a.
25
+ template <typename U, typename V>
26
+ constexpr __host__ __device__ auto roundDown(U a, V b) -> decltype(a + b) {
27
+ return divDown(a, b) * b;
28
+ }
29
+ /// Returns divUp(a, b) * b: for non-negative a and positive integer b, the smallest multiple of b that is >= a (a itself when already a multiple).
30
+ template <typename U, typename V>
31
+ constexpr __host__ __device__ auto roundUp(U a, V b) -> decltype(a + b) {
32
+ return divUp(a, b) * b;
33
+ }
34
+ /// Returns n raised to `power` by recursive multiplication; yields 1 whenever power <= 0.
35
+ template <class T>
36
+ constexpr __host__ __device__ T pow(T n, T power) {
37
+ return (power > 0 ? n * pow(n, power - 1) : 1); // one recursion level per unit of `power`
38
+ }
39
+ /// Returns 2 raised to the power n, computed with pow above.
40
+ template <class T>
41
+ constexpr __host__ __device__ T pow2(T n) {
42
+ return pow(2, (T) n); // NOTE(review): the literal 2 is an int, so pow's single template parameter only deduces consistently when T is int
43
+ }
44
+
45
+ static_assert(pow2(8) == 256, "pow2");
46
+ /// Returns the number of times n can be halved (integer division) before reaching <= 1, i.e. floor(log2(n)) for n >= 1; `p` is the running count and callers normally leave it at 0.
47
+ template <typename T>
48
+ constexpr __host__ __device__ int log2(T n, int p = 0) {
49
+ return (n <= 1) ? p : log2(n / 2, p + 1);
50
+ }
51
+
52
+ static_assert(log2(2) == 1, "log2");
53
+ static_assert(log2(3) == 1, "log2");
54
+ static_assert(log2(4) == 2, "log2");
55
+ /// Returns true when v is non-zero and v & (v - 1) is zero, i.e. v has exactly one bit set.
56
+ template <typename T>
57
+ constexpr __host__ __device__ bool isPowerOf2(T v) {
58
+ return (v && !(v & (v - 1)));
59
+ }
60
+
61
+ static_assert(isPowerOf2(2048), "isPowerOf2");
62
+ static_assert(!isPowerOf2(3333), "isPowerOf2");
63
+ /// Returns the smallest power of two strictly greater than v: a power-of-two input is doubled, any other input goes to 1 << (log2(v) + 1).
64
+ template <typename T>
65
+ constexpr __host__ __device__ T nextHighestPowerOf2(T v) {
66
+ return (isPowerOf2(v) ? (T) 2 * v : ((T) 1 << (log2(v) + 1)));
67
+ }
68
+ // Compile-time checks: powers of two are doubled, everything else rounds up
69
+ static_assert(nextHighestPowerOf2(1) == 2, "nextHighestPowerOf2");
70
+ static_assert(nextHighestPowerOf2(2) == 4, "nextHighestPowerOf2");
71
+ static_assert(nextHighestPowerOf2(3) == 4, "nextHighestPowerOf2");
72
+ static_assert(nextHighestPowerOf2(4) == 8, "nextHighestPowerOf2");
73
+
74
+ static_assert(nextHighestPowerOf2(15) == 16, "nextHighestPowerOf2");
75
+ static_assert(nextHighestPowerOf2(16) == 32, "nextHighestPowerOf2");
76
+ static_assert(nextHighestPowerOf2(17) == 32, "nextHighestPowerOf2");
77
+ // Checks with results of 2^31 (unsigned) and 2^32 (size_t)
78
+ static_assert(nextHighestPowerOf2(1536000000u) == 2147483648u,
79
+ "nextHighestPowerOf2");
80
+ static_assert(nextHighestPowerOf2((size_t) 2147483648ULL) ==
81
+ (size_t) 4294967296ULL, "nextHighestPowerOf2");
82
+
83
+ } } } // namespace