faiss 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (202) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  5. data/vendor/faiss/faiss/AutoTune.h +55 -56
  6. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  7. data/vendor/faiss/faiss/Clustering.h +88 -35
  8. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  9. data/vendor/faiss/faiss/IVFlib.h +48 -51
  10. data/vendor/faiss/faiss/Index.cpp +85 -103
  11. data/vendor/faiss/faiss/Index.h +54 -48
  12. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  13. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  14. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  15. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  16. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  17. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  18. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  25. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  26. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  27. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  29. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  30. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  31. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  32. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  33. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  34. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  35. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  38. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  39. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  42. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  43. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  44. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  45. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  46. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  47. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  48. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  49. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  50. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  51. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  52. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  53. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  54. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  55. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  56. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  57. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  58. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  59. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  60. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  61. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  62. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  63. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  64. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  65. data/vendor/faiss/faiss/IndexShards.h +85 -73
  66. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  67. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  68. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  69. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  70. data/vendor/faiss/faiss/MetricType.h +7 -7
  71. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  72. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  73. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  74. data/vendor/faiss/faiss/clone_index.h +4 -9
  75. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  76. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  77. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  78. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  79. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  82. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  84. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  85. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  89. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  90. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  91. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  92. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  93. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  94. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  95. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  96. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  97. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  98. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  99. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  100. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  101. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  102. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  103. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  104. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  105. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  110. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  111. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  112. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  113. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  114. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  115. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  116. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  117. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  118. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  119. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  125. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  126. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  127. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  128. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  134. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  135. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  136. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  137. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  138. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  139. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  141. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  142. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  144. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  145. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  146. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  147. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  148. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  149. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  150. data/vendor/faiss/faiss/impl/io.h +31 -41
  151. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  152. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  153. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  154. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  155. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  159. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  160. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  161. data/vendor/faiss/faiss/index_factory.h +6 -7
  162. data/vendor/faiss/faiss/index_io.h +23 -26
  163. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  164. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  165. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  166. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  167. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  168. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  169. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  170. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  172. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  173. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  174. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  175. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  176. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  177. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  178. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  179. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  180. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  181. data/vendor/faiss/faiss/utils/distances.h +133 -118
  182. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  183. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  184. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  185. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  186. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  187. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  188. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  189. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  190. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  191. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  192. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  193. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  194. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  195. data/vendor/faiss/faiss/utils/random.h +13 -16
  196. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  197. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  198. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  199. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  200. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  201. data/vendor/faiss/faiss/utils/utils.h +53 -48
  202. metadata +20 -2
@@ -5,210 +5,204 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
- #include <faiss/gpu/utils/StackDeviceMemory.h>
10
8
  #include <faiss/gpu/utils/DeviceUtils.h>
9
+ #include <faiss/gpu/utils/StackDeviceMemory.h>
11
10
  #include <faiss/gpu/utils/StaticUtils.h>
12
11
  #include <faiss/impl/FaissAssert.h>
13
12
  #include <algorithm>
14
13
  #include <sstream>
15
14
 
16
- namespace faiss { namespace gpu {
15
+ namespace faiss {
16
+ namespace gpu {
17
17
 
18
18
  namespace {
19
19
 
20
20
  size_t adjustStackSize(size_t sz) {
21
- if (sz == 0) {
22
- return 0;
23
- } else {
24
- // ensure that we have at least 16 bytes, as all allocations are bumped up
25
- // to 16
26
- return utils::roundUp(sz, (size_t) 16);
27
- }
21
+ if (sz == 0) {
22
+ return 0;
23
+ } else {
24
+ // ensure that we have at least 16 bytes, as all allocations are bumped
25
+ // up to 16
26
+ return utils::roundUp(sz, (size_t)16);
27
+ }
28
28
  }
29
29
 
30
30
  } // namespace
31
31
 
32
32
  StackDeviceMemory::Stack::Stack(GpuResources* res, int d, size_t sz)
33
- : res_(res),
34
- device_(d),
35
- alloc_(nullptr),
36
- allocSize_(adjustStackSize(sz)),
37
- start_(nullptr),
38
- end_(nullptr),
39
- head_(nullptr),
40
- highWaterMemoryUsed_(0) {
41
- if (allocSize_ == 0) {
42
- return;
43
- }
44
-
45
- DeviceScope s(device_);
46
- auto req = AllocRequest(AllocType::TemporaryMemoryBuffer,
47
- device_,
48
- MemorySpace::Device,
49
- res_->getDefaultStream(device_),
50
- allocSize_);
51
-
52
- alloc_ = (char*) res_->allocMemory(req);
53
- FAISS_ASSERT_FMT(
54
- alloc_,
55
- "could not reserve temporary memory region of size %zu", allocSize_);
56
-
57
- // In order to disambiguate between our entire region of temporary memory
58
- // versus the first allocation in the temporary memory region, ensure that the
59
- // first address returned is +16 bytes from the beginning
60
- start_ = alloc_ + 16;
61
- head_ = start_;
62
- end_ = alloc_ + allocSize_;
33
+ : res_(res),
34
+ device_(d),
35
+ alloc_(nullptr),
36
+ allocSize_(adjustStackSize(sz)),
37
+ start_(nullptr),
38
+ end_(nullptr),
39
+ head_(nullptr),
40
+ highWaterMemoryUsed_(0) {
41
+ if (allocSize_ == 0) {
42
+ return;
43
+ }
44
+
45
+ DeviceScope s(device_);
46
+ auto req = AllocRequest(
47
+ AllocType::TemporaryMemoryBuffer,
48
+ device_,
49
+ MemorySpace::Device,
50
+ res_->getDefaultStream(device_),
51
+ allocSize_);
52
+
53
+ alloc_ = (char*)res_->allocMemory(req);
54
+ FAISS_ASSERT_FMT(
55
+ alloc_,
56
+ "could not reserve temporary memory region of size %zu",
57
+ allocSize_);
58
+
59
+ // In order to disambiguate between our entire region of temporary memory
60
+ // versus the first allocation in the temporary memory region, ensure that
61
+ // the first address returned is +16 bytes from the beginning
62
+ start_ = alloc_ + 16;
63
+ head_ = start_;
64
+ end_ = alloc_ + allocSize_;
63
65
  }
64
66
 
65
67
  StackDeviceMemory::Stack::~Stack() {
66
- DeviceScope s(device_);
68
+ DeviceScope s(device_);
67
69
 
68
- // FIXME: make sure there are no outstanding memory allocations?
69
- if (alloc_) {
70
- res_->deallocMemory(device_, alloc_);
71
- }
70
+ // FIXME: make sure there are no outstanding memory allocations?
71
+ if (alloc_) {
72
+ res_->deallocMemory(device_, alloc_);
73
+ }
72
74
  }
73
75
 
74
- size_t
75
- StackDeviceMemory::Stack::getSizeAvailable() const {
76
- return (end_ - head_);
76
+ size_t StackDeviceMemory::Stack::getSizeAvailable() const {
77
+ return (end_ - head_);
77
78
  }
78
79
 
79
- char*
80
- StackDeviceMemory::Stack::getAlloc(size_t size,
81
- cudaStream_t stream) {
82
- // The user must check to see that the allocation fit within us
83
- auto sizeRemaining = getSizeAvailable();
80
+ char* StackDeviceMemory::Stack::getAlloc(size_t size, cudaStream_t stream) {
81
+ // The user must check to see that the allocation fit within us
82
+ auto sizeRemaining = getSizeAvailable();
84
83
 
85
- FAISS_ASSERT(size <= sizeRemaining);
84
+ FAISS_ASSERT(size <= sizeRemaining);
86
85
 
87
- // We can make the allocation out of our stack
88
- // Find all the ranges that we overlap that may have been
89
- // previously allocated; our allocation will be [head, endAlloc)
90
- char* startAlloc = head_;
91
- char* endAlloc = head_ + size;
86
+ // We can make the allocation out of our stack
87
+ // Find all the ranges that we overlap that may have been
88
+ // previously allocated; our allocation will be [head, endAlloc)
89
+ char* startAlloc = head_;
90
+ char* endAlloc = head_ + size;
92
91
 
93
- while (lastUsers_.size() > 0) {
94
- auto& prevUser = lastUsers_.back();
92
+ while (lastUsers_.size() > 0) {
93
+ auto& prevUser = lastUsers_.back();
95
94
 
96
- // Because there is a previous user, we must overlap it
97
- FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
95
+ // Because there is a previous user, we must overlap it
96
+ FAISS_ASSERT(
97
+ prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
98
98
 
99
- if (stream != prevUser.stream_) {
100
- // Synchronization required
101
- streamWait({stream}, {prevUser.stream_});
102
- }
99
+ if (stream != prevUser.stream_) {
100
+ // Synchronization required
101
+ streamWait({stream}, {prevUser.stream_});
102
+ }
103
103
 
104
- if (endAlloc < prevUser.end_) {
105
- // Update the previous user info
106
- prevUser.start_ = endAlloc;
104
+ if (endAlloc < prevUser.end_) {
105
+ // Update the previous user info
106
+ prevUser.start_ = endAlloc;
107
107
 
108
- break;
109
- }
108
+ break;
109
+ }
110
110
 
111
- // If we're the exact size of the previous request, then we
112
- // don't need to continue
113
- bool done = (prevUser.end_ == endAlloc);
111
+ // If we're the exact size of the previous request, then we
112
+ // don't need to continue
113
+ bool done = (prevUser.end_ == endAlloc);
114
114
 
115
- lastUsers_.pop_back();
115
+ lastUsers_.pop_back();
116
116
 
117
- if (done) {
118
- break;
117
+ if (done) {
118
+ break;
119
+ }
119
120
  }
120
- }
121
121
 
122
- head_ = endAlloc;
123
- FAISS_ASSERT(head_ <= end_);
122
+ head_ = endAlloc;
123
+ FAISS_ASSERT(head_ <= end_);
124
124
 
125
- highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
126
- (size_t) (head_ - start_));
127
- FAISS_ASSERT(startAlloc);
128
- return startAlloc;
125
+ highWaterMemoryUsed_ =
126
+ std::max(highWaterMemoryUsed_, (size_t)(head_ - start_));
127
+ FAISS_ASSERT(startAlloc);
128
+ return startAlloc;
129
129
  }
130
130
 
131
- void
132
- StackDeviceMemory::Stack::returnAlloc(char* p,
133
- size_t size,
134
- cudaStream_t stream) {
135
- // This allocation should be within ourselves
136
- FAISS_ASSERT(p >= start_ && p < end_);
131
+ void StackDeviceMemory::Stack::returnAlloc(
132
+ char* p,
133
+ size_t size,
134
+ cudaStream_t stream) {
135
+ // This allocation should be within ourselves
136
+ FAISS_ASSERT(p >= start_ && p < end_);
137
137
 
138
- // All allocations should have been adjusted to a multiple of 16 bytes
139
- FAISS_ASSERT(size % 16 == 0);
138
+ // All allocations should have been adjusted to a multiple of 16 bytes
139
+ FAISS_ASSERT(size % 16 == 0);
140
140
 
141
- // This is on our stack
142
- // Allocations should be freed in the reverse order they are made
143
- if (p + size != head_) {
144
- FAISS_ASSERT(p + size == head_);
145
- }
141
+ // This is on our stack
142
+ // Allocations should be freed in the reverse order they are made
143
+ if (p + size != head_) {
144
+ FAISS_ASSERT(p + size == head_);
145
+ }
146
146
 
147
- head_ = p;
148
- lastUsers_.push_back(Range(p, p + size, stream));
147
+ head_ = p;
148
+ lastUsers_.push_back(Range(p, p + size, stream));
149
149
  }
150
150
 
151
- std::string
152
- StackDeviceMemory::Stack::toString() const {
153
- std::stringstream s;
151
+ std::string StackDeviceMemory::Stack::toString() const {
152
+ std::stringstream s;
154
153
 
155
- s << "SDM device " << device_ << ": Total memory " << allocSize_ << " ["
156
- << (void*) start_ << ", " << (void*) end_ << ")\n";
157
- s << " Available memory " << (size_t) (end_ - head_)
158
- << " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
159
- s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
154
+ s << "SDM device " << device_ << ": Total memory " << allocSize_ << " ["
155
+ << (void*)start_ << ", " << (void*)end_ << ")\n";
156
+ s << " Available memory " << (size_t)(end_ - head_) << " ["
157
+ << (void*)head_ << ", " << (void*)end_ << ")\n";
158
+ s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
160
159
 
161
- int i = lastUsers_.size();
162
- for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
163
- s << i-- << ": size " << (size_t) (it->end_ - it->start_)
164
- << " stream " << it->stream_
165
- << " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
166
- }
160
+ int i = lastUsers_.size();
161
+ for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
162
+ s << i-- << ": size " << (size_t)(it->end_ - it->start_) << " stream "
163
+ << it->stream_ << " [" << (void*)it->start_ << ", " << (void*)it->end_
164
+ << ")\n";
165
+ }
167
166
 
168
- return s.str();
167
+ return s.str();
169
168
  }
170
169
 
171
- StackDeviceMemory::StackDeviceMemory(GpuResources* res,
172
- int device,
173
- size_t allocPerDevice)
174
- : device_(device),
175
- stack_(res, device, allocPerDevice) {
176
- }
170
+ StackDeviceMemory::StackDeviceMemory(
171
+ GpuResources* res,
172
+ int device,
173
+ size_t allocPerDevice)
174
+ : device_(device), stack_(res, device, allocPerDevice) {}
177
175
 
178
- StackDeviceMemory::~StackDeviceMemory() {
179
- }
176
+ StackDeviceMemory::~StackDeviceMemory() {}
180
177
 
181
- int
182
- StackDeviceMemory::getDevice() const {
183
- return device_;
178
+ int StackDeviceMemory::getDevice() const {
179
+ return device_;
184
180
  }
185
181
 
186
- size_t
187
- StackDeviceMemory::getSizeAvailable() const {
188
- return stack_.getSizeAvailable();
182
+ size_t StackDeviceMemory::getSizeAvailable() const {
183
+ return stack_.getSizeAvailable();
189
184
  }
190
185
 
191
- std::string
192
- StackDeviceMemory::toString() const {
193
- return stack_.toString();
186
+ std::string StackDeviceMemory::toString() const {
187
+ return stack_.toString();
194
188
  }
195
189
 
196
- void*
197
- StackDeviceMemory::allocMemory(cudaStream_t stream, size_t size) {
198
- // All allocations should have been adjusted to a multiple of 16 bytes
199
- FAISS_ASSERT(size % 16 == 0);
200
- return stack_.getAlloc(size, stream);
190
+ void* StackDeviceMemory::allocMemory(cudaStream_t stream, size_t size) {
191
+ // All allocations should have been adjusted to a multiple of 16 bytes
192
+ FAISS_ASSERT(size % 16 == 0);
193
+ return stack_.getAlloc(size, stream);
201
194
  }
202
195
 
203
- void
204
- StackDeviceMemory::deallocMemory(int device,
205
- cudaStream_t stream,
206
- size_t size,
207
- void* p) {
208
- FAISS_ASSERT(p);
209
- FAISS_ASSERT(device == device_);
196
+ void StackDeviceMemory::deallocMemory(
197
+ int device,
198
+ cudaStream_t stream,
199
+ size_t size,
200
+ void* p) {
201
+ FAISS_ASSERT(p);
202
+ FAISS_ASSERT(device == device_);
210
203
 
211
- stack_.returnAlloc((char*) p, size, stream);
204
+ stack_.returnAlloc((char*)p, size, stream);
212
205
  }
213
206
 
214
- } } // namespace
207
+ } // namespace gpu
208
+ } // namespace faiss
@@ -5,110 +5,108 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #pragma once
10
9
 
11
- #include <faiss/gpu/GpuResources.h>
12
10
  #include <cuda_runtime.h>
11
+ #include <faiss/gpu/GpuResources.h>
13
12
  #include <list>
14
13
  #include <memory>
15
- #include <unordered_map>
16
14
  #include <tuple>
15
+ #include <unordered_map>
17
16
 
18
- namespace faiss { namespace gpu {
17
+ namespace faiss {
18
+ namespace gpu {
19
19
 
20
20
  /// Device memory manager that provides temporary memory allocations
21
21
  /// out of a region of memory, for a single device
22
22
  class StackDeviceMemory {
23
- public:
24
- /// Allocate a new region of memory that we manage
25
- StackDeviceMemory(GpuResources* res,
26
- int device,
27
- size_t allocPerDevice);
23
+ public:
24
+ /// Allocate a new region of memory that we manage
25
+ StackDeviceMemory(GpuResources* res, int device, size_t allocPerDevice);
28
26
 
29
- /// Manage a region of memory for a particular device, with or
30
- /// without ownership
31
- StackDeviceMemory(int device, void* p, size_t size, bool isOwner);
27
+ /// Manage a region of memory for a particular device, with or
28
+ /// without ownership
29
+ StackDeviceMemory(int device, void* p, size_t size, bool isOwner);
32
30
 
33
- ~StackDeviceMemory();
31
+ ~StackDeviceMemory();
34
32
 
35
- int getDevice() const;
33
+ int getDevice() const;
36
34
 
37
- /// All allocations requested should be a multiple of 16 bytes
38
- void* allocMemory(cudaStream_t stream, size_t size);
39
- void deallocMemory(int device, cudaStream_t, size_t size, void* p);
35
+ /// All allocations requested should be a multiple of 16 bytes
36
+ void* allocMemory(cudaStream_t stream, size_t size);
37
+ void deallocMemory(int device, cudaStream_t, size_t size, void* p);
40
38
 
41
- size_t getSizeAvailable() const;
42
- std::string toString() const;
39
+ size_t getSizeAvailable() const;
40
+ std::string toString() const;
43
41
 
44
- protected:
45
- /// Previous allocation ranges and the streams for which
46
- /// synchronization is required
47
- struct Range {
48
- inline Range(char* s, char* e, cudaStream_t str) :
49
- start_(s), end_(e), stream_(str) {
50
- }
42
+ protected:
43
+ /// Previous allocation ranges and the streams for which
44
+ /// synchronization is required
45
+ struct Range {
46
+ inline Range(char* s, char* e, cudaStream_t str)
47
+ : start_(s), end_(e), stream_(str) {}
51
48
 
52
- // References a memory range [start, end)
53
- char* start_;
54
- char* end_;
55
- cudaStream_t stream_;
56
- };
49
+ // References a memory range [start, end)
50
+ char* start_;
51
+ char* end_;
52
+ cudaStream_t stream_;
53
+ };
57
54
 
58
- struct Stack {
59
- /// Constructor that allocates memory via cudaMalloc
60
- Stack(GpuResources* res, int device, size_t size);
55
+ struct Stack {
56
+ /// Constructor that allocates memory via cudaMalloc
57
+ Stack(GpuResources* res, int device, size_t size);
61
58
 
62
- ~Stack();
59
+ ~Stack();
63
60
 
64
- /// Returns how much size is available for an allocation without
65
- /// calling cudaMalloc
66
- size_t getSizeAvailable() const;
61
+ /// Returns how much size is available for an allocation without
62
+ /// calling cudaMalloc
63
+ size_t getSizeAvailable() const;
67
64
 
68
- /// Obtains an allocation; all allocations are guaranteed to be 16
69
- /// byte aligned
70
- char* getAlloc(size_t size, cudaStream_t stream);
65
+ /// Obtains an allocation; all allocations are guaranteed to be 16
66
+ /// byte aligned
67
+ char* getAlloc(size_t size, cudaStream_t stream);
71
68
 
72
- /// Returns an allocation
73
- void returnAlloc(char* p, size_t size, cudaStream_t stream);
69
+ /// Returns an allocation
70
+ void returnAlloc(char* p, size_t size, cudaStream_t stream);
74
71
 
75
- /// Returns the stack state
76
- std::string toString() const;
72
+ /// Returns the stack state
73
+ std::string toString() const;
77
74
 
78
- /// Our GpuResources object
79
- GpuResources* res_;
75
+ /// Our GpuResources object
76
+ GpuResources* res_;
80
77
 
81
- /// Device this allocation is on
82
- int device_;
78
+ /// Device this allocation is on
79
+ int device_;
83
80
 
84
- /// Where our temporary memory buffer is allocated; we allocate starting 16
85
- /// bytes into this
86
- char* alloc_;
81
+ /// Where our temporary memory buffer is allocated; we allocate starting
82
+ /// 16 bytes into this
83
+ char* alloc_;
87
84
 
88
- /// Total size of our allocation
89
- size_t allocSize_;
85
+ /// Total size of our allocation
86
+ size_t allocSize_;
90
87
 
91
- /// Our temporary memory region; [start_, end_) is valid
92
- char* start_;
93
- char* end_;
88
+ /// Our temporary memory region; [start_, end_) is valid
89
+ char* start_;
90
+ char* end_;
94
91
 
95
- /// Stack head within [start, end)
96
- char* head_;
92
+ /// Stack head within [start, end)
93
+ char* head_;
97
94
 
98
- /// List of previous last users of allocations on our stack, for
99
- /// possible synchronization purposes
100
- std::list<Range> lastUsers_;
95
+ /// List of previous last users of allocations on our stack, for
96
+ /// possible synchronization purposes
97
+ std::list<Range> lastUsers_;
101
98
 
102
- /// What's the high water mark in terms of memory used from the
103
- /// temporary buffer?
104
- size_t highWaterMemoryUsed_;
105
- };
99
+ /// What's the high water mark in terms of memory used from the
100
+ /// temporary buffer?
101
+ size_t highWaterMemoryUsed_;
102
+ };
106
103
 
107
- /// Our device
108
- int device_;
104
+ /// Our device
105
+ int device_;
109
106
 
110
- /// Memory stack
111
- Stack stack_;
107
+ /// Memory stack
108
+ Stack stack_;
112
109
  };
113
110
 
114
- } } // namespace
111
+ } // namespace gpu
112
+ } // namespace faiss