faiss 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  5. data/vendor/faiss/faiss/AutoTune.h +55 -56
  6. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  7. data/vendor/faiss/faiss/Clustering.h +88 -35
  8. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  9. data/vendor/faiss/faiss/IVFlib.h +48 -51
  10. data/vendor/faiss/faiss/Index.cpp +85 -103
  11. data/vendor/faiss/faiss/Index.h +54 -48
  12. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  13. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  14. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  15. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  16. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  17. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  18. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  25. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  26. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  27. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  29. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  30. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  31. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  32. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  33. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  34. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  35. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  38. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  39. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  42. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  43. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  44. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  45. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  46. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  47. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  48. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  49. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  50. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  51. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  52. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  53. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  54. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  55. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  56. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  57. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  58. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  59. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  60. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  61. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  62. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  63. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  64. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  65. data/vendor/faiss/faiss/IndexShards.h +85 -73
  66. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  67. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  68. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  69. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  70. data/vendor/faiss/faiss/MetricType.h +7 -7
  71. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  72. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  73. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  74. data/vendor/faiss/faiss/clone_index.h +4 -9
  75. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  76. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  77. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  78. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  79. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  82. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  84. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  85. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  89. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  90. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  91. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  92. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  93. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  94. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  95. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  96. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  97. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  98. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  99. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  100. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  101. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  102. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  103. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  104. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  105. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  110. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  111. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  112. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  113. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  114. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  115. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  116. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  117. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  118. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  119. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  125. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  126. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  127. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  128. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  134. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  135. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  136. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  137. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  138. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  139. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  141. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  142. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  144. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  145. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  146. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  147. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  148. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  149. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  150. data/vendor/faiss/faiss/impl/io.h +31 -41
  151. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  152. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  153. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  154. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  155. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  159. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  160. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  161. data/vendor/faiss/faiss/index_factory.h +6 -7
  162. data/vendor/faiss/faiss/index_io.h +23 -26
  163. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  164. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  165. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  166. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  167. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  168. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  169. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  170. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  172. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  173. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  174. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  175. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  176. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  177. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  178. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  179. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  180. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  181. data/vendor/faiss/faiss/utils/distances.h +133 -118
  182. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  183. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  184. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  185. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  186. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  187. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  188. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  189. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  190. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  191. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  192. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  193. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  194. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  195. data/vendor/faiss/faiss/utils/random.h +13 -16
  196. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  197. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  198. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  199. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  200. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  201. data/vendor/faiss/faiss/utils/utils.h +53 -48
  202. metadata +20 -2
@@ -5,210 +5,204 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
- #include <faiss/gpu/utils/StackDeviceMemory.h>
10
8
  #include <faiss/gpu/utils/DeviceUtils.h>
9
+ #include <faiss/gpu/utils/StackDeviceMemory.h>
11
10
  #include <faiss/gpu/utils/StaticUtils.h>
12
11
  #include <faiss/impl/FaissAssert.h>
13
12
  #include <algorithm>
14
13
  #include <sstream>
15
14
 
16
- namespace faiss { namespace gpu {
15
+ namespace faiss {
16
+ namespace gpu {
17
17
 
18
18
  namespace {
19
19
 
20
20
  size_t adjustStackSize(size_t sz) {
21
- if (sz == 0) {
22
- return 0;
23
- } else {
24
- // ensure that we have at least 16 bytes, as all allocations are bumped up
25
- // to 16
26
- return utils::roundUp(sz, (size_t) 16);
27
- }
21
+ if (sz == 0) {
22
+ return 0;
23
+ } else {
24
+ // ensure that we have at least 16 bytes, as all allocations are bumped
25
+ // up to 16
26
+ return utils::roundUp(sz, (size_t)16);
27
+ }
28
28
  }
29
29
 
30
30
  } // namespace
31
31
 
32
32
  StackDeviceMemory::Stack::Stack(GpuResources* res, int d, size_t sz)
33
- : res_(res),
34
- device_(d),
35
- alloc_(nullptr),
36
- allocSize_(adjustStackSize(sz)),
37
- start_(nullptr),
38
- end_(nullptr),
39
- head_(nullptr),
40
- highWaterMemoryUsed_(0) {
41
- if (allocSize_ == 0) {
42
- return;
43
- }
44
-
45
- DeviceScope s(device_);
46
- auto req = AllocRequest(AllocType::TemporaryMemoryBuffer,
47
- device_,
48
- MemorySpace::Device,
49
- res_->getDefaultStream(device_),
50
- allocSize_);
51
-
52
- alloc_ = (char*) res_->allocMemory(req);
53
- FAISS_ASSERT_FMT(
54
- alloc_,
55
- "could not reserve temporary memory region of size %zu", allocSize_);
56
-
57
- // In order to disambiguate between our entire region of temporary memory
58
- // versus the first allocation in the temporary memory region, ensure that the
59
- // first address returned is +16 bytes from the beginning
60
- start_ = alloc_ + 16;
61
- head_ = start_;
62
- end_ = alloc_ + allocSize_;
33
+ : res_(res),
34
+ device_(d),
35
+ alloc_(nullptr),
36
+ allocSize_(adjustStackSize(sz)),
37
+ start_(nullptr),
38
+ end_(nullptr),
39
+ head_(nullptr),
40
+ highWaterMemoryUsed_(0) {
41
+ if (allocSize_ == 0) {
42
+ return;
43
+ }
44
+
45
+ DeviceScope s(device_);
46
+ auto req = AllocRequest(
47
+ AllocType::TemporaryMemoryBuffer,
48
+ device_,
49
+ MemorySpace::Device,
50
+ res_->getDefaultStream(device_),
51
+ allocSize_);
52
+
53
+ alloc_ = (char*)res_->allocMemory(req);
54
+ FAISS_ASSERT_FMT(
55
+ alloc_,
56
+ "could not reserve temporary memory region of size %zu",
57
+ allocSize_);
58
+
59
+ // In order to disambiguate between our entire region of temporary memory
60
+ // versus the first allocation in the temporary memory region, ensure that
61
+ // the first address returned is +16 bytes from the beginning
62
+ start_ = alloc_ + 16;
63
+ head_ = start_;
64
+ end_ = alloc_ + allocSize_;
63
65
  }
64
66
 
65
67
  StackDeviceMemory::Stack::~Stack() {
66
- DeviceScope s(device_);
68
+ DeviceScope s(device_);
67
69
 
68
- // FIXME: make sure there are no outstanding memory allocations?
69
- if (alloc_) {
70
- res_->deallocMemory(device_, alloc_);
71
- }
70
+ // FIXME: make sure there are no outstanding memory allocations?
71
+ if (alloc_) {
72
+ res_->deallocMemory(device_, alloc_);
73
+ }
72
74
  }
73
75
 
74
- size_t
75
- StackDeviceMemory::Stack::getSizeAvailable() const {
76
- return (end_ - head_);
76
+ size_t StackDeviceMemory::Stack::getSizeAvailable() const {
77
+ return (end_ - head_);
77
78
  }
78
79
 
79
- char*
80
- StackDeviceMemory::Stack::getAlloc(size_t size,
81
- cudaStream_t stream) {
82
- // The user must check to see that the allocation fit within us
83
- auto sizeRemaining = getSizeAvailable();
80
+ char* StackDeviceMemory::Stack::getAlloc(size_t size, cudaStream_t stream) {
81
+ // The user must check to see that the allocation fit within us
82
+ auto sizeRemaining = getSizeAvailable();
84
83
 
85
- FAISS_ASSERT(size <= sizeRemaining);
84
+ FAISS_ASSERT(size <= sizeRemaining);
86
85
 
87
- // We can make the allocation out of our stack
88
- // Find all the ranges that we overlap that may have been
89
- // previously allocated; our allocation will be [head, endAlloc)
90
- char* startAlloc = head_;
91
- char* endAlloc = head_ + size;
86
+ // We can make the allocation out of our stack
87
+ // Find all the ranges that we overlap that may have been
88
+ // previously allocated; our allocation will be [head, endAlloc)
89
+ char* startAlloc = head_;
90
+ char* endAlloc = head_ + size;
92
91
 
93
- while (lastUsers_.size() > 0) {
94
- auto& prevUser = lastUsers_.back();
92
+ while (lastUsers_.size() > 0) {
93
+ auto& prevUser = lastUsers_.back();
95
94
 
96
- // Because there is a previous user, we must overlap it
97
- FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
95
+ // Because there is a previous user, we must overlap it
96
+ FAISS_ASSERT(
97
+ prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
98
98
 
99
- if (stream != prevUser.stream_) {
100
- // Synchronization required
101
- streamWait({stream}, {prevUser.stream_});
102
- }
99
+ if (stream != prevUser.stream_) {
100
+ // Synchronization required
101
+ streamWait({stream}, {prevUser.stream_});
102
+ }
103
103
 
104
- if (endAlloc < prevUser.end_) {
105
- // Update the previous user info
106
- prevUser.start_ = endAlloc;
104
+ if (endAlloc < prevUser.end_) {
105
+ // Update the previous user info
106
+ prevUser.start_ = endAlloc;
107
107
 
108
- break;
109
- }
108
+ break;
109
+ }
110
110
 
111
- // If we're the exact size of the previous request, then we
112
- // don't need to continue
113
- bool done = (prevUser.end_ == endAlloc);
111
+ // If we're the exact size of the previous request, then we
112
+ // don't need to continue
113
+ bool done = (prevUser.end_ == endAlloc);
114
114
 
115
- lastUsers_.pop_back();
115
+ lastUsers_.pop_back();
116
116
 
117
- if (done) {
118
- break;
117
+ if (done) {
118
+ break;
119
+ }
119
120
  }
120
- }
121
121
 
122
- head_ = endAlloc;
123
- FAISS_ASSERT(head_ <= end_);
122
+ head_ = endAlloc;
123
+ FAISS_ASSERT(head_ <= end_);
124
124
 
125
- highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
126
- (size_t) (head_ - start_));
127
- FAISS_ASSERT(startAlloc);
128
- return startAlloc;
125
+ highWaterMemoryUsed_ =
126
+ std::max(highWaterMemoryUsed_, (size_t)(head_ - start_));
127
+ FAISS_ASSERT(startAlloc);
128
+ return startAlloc;
129
129
  }
130
130
 
131
- void
132
- StackDeviceMemory::Stack::returnAlloc(char* p,
133
- size_t size,
134
- cudaStream_t stream) {
135
- // This allocation should be within ourselves
136
- FAISS_ASSERT(p >= start_ && p < end_);
131
+ void StackDeviceMemory::Stack::returnAlloc(
132
+ char* p,
133
+ size_t size,
134
+ cudaStream_t stream) {
135
+ // This allocation should be within ourselves
136
+ FAISS_ASSERT(p >= start_ && p < end_);
137
137
 
138
- // All allocations should have been adjusted to a multiple of 16 bytes
139
- FAISS_ASSERT(size % 16 == 0);
138
+ // All allocations should have been adjusted to a multiple of 16 bytes
139
+ FAISS_ASSERT(size % 16 == 0);
140
140
 
141
- // This is on our stack
142
- // Allocations should be freed in the reverse order they are made
143
- if (p + size != head_) {
144
- FAISS_ASSERT(p + size == head_);
145
- }
141
+ // This is on our stack
142
+ // Allocations should be freed in the reverse order they are made
143
+ if (p + size != head_) {
144
+ FAISS_ASSERT(p + size == head_);
145
+ }
146
146
 
147
- head_ = p;
148
- lastUsers_.push_back(Range(p, p + size, stream));
147
+ head_ = p;
148
+ lastUsers_.push_back(Range(p, p + size, stream));
149
149
  }
150
150
 
151
- std::string
152
- StackDeviceMemory::Stack::toString() const {
153
- std::stringstream s;
151
+ std::string StackDeviceMemory::Stack::toString() const {
152
+ std::stringstream s;
154
153
 
155
- s << "SDM device " << device_ << ": Total memory " << allocSize_ << " ["
156
- << (void*) start_ << ", " << (void*) end_ << ")\n";
157
- s << " Available memory " << (size_t) (end_ - head_)
158
- << " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
159
- s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
154
+ s << "SDM device " << device_ << ": Total memory " << allocSize_ << " ["
155
+ << (void*)start_ << ", " << (void*)end_ << ")\n";
156
+ s << " Available memory " << (size_t)(end_ - head_) << " ["
157
+ << (void*)head_ << ", " << (void*)end_ << ")\n";
158
+ s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
160
159
 
161
- int i = lastUsers_.size();
162
- for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
163
- s << i-- << ": size " << (size_t) (it->end_ - it->start_)
164
- << " stream " << it->stream_
165
- << " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
166
- }
160
+ int i = lastUsers_.size();
161
+ for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
162
+ s << i-- << ": size " << (size_t)(it->end_ - it->start_) << " stream "
163
+ << it->stream_ << " [" << (void*)it->start_ << ", " << (void*)it->end_
164
+ << ")\n";
165
+ }
167
166
 
168
- return s.str();
167
+ return s.str();
169
168
  }
170
169
 
171
- StackDeviceMemory::StackDeviceMemory(GpuResources* res,
172
- int device,
173
- size_t allocPerDevice)
174
- : device_(device),
175
- stack_(res, device, allocPerDevice) {
176
- }
170
+ StackDeviceMemory::StackDeviceMemory(
171
+ GpuResources* res,
172
+ int device,
173
+ size_t allocPerDevice)
174
+ : device_(device), stack_(res, device, allocPerDevice) {}
177
175
 
178
- StackDeviceMemory::~StackDeviceMemory() {
179
- }
176
+ StackDeviceMemory::~StackDeviceMemory() {}
180
177
 
181
- int
182
- StackDeviceMemory::getDevice() const {
183
- return device_;
178
+ int StackDeviceMemory::getDevice() const {
179
+ return device_;
184
180
  }
185
181
 
186
- size_t
187
- StackDeviceMemory::getSizeAvailable() const {
188
- return stack_.getSizeAvailable();
182
+ size_t StackDeviceMemory::getSizeAvailable() const {
183
+ return stack_.getSizeAvailable();
189
184
  }
190
185
 
191
- std::string
192
- StackDeviceMemory::toString() const {
193
- return stack_.toString();
186
+ std::string StackDeviceMemory::toString() const {
187
+ return stack_.toString();
194
188
  }
195
189
 
196
- void*
197
- StackDeviceMemory::allocMemory(cudaStream_t stream, size_t size) {
198
- // All allocations should have been adjusted to a multiple of 16 bytes
199
- FAISS_ASSERT(size % 16 == 0);
200
- return stack_.getAlloc(size, stream);
190
+ void* StackDeviceMemory::allocMemory(cudaStream_t stream, size_t size) {
191
+ // All allocations should have been adjusted to a multiple of 16 bytes
192
+ FAISS_ASSERT(size % 16 == 0);
193
+ return stack_.getAlloc(size, stream);
201
194
  }
202
195
 
203
- void
204
- StackDeviceMemory::deallocMemory(int device,
205
- cudaStream_t stream,
206
- size_t size,
207
- void* p) {
208
- FAISS_ASSERT(p);
209
- FAISS_ASSERT(device == device_);
196
+ void StackDeviceMemory::deallocMemory(
197
+ int device,
198
+ cudaStream_t stream,
199
+ size_t size,
200
+ void* p) {
201
+ FAISS_ASSERT(p);
202
+ FAISS_ASSERT(device == device_);
210
203
 
211
- stack_.returnAlloc((char*) p, size, stream);
204
+ stack_.returnAlloc((char*)p, size, stream);
212
205
  }
213
206
 
214
- } } // namespace
207
+ } // namespace gpu
208
+ } // namespace faiss
@@ -5,110 +5,108 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #pragma once
10
9
 
11
- #include <faiss/gpu/GpuResources.h>
12
10
  #include <cuda_runtime.h>
11
+ #include <faiss/gpu/GpuResources.h>
13
12
  #include <list>
14
13
  #include <memory>
15
- #include <unordered_map>
16
14
  #include <tuple>
15
+ #include <unordered_map>
17
16
 
18
- namespace faiss { namespace gpu {
17
+ namespace faiss {
18
+ namespace gpu {
19
19
 
20
20
  /// Device memory manager that provides temporary memory allocations
21
21
  /// out of a region of memory, for a single device
22
22
  class StackDeviceMemory {
23
- public:
24
- /// Allocate a new region of memory that we manage
25
- StackDeviceMemory(GpuResources* res,
26
- int device,
27
- size_t allocPerDevice);
23
+ public:
24
+ /// Allocate a new region of memory that we manage
25
+ StackDeviceMemory(GpuResources* res, int device, size_t allocPerDevice);
28
26
 
29
- /// Manage a region of memory for a particular device, with or
30
- /// without ownership
31
- StackDeviceMemory(int device, void* p, size_t size, bool isOwner);
27
+ /// Manage a region of memory for a particular device, with or
28
+ /// without ownership
29
+ StackDeviceMemory(int device, void* p, size_t size, bool isOwner);
32
30
 
33
- ~StackDeviceMemory();
31
+ ~StackDeviceMemory();
34
32
 
35
- int getDevice() const;
33
+ int getDevice() const;
36
34
 
37
- /// All allocations requested should be a multiple of 16 bytes
38
- void* allocMemory(cudaStream_t stream, size_t size);
39
- void deallocMemory(int device, cudaStream_t, size_t size, void* p);
35
+ /// All allocations requested should be a multiple of 16 bytes
36
+ void* allocMemory(cudaStream_t stream, size_t size);
37
+ void deallocMemory(int device, cudaStream_t, size_t size, void* p);
40
38
 
41
- size_t getSizeAvailable() const;
42
- std::string toString() const;
39
+ size_t getSizeAvailable() const;
40
+ std::string toString() const;
43
41
 
44
- protected:
45
- /// Previous allocation ranges and the streams for which
46
- /// synchronization is required
47
- struct Range {
48
- inline Range(char* s, char* e, cudaStream_t str) :
49
- start_(s), end_(e), stream_(str) {
50
- }
42
+ protected:
43
+ /// Previous allocation ranges and the streams for which
44
+ /// synchronization is required
45
+ struct Range {
46
+ inline Range(char* s, char* e, cudaStream_t str)
47
+ : start_(s), end_(e), stream_(str) {}
51
48
 
52
- // References a memory range [start, end)
53
- char* start_;
54
- char* end_;
55
- cudaStream_t stream_;
56
- };
49
+ // References a memory range [start, end)
50
+ char* start_;
51
+ char* end_;
52
+ cudaStream_t stream_;
53
+ };
57
54
 
58
- struct Stack {
59
- /// Constructor that allocates memory via cudaMalloc
60
- Stack(GpuResources* res, int device, size_t size);
55
+ struct Stack {
56
+ /// Constructor that allocates memory via cudaMalloc
57
+ Stack(GpuResources* res, int device, size_t size);
61
58
 
62
- ~Stack();
59
+ ~Stack();
63
60
 
64
- /// Returns how much size is available for an allocation without
65
- /// calling cudaMalloc
66
- size_t getSizeAvailable() const;
61
+ /// Returns how much size is available for an allocation without
62
+ /// calling cudaMalloc
63
+ size_t getSizeAvailable() const;
67
64
 
68
- /// Obtains an allocation; all allocations are guaranteed to be 16
69
- /// byte aligned
70
- char* getAlloc(size_t size, cudaStream_t stream);
65
+ /// Obtains an allocation; all allocations are guaranteed to be 16
66
+ /// byte aligned
67
+ char* getAlloc(size_t size, cudaStream_t stream);
71
68
 
72
- /// Returns an allocation
73
- void returnAlloc(char* p, size_t size, cudaStream_t stream);
69
+ /// Returns an allocation
70
+ void returnAlloc(char* p, size_t size, cudaStream_t stream);
74
71
 
75
- /// Returns the stack state
76
- std::string toString() const;
72
+ /// Returns the stack state
73
+ std::string toString() const;
77
74
 
78
- /// Our GpuResources object
79
- GpuResources* res_;
75
+ /// Our GpuResources object
76
+ GpuResources* res_;
80
77
 
81
- /// Device this allocation is on
82
- int device_;
78
+ /// Device this allocation is on
79
+ int device_;
83
80
 
84
- /// Where our temporary memory buffer is allocated; we allocate starting 16
85
- /// bytes into this
86
- char* alloc_;
81
+ /// Where our temporary memory buffer is allocated; we allocate starting
82
+ /// 16 bytes into this
83
+ char* alloc_;
87
84
 
88
- /// Total size of our allocation
89
- size_t allocSize_;
85
+ /// Total size of our allocation
86
+ size_t allocSize_;
90
87
 
91
- /// Our temporary memory region; [start_, end_) is valid
92
- char* start_;
93
- char* end_;
88
+ /// Our temporary memory region; [start_, end_) is valid
89
+ char* start_;
90
+ char* end_;
94
91
 
95
- /// Stack head within [start, end)
96
- char* head_;
92
+ /// Stack head within [start, end)
93
+ char* head_;
97
94
 
98
- /// List of previous last users of allocations on our stack, for
99
- /// possible synchronization purposes
100
- std::list<Range> lastUsers_;
95
+ /// List of previous last users of allocations on our stack, for
96
+ /// possible synchronization purposes
97
+ std::list<Range> lastUsers_;
101
98
 
102
- /// What's the high water mark in terms of memory used from the
103
- /// temporary buffer?
104
- size_t highWaterMemoryUsed_;
105
- };
99
+ /// What's the high water mark in terms of memory used from the
100
+ /// temporary buffer?
101
+ size_t highWaterMemoryUsed_;
102
+ };
106
103
 
107
- /// Our device
108
- int device_;
104
+ /// Our device
105
+ int device_;
109
106
 
110
- /// Memory stack
111
- Stack stack_;
107
+ /// Memory stack
108
+ Stack stack_;
112
109
  };
113
110
 
114
- } } // namespace
111
+ } // namespace gpu
112
+ } // namespace faiss