faiss 0.1.7 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (219) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/README.md +7 -7
  4. data/ext/faiss/ext.cpp +1 -1
  5. data/ext/faiss/extconf.rb +8 -2
  6. data/ext/faiss/index.cpp +102 -69
  7. data/ext/faiss/index_binary.cpp +24 -30
  8. data/ext/faiss/kmeans.cpp +20 -16
  9. data/ext/faiss/numo.hpp +867 -0
  10. data/ext/faiss/pca_matrix.cpp +13 -14
  11. data/ext/faiss/product_quantizer.cpp +23 -24
  12. data/ext/faiss/utils.cpp +10 -37
  13. data/ext/faiss/utils.h +2 -13
  14. data/lib/faiss/version.rb +1 -1
  15. data/lib/faiss.rb +0 -5
  16. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  17. data/vendor/faiss/faiss/AutoTune.h +55 -56
  18. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  19. data/vendor/faiss/faiss/Clustering.h +88 -35
  20. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  21. data/vendor/faiss/faiss/IVFlib.h +48 -51
  22. data/vendor/faiss/faiss/Index.cpp +85 -103
  23. data/vendor/faiss/faiss/Index.h +54 -48
  24. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  25. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  26. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  27. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  28. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  29. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  30. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  31. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  32. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  33. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  34. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  35. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  36. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  37. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  38. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  39. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  40. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  41. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  42. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  43. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  54. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  55. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  69. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  73. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  76. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  77. data/vendor/faiss/faiss/IndexShards.h +85 -73
  78. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  79. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  81. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  82. data/vendor/faiss/faiss/MetricType.h +7 -7
  83. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  84. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  85. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  86. data/vendor/faiss/faiss/clone_index.h +4 -9
  87. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  88. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  89. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  90. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  91. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  96. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  102. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  103. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  104. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  106. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  108. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  110. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  112. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  113. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  114. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  115. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  116. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  121. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  122. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  124. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  125. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  126. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  128. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  129. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  130. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  131. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  133. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  135. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  136. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  137. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  138. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  139. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  140. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  142. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  144. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  145. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  146. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  148. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  149. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  151. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  153. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  154. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  156. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  157. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  158. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  159. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  160. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  161. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  162. data/vendor/faiss/faiss/impl/io.h +31 -41
  163. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  164. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  165. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  166. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  167. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  171. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  172. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  173. data/vendor/faiss/faiss/index_factory.h +6 -7
  174. data/vendor/faiss/faiss/index_io.h +23 -26
  175. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  177. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  178. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  179. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  180. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  181. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  183. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  185. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  186. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  187. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  188. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  189. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  190. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  191. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  192. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  193. data/vendor/faiss/faiss/utils/distances.h +133 -118
  194. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  195. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  196. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  197. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  198. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  199. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  200. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  201. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  202. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  203. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  204. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  205. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  206. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  207. data/vendor/faiss/faiss/utils/random.h +13 -16
  208. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  209. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  210. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  211. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  212. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  213. data/vendor/faiss/faiss/utils/utils.h +53 -48
  214. metadata +26 -12
  215. data/lib/faiss/index.rb +0 -20
  216. data/lib/faiss/index_binary.rb +0 -20
  217. data/lib/faiss/kmeans.rb +0 -15
  218. data/lib/faiss/pca_matrix.rb +0 -15
  219. data/lib/faiss/product_quantizer.rb +0 -22
@@ -5,210 +5,204 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
- #include <faiss/gpu/utils/StackDeviceMemory.h>
10
8
  #include <faiss/gpu/utils/DeviceUtils.h>
9
+ #include <faiss/gpu/utils/StackDeviceMemory.h>
11
10
  #include <faiss/gpu/utils/StaticUtils.h>
12
11
  #include <faiss/impl/FaissAssert.h>
13
12
  #include <algorithm>
14
13
  #include <sstream>
15
14
 
16
- namespace faiss { namespace gpu {
15
+ namespace faiss {
16
+ namespace gpu {
17
17
 
18
18
  namespace {
19
19
 
20
20
  size_t adjustStackSize(size_t sz) {
21
- if (sz == 0) {
22
- return 0;
23
- } else {
24
- // ensure that we have at least 16 bytes, as all allocations are bumped up
25
- // to 16
26
- return utils::roundUp(sz, (size_t) 16);
27
- }
21
+ if (sz == 0) {
22
+ return 0;
23
+ } else {
24
+ // ensure that we have at least 16 bytes, as all allocations are bumped
25
+ // up to 16
26
+ return utils::roundUp(sz, (size_t)16);
27
+ }
28
28
  }
29
29
 
30
30
  } // namespace
31
31
 
32
32
  StackDeviceMemory::Stack::Stack(GpuResources* res, int d, size_t sz)
33
- : res_(res),
34
- device_(d),
35
- alloc_(nullptr),
36
- allocSize_(adjustStackSize(sz)),
37
- start_(nullptr),
38
- end_(nullptr),
39
- head_(nullptr),
40
- highWaterMemoryUsed_(0) {
41
- if (allocSize_ == 0) {
42
- return;
43
- }
44
-
45
- DeviceScope s(device_);
46
- auto req = AllocRequest(AllocType::TemporaryMemoryBuffer,
47
- device_,
48
- MemorySpace::Device,
49
- res_->getDefaultStream(device_),
50
- allocSize_);
51
-
52
- alloc_ = (char*) res_->allocMemory(req);
53
- FAISS_ASSERT_FMT(
54
- alloc_,
55
- "could not reserve temporary memory region of size %zu", allocSize_);
56
-
57
- // In order to disambiguate between our entire region of temporary memory
58
- // versus the first allocation in the temporary memory region, ensure that the
59
- // first address returned is +16 bytes from the beginning
60
- start_ = alloc_ + 16;
61
- head_ = start_;
62
- end_ = alloc_ + allocSize_;
33
+ : res_(res),
34
+ device_(d),
35
+ alloc_(nullptr),
36
+ allocSize_(adjustStackSize(sz)),
37
+ start_(nullptr),
38
+ end_(nullptr),
39
+ head_(nullptr),
40
+ highWaterMemoryUsed_(0) {
41
+ if (allocSize_ == 0) {
42
+ return;
43
+ }
44
+
45
+ DeviceScope s(device_);
46
+ auto req = AllocRequest(
47
+ AllocType::TemporaryMemoryBuffer,
48
+ device_,
49
+ MemorySpace::Device,
50
+ res_->getDefaultStream(device_),
51
+ allocSize_);
52
+
53
+ alloc_ = (char*)res_->allocMemory(req);
54
+ FAISS_ASSERT_FMT(
55
+ alloc_,
56
+ "could not reserve temporary memory region of size %zu",
57
+ allocSize_);
58
+
59
+ // In order to disambiguate between our entire region of temporary memory
60
+ // versus the first allocation in the temporary memory region, ensure that
61
+ // the first address returned is +16 bytes from the beginning
62
+ start_ = alloc_ + 16;
63
+ head_ = start_;
64
+ end_ = alloc_ + allocSize_;
63
65
  }
64
66
 
65
67
  StackDeviceMemory::Stack::~Stack() {
66
- DeviceScope s(device_);
68
+ DeviceScope s(device_);
67
69
 
68
- // FIXME: make sure there are no outstanding memory allocations?
69
- if (alloc_) {
70
- res_->deallocMemory(device_, alloc_);
71
- }
70
+ // FIXME: make sure there are no outstanding memory allocations?
71
+ if (alloc_) {
72
+ res_->deallocMemory(device_, alloc_);
73
+ }
72
74
  }
73
75
 
74
- size_t
75
- StackDeviceMemory::Stack::getSizeAvailable() const {
76
- return (end_ - head_);
76
+ size_t StackDeviceMemory::Stack::getSizeAvailable() const {
77
+ return (end_ - head_);
77
78
  }
78
79
 
79
- char*
80
- StackDeviceMemory::Stack::getAlloc(size_t size,
81
- cudaStream_t stream) {
82
- // The user must check to see that the allocation fit within us
83
- auto sizeRemaining = getSizeAvailable();
80
+ char* StackDeviceMemory::Stack::getAlloc(size_t size, cudaStream_t stream) {
81
+ // The user must check to see that the allocation fit within us
82
+ auto sizeRemaining = getSizeAvailable();
84
83
 
85
- FAISS_ASSERT(size <= sizeRemaining);
84
+ FAISS_ASSERT(size <= sizeRemaining);
86
85
 
87
- // We can make the allocation out of our stack
88
- // Find all the ranges that we overlap that may have been
89
- // previously allocated; our allocation will be [head, endAlloc)
90
- char* startAlloc = head_;
91
- char* endAlloc = head_ + size;
86
+ // We can make the allocation out of our stack
87
+ // Find all the ranges that we overlap that may have been
88
+ // previously allocated; our allocation will be [head, endAlloc)
89
+ char* startAlloc = head_;
90
+ char* endAlloc = head_ + size;
92
91
 
93
- while (lastUsers_.size() > 0) {
94
- auto& prevUser = lastUsers_.back();
92
+ while (lastUsers_.size() > 0) {
93
+ auto& prevUser = lastUsers_.back();
95
94
 
96
- // Because there is a previous user, we must overlap it
97
- FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
95
+ // Because there is a previous user, we must overlap it
96
+ FAISS_ASSERT(
97
+ prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
98
98
 
99
- if (stream != prevUser.stream_) {
100
- // Synchronization required
101
- streamWait({stream}, {prevUser.stream_});
102
- }
99
+ if (stream != prevUser.stream_) {
100
+ // Synchronization required
101
+ streamWait({stream}, {prevUser.stream_});
102
+ }
103
103
 
104
- if (endAlloc < prevUser.end_) {
105
- // Update the previous user info
106
- prevUser.start_ = endAlloc;
104
+ if (endAlloc < prevUser.end_) {
105
+ // Update the previous user info
106
+ prevUser.start_ = endAlloc;
107
107
 
108
- break;
109
- }
108
+ break;
109
+ }
110
110
 
111
- // If we're the exact size of the previous request, then we
112
- // don't need to continue
113
- bool done = (prevUser.end_ == endAlloc);
111
+ // If we're the exact size of the previous request, then we
112
+ // don't need to continue
113
+ bool done = (prevUser.end_ == endAlloc);
114
114
 
115
- lastUsers_.pop_back();
115
+ lastUsers_.pop_back();
116
116
 
117
- if (done) {
118
- break;
117
+ if (done) {
118
+ break;
119
+ }
119
120
  }
120
- }
121
121
 
122
- head_ = endAlloc;
123
- FAISS_ASSERT(head_ <= end_);
122
+ head_ = endAlloc;
123
+ FAISS_ASSERT(head_ <= end_);
124
124
 
125
- highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
126
- (size_t) (head_ - start_));
127
- FAISS_ASSERT(startAlloc);
128
- return startAlloc;
125
+ highWaterMemoryUsed_ =
126
+ std::max(highWaterMemoryUsed_, (size_t)(head_ - start_));
127
+ FAISS_ASSERT(startAlloc);
128
+ return startAlloc;
129
129
  }
130
130
 
131
- void
132
- StackDeviceMemory::Stack::returnAlloc(char* p,
133
- size_t size,
134
- cudaStream_t stream) {
135
- // This allocation should be within ourselves
136
- FAISS_ASSERT(p >= start_ && p < end_);
131
+ void StackDeviceMemory::Stack::returnAlloc(
132
+ char* p,
133
+ size_t size,
134
+ cudaStream_t stream) {
135
+ // This allocation should be within ourselves
136
+ FAISS_ASSERT(p >= start_ && p < end_);
137
137
 
138
- // All allocations should have been adjusted to a multiple of 16 bytes
139
- FAISS_ASSERT(size % 16 == 0);
138
+ // All allocations should have been adjusted to a multiple of 16 bytes
139
+ FAISS_ASSERT(size % 16 == 0);
140
140
 
141
- // This is on our stack
142
- // Allocations should be freed in the reverse order they are made
143
- if (p + size != head_) {
144
- FAISS_ASSERT(p + size == head_);
145
- }
141
+ // This is on our stack
142
+ // Allocations should be freed in the reverse order they are made
143
+ if (p + size != head_) {
144
+ FAISS_ASSERT(p + size == head_);
145
+ }
146
146
 
147
- head_ = p;
148
- lastUsers_.push_back(Range(p, p + size, stream));
147
+ head_ = p;
148
+ lastUsers_.push_back(Range(p, p + size, stream));
149
149
  }
150
150
 
151
- std::string
152
- StackDeviceMemory::Stack::toString() const {
153
- std::stringstream s;
151
+ std::string StackDeviceMemory::Stack::toString() const {
152
+ std::stringstream s;
154
153
 
155
- s << "SDM device " << device_ << ": Total memory " << allocSize_ << " ["
156
- << (void*) start_ << ", " << (void*) end_ << ")\n";
157
- s << " Available memory " << (size_t) (end_ - head_)
158
- << " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
159
- s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
154
+ s << "SDM device " << device_ << ": Total memory " << allocSize_ << " ["
155
+ << (void*)start_ << ", " << (void*)end_ << ")\n";
156
+ s << " Available memory " << (size_t)(end_ - head_) << " ["
157
+ << (void*)head_ << ", " << (void*)end_ << ")\n";
158
+ s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
160
159
 
161
- int i = lastUsers_.size();
162
- for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
163
- s << i-- << ": size " << (size_t) (it->end_ - it->start_)
164
- << " stream " << it->stream_
165
- << " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
166
- }
160
+ int i = lastUsers_.size();
161
+ for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
162
+ s << i-- << ": size " << (size_t)(it->end_ - it->start_) << " stream "
163
+ << it->stream_ << " [" << (void*)it->start_ << ", " << (void*)it->end_
164
+ << ")\n";
165
+ }
167
166
 
168
- return s.str();
167
+ return s.str();
169
168
  }
170
169
 
171
- StackDeviceMemory::StackDeviceMemory(GpuResources* res,
172
- int device,
173
- size_t allocPerDevice)
174
- : device_(device),
175
- stack_(res, device, allocPerDevice) {
176
- }
170
+ StackDeviceMemory::StackDeviceMemory(
171
+ GpuResources* res,
172
+ int device,
173
+ size_t allocPerDevice)
174
+ : device_(device), stack_(res, device, allocPerDevice) {}
177
175
 
178
- StackDeviceMemory::~StackDeviceMemory() {
179
- }
176
+ StackDeviceMemory::~StackDeviceMemory() {}
180
177
 
181
- int
182
- StackDeviceMemory::getDevice() const {
183
- return device_;
178
+ int StackDeviceMemory::getDevice() const {
179
+ return device_;
184
180
  }
185
181
 
186
- size_t
187
- StackDeviceMemory::getSizeAvailable() const {
188
- return stack_.getSizeAvailable();
182
+ size_t StackDeviceMemory::getSizeAvailable() const {
183
+ return stack_.getSizeAvailable();
189
184
  }
190
185
 
191
- std::string
192
- StackDeviceMemory::toString() const {
193
- return stack_.toString();
186
+ std::string StackDeviceMemory::toString() const {
187
+ return stack_.toString();
194
188
  }
195
189
 
196
- void*
197
- StackDeviceMemory::allocMemory(cudaStream_t stream, size_t size) {
198
- // All allocations should have been adjusted to a multiple of 16 bytes
199
- FAISS_ASSERT(size % 16 == 0);
200
- return stack_.getAlloc(size, stream);
190
+ void* StackDeviceMemory::allocMemory(cudaStream_t stream, size_t size) {
191
+ // All allocations should have been adjusted to a multiple of 16 bytes
192
+ FAISS_ASSERT(size % 16 == 0);
193
+ return stack_.getAlloc(size, stream);
201
194
  }
202
195
 
203
- void
204
- StackDeviceMemory::deallocMemory(int device,
205
- cudaStream_t stream,
206
- size_t size,
207
- void* p) {
208
- FAISS_ASSERT(p);
209
- FAISS_ASSERT(device == device_);
196
+ void StackDeviceMemory::deallocMemory(
197
+ int device,
198
+ cudaStream_t stream,
199
+ size_t size,
200
+ void* p) {
201
+ FAISS_ASSERT(p);
202
+ FAISS_ASSERT(device == device_);
210
203
 
211
- stack_.returnAlloc((char*) p, size, stream);
204
+ stack_.returnAlloc((char*)p, size, stream);
212
205
  }
213
206
 
214
- } } // namespace
207
+ } // namespace gpu
208
+ } // namespace faiss
@@ -5,110 +5,108 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #pragma once
10
9
 
11
- #include <faiss/gpu/GpuResources.h>
12
10
  #include <cuda_runtime.h>
11
+ #include <faiss/gpu/GpuResources.h>
13
12
  #include <list>
14
13
  #include <memory>
15
- #include <unordered_map>
16
14
  #include <tuple>
15
+ #include <unordered_map>
17
16
 
18
- namespace faiss { namespace gpu {
17
+ namespace faiss {
18
+ namespace gpu {
19
19
 
20
20
  /// Device memory manager that provides temporary memory allocations
21
21
  /// out of a region of memory, for a single device
22
22
  class StackDeviceMemory {
23
- public:
24
- /// Allocate a new region of memory that we manage
25
- StackDeviceMemory(GpuResources* res,
26
- int device,
27
- size_t allocPerDevice);
23
+ public:
24
+ /// Allocate a new region of memory that we manage
25
+ StackDeviceMemory(GpuResources* res, int device, size_t allocPerDevice);
28
26
 
29
- /// Manage a region of memory for a particular device, with or
30
- /// without ownership
31
- StackDeviceMemory(int device, void* p, size_t size, bool isOwner);
27
+ /// Manage a region of memory for a particular device, with or
28
+ /// without ownership
29
+ StackDeviceMemory(int device, void* p, size_t size, bool isOwner);
32
30
 
33
- ~StackDeviceMemory();
31
+ ~StackDeviceMemory();
34
32
 
35
- int getDevice() const;
33
+ int getDevice() const;
36
34
 
37
- /// All allocations requested should be a multiple of 16 bytes
38
- void* allocMemory(cudaStream_t stream, size_t size);
39
- void deallocMemory(int device, cudaStream_t, size_t size, void* p);
35
+ /// All allocations requested should be a multiple of 16 bytes
36
+ void* allocMemory(cudaStream_t stream, size_t size);
37
+ void deallocMemory(int device, cudaStream_t, size_t size, void* p);
40
38
 
41
- size_t getSizeAvailable() const;
42
- std::string toString() const;
39
+ size_t getSizeAvailable() const;
40
+ std::string toString() const;
43
41
 
44
- protected:
45
- /// Previous allocation ranges and the streams for which
46
- /// synchronization is required
47
- struct Range {
48
- inline Range(char* s, char* e, cudaStream_t str) :
49
- start_(s), end_(e), stream_(str) {
50
- }
42
+ protected:
43
+ /// Previous allocation ranges and the streams for which
44
+ /// synchronization is required
45
+ struct Range {
46
+ inline Range(char* s, char* e, cudaStream_t str)
47
+ : start_(s), end_(e), stream_(str) {}
51
48
 
52
- // References a memory range [start, end)
53
- char* start_;
54
- char* end_;
55
- cudaStream_t stream_;
56
- };
49
+ // References a memory range [start, end)
50
+ char* start_;
51
+ char* end_;
52
+ cudaStream_t stream_;
53
+ };
57
54
 
58
- struct Stack {
59
- /// Constructor that allocates memory via cudaMalloc
60
- Stack(GpuResources* res, int device, size_t size);
55
+ struct Stack {
56
+ /// Constructor that allocates memory via cudaMalloc
57
+ Stack(GpuResources* res, int device, size_t size);
61
58
 
62
- ~Stack();
59
+ ~Stack();
63
60
 
64
- /// Returns how much size is available for an allocation without
65
- /// calling cudaMalloc
66
- size_t getSizeAvailable() const;
61
+ /// Returns how much size is available for an allocation without
62
+ /// calling cudaMalloc
63
+ size_t getSizeAvailable() const;
67
64
 
68
- /// Obtains an allocation; all allocations are guaranteed to be 16
69
- /// byte aligned
70
- char* getAlloc(size_t size, cudaStream_t stream);
65
+ /// Obtains an allocation; all allocations are guaranteed to be 16
66
+ /// byte aligned
67
+ char* getAlloc(size_t size, cudaStream_t stream);
71
68
 
72
- /// Returns an allocation
73
- void returnAlloc(char* p, size_t size, cudaStream_t stream);
69
+ /// Returns an allocation
70
+ void returnAlloc(char* p, size_t size, cudaStream_t stream);
74
71
 
75
- /// Returns the stack state
76
- std::string toString() const;
72
+ /// Returns the stack state
73
+ std::string toString() const;
77
74
 
78
- /// Our GpuResources object
79
- GpuResources* res_;
75
+ /// Our GpuResources object
76
+ GpuResources* res_;
80
77
 
81
- /// Device this allocation is on
82
- int device_;
78
+ /// Device this allocation is on
79
+ int device_;
83
80
 
84
- /// Where our temporary memory buffer is allocated; we allocate starting 16
85
- /// bytes into this
86
- char* alloc_;
81
+ /// Where our temporary memory buffer is allocated; we allocate starting
82
+ /// 16 bytes into this
83
+ char* alloc_;
87
84
 
88
- /// Total size of our allocation
89
- size_t allocSize_;
85
+ /// Total size of our allocation
86
+ size_t allocSize_;
90
87
 
91
- /// Our temporary memory region; [start_, end_) is valid
92
- char* start_;
93
- char* end_;
88
+ /// Our temporary memory region; [start_, end_) is valid
89
+ char* start_;
90
+ char* end_;
94
91
 
95
- /// Stack head within [start, end)
96
- char* head_;
92
+ /// Stack head within [start, end)
93
+ char* head_;
97
94
 
98
- /// List of previous last users of allocations on our stack, for
99
- /// possible synchronization purposes
100
- std::list<Range> lastUsers_;
95
+ /// List of previous last users of allocations on our stack, for
96
+ /// possible synchronization purposes
97
+ std::list<Range> lastUsers_;
101
98
 
102
- /// What's the high water mark in terms of memory used from the
103
- /// temporary buffer?
104
- size_t highWaterMemoryUsed_;
105
- };
99
+ /// What's the high water mark in terms of memory used from the
100
+ /// temporary buffer?
101
+ size_t highWaterMemoryUsed_;
102
+ };
106
103
 
107
- /// Our device
108
- int device_;
104
+ /// Our device
105
+ int device_;
109
106
 
110
- /// Memory stack
111
- Stack stack_;
107
+ /// Memory stack
108
+ Stack stack_;
112
109
  };
113
110
 
114
- } } // namespace
111
+ } // namespace gpu
112
+ } // namespace faiss
@@ -5,7 +5,6 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #pragma once
10
9
 
11
10
  #include <cuda.h>
@@ -16,43 +15,45 @@
16
15
  #define __device__
17
16
  #endif
18
17
 
19
- namespace faiss { namespace gpu { namespace utils {
18
+ namespace faiss {
19
+ namespace gpu {
20
+ namespace utils {
20
21
 
21
22
  template <typename U, typename V>
22
23
  constexpr __host__ __device__ auto divDown(U a, V b) -> decltype(a + b) {
23
- return (a / b);
24
+ return (a / b);
24
25
  }
25
26
 
26
27
  template <typename U, typename V>
27
28
  constexpr __host__ __device__ auto divUp(U a, V b) -> decltype(a + b) {
28
- return (a + b - 1) / b;
29
+ return (a + b - 1) / b;
29
30
  }
30
31
 
31
32
  template <typename U, typename V>
32
33
  constexpr __host__ __device__ auto roundDown(U a, V b) -> decltype(a + b) {
33
- return divDown(a, b) * b;
34
+ return divDown(a, b) * b;
34
35
  }
35
36
 
36
37
  template <typename U, typename V>
37
38
  constexpr __host__ __device__ auto roundUp(U a, V b) -> decltype(a + b) {
38
- return divUp(a, b) * b;
39
+ return divUp(a, b) * b;
39
40
  }
40
41
 
41
42
  template <class T>
42
43
  constexpr __host__ __device__ T pow(T n, T power) {
43
- return (power > 0 ? n * pow(n, power - 1) : 1);
44
+ return (power > 0 ? n * pow(n, power - 1) : 1);
44
45
  }
45
46
 
46
47
  template <class T>
47
48
  constexpr __host__ __device__ T pow2(T n) {
48
- return pow(2, (T) n);
49
+ return pow(2, (T)n);
49
50
  }
50
51
 
51
52
  static_assert(pow2(8) == 256, "pow2");
52
53
 
53
54
  template <typename T>
54
55
  constexpr __host__ __device__ int log2(T n, int p = 0) {
55
- return (n <= 1) ? p : log2(n / 2, p + 1);
56
+ return (n <= 1) ? p : log2(n / 2, p + 1);
56
57
  }
57
58
 
58
59
  static_assert(log2(2) == 1, "log2");
@@ -61,7 +62,7 @@ static_assert(log2(4) == 2, "log2");
61
62
 
62
63
  template <typename T>
63
64
  constexpr __host__ __device__ bool isPowerOf2(T v) {
64
- return (v && !(v & (v - 1)));
65
+ return (v && !(v & (v - 1)));
65
66
  }
66
67
 
67
68
  static_assert(isPowerOf2(2048), "isPowerOf2");
@@ -69,7 +70,7 @@ static_assert(!isPowerOf2(3333), "isPowerOf2");
69
70
 
70
71
  template <typename T>
71
72
  constexpr __host__ __device__ T nextHighestPowerOf2(T v) {
72
- return (isPowerOf2(v) ? (T) 2 * v : ((T) 1 << (log2(v) + 1)));
73
+ return (isPowerOf2(v) ? (T)2 * v : ((T)1 << (log2(v) + 1)));
73
74
  }
74
75
 
75
76
  static_assert(nextHighestPowerOf2(1) == 2, "nextHighestPowerOf2");
@@ -81,9 +82,13 @@ static_assert(nextHighestPowerOf2(15) == 16, "nextHighestPowerOf2");
81
82
  static_assert(nextHighestPowerOf2(16) == 32, "nextHighestPowerOf2");
82
83
  static_assert(nextHighestPowerOf2(17) == 32, "nextHighestPowerOf2");
83
84
 
84
- static_assert(nextHighestPowerOf2(1536000000u) == 2147483648u,
85
- "nextHighestPowerOf2");
86
- static_assert(nextHighestPowerOf2((size_t) 2147483648ULL) ==
87
- (size_t) 4294967296ULL, "nextHighestPowerOf2");
85
+ static_assert(
86
+ nextHighestPowerOf2(1536000000u) == 2147483648u,
87
+ "nextHighestPowerOf2");
88
+ static_assert(
89
+ nextHighestPowerOf2((size_t)2147483648ULL) == (size_t)4294967296ULL,
90
+ "nextHighestPowerOf2");
88
91
 
89
- } } } // namespace
92
+ } // namespace utils
93
+ } // namespace gpu
94
+ } // namespace faiss