faiss 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
  6. data/vendor/faiss/c_api/AutoTune_c.h +2 -0
  7. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
  8. data/vendor/faiss/c_api/IndexShards_c.h +1 -4
  9. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
  10. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
  11. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
  12. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
  13. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
  14. data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
  15. data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
  16. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
  17. data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
  18. data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
  19. data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
  20. data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
  21. data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
  22. data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
  23. data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
  24. data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
  25. data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
  26. data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
  27. data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
  28. data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
  29. data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
  30. data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
  31. data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
  32. data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
  33. data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
  34. data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
  35. data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
  36. data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
  37. data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
  38. data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
  39. data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
  40. data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
  41. data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
  42. data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
  43. data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
  44. data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
  45. data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
  46. data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
  47. data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
  48. data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
  49. data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
  50. data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
  51. data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
  52. data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
  53. data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
  54. data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
  55. data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
  56. data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
  57. data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
  58. data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
  59. data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
  60. data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
  61. data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
  62. data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
  63. data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
  64. data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
  65. data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
  66. data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
  67. data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
  68. data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
  69. data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
  70. data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
  71. data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
  72. data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
  73. data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
  74. data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
  75. data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
  76. data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
  77. data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
  78. data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
  79. data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
  80. data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
  81. data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
  82. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
  83. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
  84. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
  85. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
  86. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
  87. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
  88. data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
  89. data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
  90. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
  91. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
  92. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
  93. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
  94. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
  95. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
  96. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
  97. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
  98. data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
  99. data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
  100. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
  101. data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
  102. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
  103. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
  104. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
  105. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
  106. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
  107. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
  108. data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
  109. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
  110. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
  111. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
  112. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
  113. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
  114. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
  115. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
  116. data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
  117. data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
  118. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
  119. data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
  120. data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
  121. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
  122. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
  123. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
  124. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
  125. data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
  126. data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
  127. data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
  128. data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
  129. data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
  130. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
  131. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
  132. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
  133. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
  134. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
  135. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
  136. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
  137. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
  138. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
  139. data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
  140. data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
  141. data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
  142. data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
  143. data/vendor/faiss/faiss/impl/io_macros.h +57 -0
  144. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
  145. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
  146. data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
  147. data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
  148. data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
  149. data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
  150. data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
  151. data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
  152. data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
  153. data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
  154. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
  155. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
  156. data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
  157. data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
  158. data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
  159. data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
  160. data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
  161. data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
  162. data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
  163. data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
  164. data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
  165. data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
  166. data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
  167. data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
  168. data/vendor/faiss/misc/test_blas.cpp +4 -1
  169. data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
  170. data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
  171. data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
  172. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
  173. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
  174. data/vendor/faiss/tests/test_merge.cpp +6 -3
  175. data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
  176. data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
  177. data/vendor/faiss/tests/test_params_override.cpp +7 -2
  178. data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
  179. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
  180. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
  181. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
  182. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
  183. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
  184. metadata +154 -153
  185. data/vendor/faiss/gpu/GpuResources.cpp +0 -52
  186. data/vendor/faiss/gpu/GpuResources.h +0 -73
  187. data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
  188. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
  189. data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
  190. data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
  191. data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
  192. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -24,6 +24,8 @@ inline float relativeError(float a, float b) {
24
24
  // This seed is also used for the faiss float_rand API; in a test it
25
25
  // is all within a single thread, so it is ok
26
26
  long s_seed = 1;
27
+ std::mt19937 rng(1);
28
+ std::uniform_int_distribution<> distrib;
27
29
 
28
30
  void newTestSeed() {
29
31
  struct timespec t;
@@ -35,7 +37,7 @@ void newTestSeed() {
35
37
  void setTestSeed(long seed) {
36
38
  printf("testing with random seed %ld\n", seed);
37
39
 
38
- srand48(seed);
40
+ rng = std::mt19937(seed);
39
41
  s_seed = seed;
40
42
  }
41
43
 
@@ -43,7 +45,7 @@ int randVal(int a, int b) {
43
45
  EXPECT_GE(a, 0);
44
46
  EXPECT_LE(a, b);
45
47
 
46
- return a + (lrand48() % (b + 1 - a));
48
+ return a + (distrib(rng) % (b + 1 - a));
47
49
  }
48
50
 
49
51
  bool randBool() {
@@ -5,12 +5,10 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // Copyright 2004-present Facebook. All Rights Reserved
9
-
10
-
11
8
  #include <cmath>
12
9
  #include <cstdio>
13
10
  #include <cstdlib>
11
+ #include <random>
14
12
 
15
13
  #include <sys/time.h>
16
14
 
@@ -64,13 +62,16 @@ int main ()
64
62
  faiss::gpu::GpuIndexIVFPQ index (
65
63
  &resources, d, ncentroids, 4, 8, faiss::METRIC_L2, config);
66
64
 
65
+ std::mt19937 rng;
66
+
67
67
  { // training
68
68
  printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
69
69
  elapsed() - t0, nt, d);
70
70
 
71
71
  std::vector <float> trainvecs (nt * d);
72
+ std::uniform_real_distribution<> distrib;
72
73
  for (size_t i = 0; i < nt * d; i++) {
73
- trainvecs[i] = drand48();
74
+ trainvecs[i] = distrib(rng);
74
75
  }
75
76
 
76
77
  printf ("[%.3f s] Training the index\n",
@@ -100,8 +101,9 @@ int main ()
100
101
  elapsed() - t0, nb);
101
102
 
102
103
  std::vector <float> database (nb * d);
104
+ std::uniform_real_distribution<> distrib;
103
105
  for (size_t i = 0; i < nb * d; i++) {
104
- database[i] = drand48();
106
+ database[i] = distrib(rng);
105
107
  }
106
108
 
107
109
  printf ("[%.3f s] Adding the vectors to the index\n",
@@ -102,7 +102,7 @@ class CublasHandleScope {
102
102
  class CudaEvent {
103
103
  public:
104
104
  /// Creates an event and records it in this stream
105
- explicit CudaEvent(cudaStream_t stream);
105
+ explicit CudaEvent(cudaStream_t stream, bool timer = false);
106
106
  CudaEvent(const CudaEvent& event) = delete;
107
107
  CudaEvent(CudaEvent&& event) noexcept;
108
108
  ~CudaEvent();
@@ -0,0 +1,213 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/utils/StackDeviceMemory.h>
10
+ #include <faiss/gpu/utils/DeviceUtils.h>
11
+ #include <faiss/gpu/utils/StaticUtils.h>
12
+ #include <faiss/impl/FaissAssert.h>
13
+ #include <sstream>
14
+
15
+ namespace faiss { namespace gpu {
16
+
17
+ namespace {
18
+
19
+ size_t adjustStackSize(size_t sz) {
20
+ if (sz == 0) {
21
+ return 0;
22
+ } else {
23
+ // ensure that we have at least 16 bytes, as all allocations are bumped up
24
+ // to 16
25
+ return utils::roundUp(sz, (size_t) 16);
26
+ }
27
+ }
28
+
29
+ } // namespace
30
+
31
+ StackDeviceMemory::Stack::Stack(GpuResources* res, int d, size_t sz)
32
+ : res_(res),
33
+ device_(d),
34
+ alloc_(nullptr),
35
+ allocSize_(adjustStackSize(sz)),
36
+ start_(nullptr),
37
+ end_(nullptr),
38
+ head_(nullptr),
39
+ highWaterMemoryUsed_(0) {
40
+ if (allocSize_ == 0) {
41
+ return;
42
+ }
43
+
44
+ DeviceScope s(device_);
45
+ auto req = AllocRequest(AllocType::TemporaryMemoryBuffer,
46
+ device_,
47
+ MemorySpace::Device,
48
+ res_->getDefaultStream(device_),
49
+ allocSize_);
50
+
51
+ alloc_ = (char*) res_->allocMemory(req);
52
+ FAISS_ASSERT_FMT(
53
+ alloc_,
54
+ "could not reserve temporary memory region of size %zu", allocSize_);
55
+
56
+ // In order to disambiguate between our entire region of temporary memory
57
+ // versus the first allocation in the temporary memory region, ensure that the
58
+ // first address returned is +16 bytes from the beginning
59
+ start_ = alloc_ + 16;
60
+ head_ = start_;
61
+ end_ = alloc_ + allocSize_;
62
+ }
63
+
64
+ StackDeviceMemory::Stack::~Stack() {
65
+ DeviceScope s(device_);
66
+
67
+ // FIXME: make sure there are no outstanding memory allocations?
68
+ if (alloc_) {
69
+ res_->deallocMemory(device_, alloc_);
70
+ }
71
+ }
72
+
73
+ size_t
74
+ StackDeviceMemory::Stack::getSizeAvailable() const {
75
+ return (end_ - head_);
76
+ }
77
+
78
+ char*
79
+ StackDeviceMemory::Stack::getAlloc(size_t size,
80
+ cudaStream_t stream) {
81
+ // The user must check to see that the allocation fit within us
82
+ auto sizeRemaining = getSizeAvailable();
83
+
84
+ FAISS_ASSERT(size <= sizeRemaining);
85
+
86
+ // We can make the allocation out of our stack
87
+ // Find all the ranges that we overlap that may have been
88
+ // previously allocated; our allocation will be [head, endAlloc)
89
+ char* startAlloc = head_;
90
+ char* endAlloc = head_ + size;
91
+
92
+ while (lastUsers_.size() > 0) {
93
+ auto& prevUser = lastUsers_.back();
94
+
95
+ // Because there is a previous user, we must overlap it
96
+ FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
97
+
98
+ if (stream != prevUser.stream_) {
99
+ // Synchronization required
100
+ streamWait({stream}, {prevUser.stream_});
101
+ }
102
+
103
+ if (endAlloc < prevUser.end_) {
104
+ // Update the previous user info
105
+ prevUser.start_ = endAlloc;
106
+
107
+ break;
108
+ }
109
+
110
+ // If we're the exact size of the previous request, then we
111
+ // don't need to continue
112
+ bool done = (prevUser.end_ == endAlloc);
113
+
114
+ lastUsers_.pop_back();
115
+
116
+ if (done) {
117
+ break;
118
+ }
119
+ }
120
+
121
+ head_ = endAlloc;
122
+ FAISS_ASSERT(head_ <= end_);
123
+
124
+ highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
125
+ (size_t) (head_ - start_));
126
+ FAISS_ASSERT(startAlloc);
127
+ return startAlloc;
128
+ }
129
+
130
+ void
131
+ StackDeviceMemory::Stack::returnAlloc(char* p,
132
+ size_t size,
133
+ cudaStream_t stream) {
134
+ // This allocation should be within ourselves
135
+ FAISS_ASSERT(p >= start_ && p < end_);
136
+
137
+ // All allocations should have been adjusted to a multiple of 16 bytes
138
+ FAISS_ASSERT(size % 16 == 0);
139
+
140
+ // This is on our stack
141
+ // Allocations should be freed in the reverse order they are made
142
+ if (p + size != head_) {
143
+ FAISS_ASSERT(p + size == head_);
144
+ }
145
+
146
+ head_ = p;
147
+ lastUsers_.push_back(Range(p, p + size, stream));
148
+ }
149
+
150
+ std::string
151
+ StackDeviceMemory::Stack::toString() const {
152
+ std::stringstream s;
153
+
154
+ s << "SDM device " << device_ << ": Total memory " << allocSize_ << " ["
155
+ << (void*) start_ << ", " << (void*) end_ << ")\n";
156
+ s << " Available memory " << (size_t) (end_ - head_)
157
+ << " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
158
+ s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
159
+
160
+ int i = lastUsers_.size();
161
+ for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
162
+ s << i-- << ": size " << (size_t) (it->end_ - it->start_)
163
+ << " stream " << it->stream_
164
+ << " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
165
+ }
166
+
167
+ return s.str();
168
+ }
169
+
170
+ StackDeviceMemory::StackDeviceMemory(GpuResources* res,
171
+ int device,
172
+ size_t allocPerDevice)
173
+ : device_(device),
174
+ stack_(res, device, allocPerDevice) {
175
+ }
176
+
177
+ StackDeviceMemory::~StackDeviceMemory() {
178
+ }
179
+
180
+ int
181
+ StackDeviceMemory::getDevice() const {
182
+ return device_;
183
+ }
184
+
185
+ size_t
186
+ StackDeviceMemory::getSizeAvailable() const {
187
+ return stack_.getSizeAvailable();
188
+ }
189
+
190
+ std::string
191
+ StackDeviceMemory::toString() const {
192
+ return stack_.toString();
193
+ }
194
+
195
+ void*
196
+ StackDeviceMemory::allocMemory(cudaStream_t stream, size_t size) {
197
+ // All allocations should have been adjusted to a multiple of 16 bytes
198
+ FAISS_ASSERT(size % 16 == 0);
199
+ return stack_.getAlloc(size, stream);
200
+ }
201
+
202
+ void
203
+ StackDeviceMemory::deallocMemory(int device,
204
+ cudaStream_t stream,
205
+ size_t size,
206
+ void* p) {
207
+ FAISS_ASSERT(p);
208
+ FAISS_ASSERT(device == device_);
209
+
210
+ stack_.returnAlloc((char*) p, size, stream);
211
+ }
212
+
213
+ } } // namespace
@@ -8,41 +8,38 @@
8
8
 
9
9
  #pragma once
10
10
 
11
- #include <faiss/gpu/utils/DeviceMemory.h>
11
+ #include <faiss/gpu/GpuResources.h>
12
+ #include <cuda_runtime.h>
12
13
  #include <list>
13
14
  #include <memory>
14
15
  #include <unordered_map>
16
+ #include <tuple>
15
17
 
16
18
  namespace faiss { namespace gpu {
17
19
 
18
20
  /// Device memory manager that provides temporary memory allocations
19
- /// out of a region of memory
20
- class StackDeviceMemory : public DeviceMemory {
21
+ /// out of a region of memory, for a single device
22
+ class StackDeviceMemory {
21
23
  public:
22
24
  /// Allocate a new region of memory that we manage
23
- explicit StackDeviceMemory(int device, size_t allocPerDevice);
25
+ StackDeviceMemory(GpuResources* res,
26
+ int device,
27
+ size_t allocPerDevice);
24
28
 
25
29
  /// Manage a region of memory for a particular device, with or
26
30
  /// without ownership
27
31
  StackDeviceMemory(int device, void* p, size_t size, bool isOwner);
28
32
 
29
- ~StackDeviceMemory() override;
33
+ ~StackDeviceMemory();
30
34
 
31
- /// Enable or disable the warning about not having enough temporary memory
32
- /// when cudaMalloc gets called
33
- void setCudaMallocWarning(bool b);
35
+ int getDevice() const;
34
36
 
35
- int getDevice() const override;
37
+ /// All allocations requested should be a multiple of 16 bytes
38
+ void* allocMemory(cudaStream_t stream, size_t size);
39
+ void deallocMemory(int device, cudaStream_t, size_t size, void* p);
36
40
 
37
- DeviceMemoryReservation getMemory(cudaStream_t stream,
38
- size_t size) override;
39
-
40
- size_t getSizeAvailable() const override;
41
- std::string toString() const override;
42
- size_t getHighWaterCudaMalloc() const override;
43
-
44
- protected:
45
- void returnAllocation(DeviceMemoryReservation& m) override;
41
+ size_t getSizeAvailable() const;
42
+ std::string toString() const;
46
43
 
47
44
  protected:
48
45
  /// Previous allocation ranges and the streams for which
@@ -60,10 +57,8 @@ class StackDeviceMemory : public DeviceMemory {
60
57
 
61
58
  struct Stack {
62
59
  /// Constructor that allocates memory via cudaMalloc
63
- Stack(int device, size_t size);
60
+ Stack(GpuResources* res, int device, size_t size);
64
61
 
65
- /// Constructor that references a pre-allocated region of memory
66
- Stack(int device, void* p, size_t size, bool isOwner);
67
62
  ~Stack();
68
63
 
69
64
  /// Returns how much size is available for an allocation without
@@ -80,23 +75,23 @@ class StackDeviceMemory : public DeviceMemory {
80
75
  /// Returns the stack state
81
76
  std::string toString() const;
82
77
 
83
- /// Returns the high-water mark of cudaMalloc activity
84
- size_t getHighWaterCudaMalloc() const;
78
+ /// Our GpuResources object
79
+ GpuResources* res_;
85
80
 
86
81
  /// Device this allocation is on
87
82
  int device_;
88
83
 
89
- /// Do we own our region of memory?
90
- bool isOwner_;
84
+ /// Where our temporary memory buffer is allocated; we allocate starting 16
85
+ /// bytes into this
86
+ char* alloc_;
87
+
88
+ /// Total size of our allocation
89
+ size_t allocSize_;
91
90
 
92
- /// Where our allocation begins and ends
93
- /// [start_, end_) is valid
91
+ /// Our temporary memory region; [start_, end_) is valid
94
92
  char* start_;
95
93
  char* end_;
96
94
 
97
- /// Total size end_ - start_
98
- size_t size_;
99
-
100
95
  /// Stack head within [start, end)
101
96
  char* head_;
102
97
 
@@ -104,19 +99,9 @@ class StackDeviceMemory : public DeviceMemory {
104
99
  /// possible synchronization purposes
105
100
  std::list<Range> lastUsers_;
106
101
 
107
- /// How much cudaMalloc memory is currently outstanding?
108
- size_t mallocCurrent_;
109
-
110
102
  /// What's the high water mark in terms of memory used from the
111
103
  /// temporary buffer?
112
104
  size_t highWaterMemoryUsed_;
113
-
114
- /// What's the high water mark in terms of memory allocated via
115
- /// cudaMalloc?
116
- size_t highWaterMalloc_;
117
-
118
- /// Whether or not a warning upon cudaMalloc is generated
119
- bool cudaMallocWarning_;
120
105
  };
121
106
 
122
107
  /// Our device
@@ -7,6 +7,7 @@
7
7
 
8
8
  // -*- c++ -*-
9
9
 
10
+ #include <algorithm>
10
11
  #include <cstring>
11
12
 
12
13
  #include <faiss/impl/AuxIndexStructures.h>
@@ -21,6 +21,8 @@
21
21
  #include <mutex>
22
22
 
23
23
  #include <faiss/Index.h>
24
+ #include <faiss/impl/platform_macros.h>
25
+
24
26
 
25
27
  namespace faiss {
26
28
 
@@ -218,7 +220,7 @@ struct DistanceComputer {
218
220
  * Interrupt callback
219
221
  ***********************************************************/
220
222
 
221
- struct InterruptCallback {
223
+ struct FAISS_API InterruptCallback {
222
224
  virtual bool want_interrupt () = 0;
223
225
  virtual ~InterruptCallback() {}
224
226