faiss 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
  6. data/vendor/faiss/c_api/AutoTune_c.h +2 -0
  7. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
  8. data/vendor/faiss/c_api/IndexShards_c.h +1 -4
  9. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
  10. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
  11. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
  12. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
  13. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
  14. data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
  15. data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
  16. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
  17. data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
  18. data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
  19. data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
  20. data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
  21. data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
  22. data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
  23. data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
  24. data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
  25. data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
  26. data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
  27. data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
  28. data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
  29. data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
  30. data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
  31. data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
  32. data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
  33. data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
  34. data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
  35. data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
  36. data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
  37. data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
  38. data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
  39. data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
  40. data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
  41. data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
  42. data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
  43. data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
  44. data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
  45. data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
  46. data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
  47. data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
  48. data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
  49. data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
  50. data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
  51. data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
  52. data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
  53. data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
  54. data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
  55. data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
  56. data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
  57. data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
  58. data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
  59. data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
  60. data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
  61. data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
  62. data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
  63. data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
  64. data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
  65. data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
  66. data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
  67. data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
  68. data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
  69. data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
  70. data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
  71. data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
  72. data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
  73. data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
  74. data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
  75. data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
  76. data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
  77. data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
  78. data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
  79. data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
  80. data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
  81. data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
  82. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
  83. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
  84. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
  85. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
  86. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
  87. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
  88. data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
  89. data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
  90. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
  91. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
  92. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
  93. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
  94. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
  95. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
  96. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
  97. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
  98. data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
  99. data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
  100. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
  101. data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
  102. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
  103. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
  104. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
  105. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
  106. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
  107. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
  108. data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
  109. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
  110. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
  111. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
  112. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
  113. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
  114. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
  115. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
  116. data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
  117. data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
  118. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
  119. data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
  120. data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
  121. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
  122. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
  123. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
  124. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
  125. data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
  126. data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
  127. data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
  128. data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
  129. data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
  130. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
  131. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
  132. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
  133. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
  134. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
  135. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
  136. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
  137. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
  138. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
  139. data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
  140. data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
  141. data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
  142. data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
  143. data/vendor/faiss/faiss/impl/io_macros.h +57 -0
  144. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
  145. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
  146. data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
  147. data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
  148. data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
  149. data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
  150. data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
  151. data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
  152. data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
  153. data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
  154. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
  155. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
  156. data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
  157. data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
  158. data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
  159. data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
  160. data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
  161. data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
  162. data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
  163. data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
  164. data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
  165. data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
  166. data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
  167. data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
  168. data/vendor/faiss/misc/test_blas.cpp +4 -1
  169. data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
  170. data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
  171. data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
  172. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
  173. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
  174. data/vendor/faiss/tests/test_merge.cpp +6 -3
  175. data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
  176. data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
  177. data/vendor/faiss/tests/test_params_override.cpp +7 -2
  178. data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
  179. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
  180. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
  181. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
  182. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
  183. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
  184. metadata +154 -153
  185. data/vendor/faiss/gpu/GpuResources.cpp +0 -52
  186. data/vendor/faiss/gpu/GpuResources.h +0 -73
  187. data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
  188. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
  189. data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
  190. data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
  191. data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
  192. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -24,6 +24,8 @@ inline float relativeError(float a, float b) {
24
24
  // This seed is also used for the faiss float_rand API; in a test it
25
25
  // is all within a single thread, so it is ok
26
26
  long s_seed = 1;
27
+ std::mt19937 rng(1);
28
+ std::uniform_int_distribution<> distrib;
27
29
 
28
30
  void newTestSeed() {
29
31
  struct timespec t;
@@ -35,7 +37,7 @@ void newTestSeed() {
35
37
  void setTestSeed(long seed) {
36
38
  printf("testing with random seed %ld\n", seed);
37
39
 
38
- srand48(seed);
40
+ rng = std::mt19937(seed);
39
41
  s_seed = seed;
40
42
  }
41
43
 
@@ -43,7 +45,7 @@ int randVal(int a, int b) {
43
45
  EXPECT_GE(a, 0);
44
46
  EXPECT_LE(a, b);
45
47
 
46
- return a + (lrand48() % (b + 1 - a));
48
+ return a + (distrib(rng) % (b + 1 - a));
47
49
  }
48
50
 
49
51
  bool randBool() {
@@ -5,12 +5,10 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // Copyright 2004-present Facebook. All Rights Reserved
9
-
10
-
11
8
  #include <cmath>
12
9
  #include <cstdio>
13
10
  #include <cstdlib>
11
+ #include <random>
14
12
 
15
13
  #include <sys/time.h>
16
14
 
@@ -64,13 +62,16 @@ int main ()
64
62
  faiss::gpu::GpuIndexIVFPQ index (
65
63
  &resources, d, ncentroids, 4, 8, faiss::METRIC_L2, config);
66
64
 
65
+ std::mt19937 rng;
66
+
67
67
  { // training
68
68
  printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
69
69
  elapsed() - t0, nt, d);
70
70
 
71
71
  std::vector <float> trainvecs (nt * d);
72
+ std::uniform_real_distribution<> distrib;
72
73
  for (size_t i = 0; i < nt * d; i++) {
73
- trainvecs[i] = drand48();
74
+ trainvecs[i] = distrib(rng);
74
75
  }
75
76
 
76
77
  printf ("[%.3f s] Training the index\n",
@@ -100,8 +101,9 @@ int main ()
100
101
  elapsed() - t0, nb);
101
102
 
102
103
  std::vector <float> database (nb * d);
104
+ std::uniform_real_distribution<> distrib;
103
105
  for (size_t i = 0; i < nb * d; i++) {
104
- database[i] = drand48();
106
+ database[i] = distrib(rng);
105
107
  }
106
108
 
107
109
  printf ("[%.3f s] Adding the vectors to the index\n",
@@ -102,7 +102,7 @@ class CublasHandleScope {
102
102
  class CudaEvent {
103
103
  public:
104
104
  /// Creates an event and records it in this stream
105
- explicit CudaEvent(cudaStream_t stream);
105
+ explicit CudaEvent(cudaStream_t stream, bool timer = false);
106
106
  CudaEvent(const CudaEvent& event) = delete;
107
107
  CudaEvent(CudaEvent&& event) noexcept;
108
108
  ~CudaEvent();
@@ -0,0 +1,213 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/utils/StackDeviceMemory.h>
10
+ #include <faiss/gpu/utils/DeviceUtils.h>
11
+ #include <faiss/gpu/utils/StaticUtils.h>
12
+ #include <faiss/impl/FaissAssert.h>
13
+ #include <sstream>
14
+
15
+ namespace faiss { namespace gpu {
16
+
17
+ namespace {
18
+
19
+ size_t adjustStackSize(size_t sz) {
20
+ if (sz == 0) {
21
+ return 0;
22
+ } else {
23
+ // ensure that we have at least 16 bytes, as all allocations are bumped up
24
+ // to 16
25
+ return utils::roundUp(sz, (size_t) 16);
26
+ }
27
+ }
28
+
29
+ } // namespace
30
+
31
+ StackDeviceMemory::Stack::Stack(GpuResources* res, int d, size_t sz)
32
+ : res_(res),
33
+ device_(d),
34
+ alloc_(nullptr),
35
+ allocSize_(adjustStackSize(sz)),
36
+ start_(nullptr),
37
+ end_(nullptr),
38
+ head_(nullptr),
39
+ highWaterMemoryUsed_(0) {
40
+ if (allocSize_ == 0) {
41
+ return;
42
+ }
43
+
44
+ DeviceScope s(device_);
45
+ auto req = AllocRequest(AllocType::TemporaryMemoryBuffer,
46
+ device_,
47
+ MemorySpace::Device,
48
+ res_->getDefaultStream(device_),
49
+ allocSize_);
50
+
51
+ alloc_ = (char*) res_->allocMemory(req);
52
+ FAISS_ASSERT_FMT(
53
+ alloc_,
54
+ "could not reserve temporary memory region of size %zu", allocSize_);
55
+
56
+ // In order to disambiguate between our entire region of temporary memory
57
+ // versus the first allocation in the temporary memory region, ensure that the
58
+ // first address returned is +16 bytes from the beginning
59
+ start_ = alloc_ + 16;
60
+ head_ = start_;
61
+ end_ = alloc_ + allocSize_;
62
+ }
63
+
64
+ StackDeviceMemory::Stack::~Stack() {
65
+ DeviceScope s(device_);
66
+
67
+ // FIXME: make sure there are no outstanding memory allocations?
68
+ if (alloc_) {
69
+ res_->deallocMemory(device_, alloc_);
70
+ }
71
+ }
72
+
73
+ size_t
74
+ StackDeviceMemory::Stack::getSizeAvailable() const {
75
+ return (end_ - head_);
76
+ }
77
+
78
+ char*
79
+ StackDeviceMemory::Stack::getAlloc(size_t size,
80
+ cudaStream_t stream) {
81
+ // The user must check to see that the allocation fit within us
82
+ auto sizeRemaining = getSizeAvailable();
83
+
84
+ FAISS_ASSERT(size <= sizeRemaining);
85
+
86
+ // We can make the allocation out of our stack
87
+ // Find all the ranges that we overlap that may have been
88
+ // previously allocated; our allocation will be [head, endAlloc)
89
+ char* startAlloc = head_;
90
+ char* endAlloc = head_ + size;
91
+
92
+ while (lastUsers_.size() > 0) {
93
+ auto& prevUser = lastUsers_.back();
94
+
95
+ // Because there is a previous user, we must overlap it
96
+ FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
97
+
98
+ if (stream != prevUser.stream_) {
99
+ // Synchronization required
100
+ streamWait({stream}, {prevUser.stream_});
101
+ }
102
+
103
+ if (endAlloc < prevUser.end_) {
104
+ // Update the previous user info
105
+ prevUser.start_ = endAlloc;
106
+
107
+ break;
108
+ }
109
+
110
+ // If we're the exact size of the previous request, then we
111
+ // don't need to continue
112
+ bool done = (prevUser.end_ == endAlloc);
113
+
114
+ lastUsers_.pop_back();
115
+
116
+ if (done) {
117
+ break;
118
+ }
119
+ }
120
+
121
+ head_ = endAlloc;
122
+ FAISS_ASSERT(head_ <= end_);
123
+
124
+ highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
125
+ (size_t) (head_ - start_));
126
+ FAISS_ASSERT(startAlloc);
127
+ return startAlloc;
128
+ }
129
+
130
+ void
131
+ StackDeviceMemory::Stack::returnAlloc(char* p,
132
+ size_t size,
133
+ cudaStream_t stream) {
134
+ // This allocation should be within ourselves
135
+ FAISS_ASSERT(p >= start_ && p < end_);
136
+
137
+ // All allocations should have been adjusted to a multiple of 16 bytes
138
+ FAISS_ASSERT(size % 16 == 0);
139
+
140
+ // This is on our stack
141
+ // Allocations should be freed in the reverse order they are made
142
+ if (p + size != head_) {
143
+ FAISS_ASSERT(p + size == head_);
144
+ }
145
+
146
+ head_ = p;
147
+ lastUsers_.push_back(Range(p, p + size, stream));
148
+ }
149
+
150
+ std::string
151
+ StackDeviceMemory::Stack::toString() const {
152
+ std::stringstream s;
153
+
154
+ s << "SDM device " << device_ << ": Total memory " << allocSize_ << " ["
155
+ << (void*) start_ << ", " << (void*) end_ << ")\n";
156
+ s << " Available memory " << (size_t) (end_ - head_)
157
+ << " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
158
+ s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
159
+
160
+ int i = lastUsers_.size();
161
+ for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
162
+ s << i-- << ": size " << (size_t) (it->end_ - it->start_)
163
+ << " stream " << it->stream_
164
+ << " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
165
+ }
166
+
167
+ return s.str();
168
+ }
169
+
170
+ StackDeviceMemory::StackDeviceMemory(GpuResources* res,
171
+ int device,
172
+ size_t allocPerDevice)
173
+ : device_(device),
174
+ stack_(res, device, allocPerDevice) {
175
+ }
176
+
177
+ StackDeviceMemory::~StackDeviceMemory() {
178
+ }
179
+
180
+ int
181
+ StackDeviceMemory::getDevice() const {
182
+ return device_;
183
+ }
184
+
185
+ size_t
186
+ StackDeviceMemory::getSizeAvailable() const {
187
+ return stack_.getSizeAvailable();
188
+ }
189
+
190
+ std::string
191
+ StackDeviceMemory::toString() const {
192
+ return stack_.toString();
193
+ }
194
+
195
+ void*
196
+ StackDeviceMemory::allocMemory(cudaStream_t stream, size_t size) {
197
+ // All allocations should have been adjusted to a multiple of 16 bytes
198
+ FAISS_ASSERT(size % 16 == 0);
199
+ return stack_.getAlloc(size, stream);
200
+ }
201
+
202
+ void
203
+ StackDeviceMemory::deallocMemory(int device,
204
+ cudaStream_t stream,
205
+ size_t size,
206
+ void* p) {
207
+ FAISS_ASSERT(p);
208
+ FAISS_ASSERT(device == device_);
209
+
210
+ stack_.returnAlloc((char*) p, size, stream);
211
+ }
212
+
213
+ } } // namespace
@@ -8,41 +8,38 @@
8
8
 
9
9
  #pragma once
10
10
 
11
- #include <faiss/gpu/utils/DeviceMemory.h>
11
+ #include <faiss/gpu/GpuResources.h>
12
+ #include <cuda_runtime.h>
12
13
  #include <list>
13
14
  #include <memory>
14
15
  #include <unordered_map>
16
+ #include <tuple>
15
17
 
16
18
  namespace faiss { namespace gpu {
17
19
 
18
20
  /// Device memory manager that provides temporary memory allocations
19
- /// out of a region of memory
20
- class StackDeviceMemory : public DeviceMemory {
21
+ /// out of a region of memory, for a single device
22
+ class StackDeviceMemory {
21
23
  public:
22
24
  /// Allocate a new region of memory that we manage
23
- explicit StackDeviceMemory(int device, size_t allocPerDevice);
25
+ StackDeviceMemory(GpuResources* res,
26
+ int device,
27
+ size_t allocPerDevice);
24
28
 
25
29
  /// Manage a region of memory for a particular device, with or
26
30
  /// without ownership
27
31
  StackDeviceMemory(int device, void* p, size_t size, bool isOwner);
28
32
 
29
- ~StackDeviceMemory() override;
33
+ ~StackDeviceMemory();
30
34
 
31
- /// Enable or disable the warning about not having enough temporary memory
32
- /// when cudaMalloc gets called
33
- void setCudaMallocWarning(bool b);
35
+ int getDevice() const;
34
36
 
35
- int getDevice() const override;
37
+ /// All allocations requested should be a multiple of 16 bytes
38
+ void* allocMemory(cudaStream_t stream, size_t size);
39
+ void deallocMemory(int device, cudaStream_t, size_t size, void* p);
36
40
 
37
- DeviceMemoryReservation getMemory(cudaStream_t stream,
38
- size_t size) override;
39
-
40
- size_t getSizeAvailable() const override;
41
- std::string toString() const override;
42
- size_t getHighWaterCudaMalloc() const override;
43
-
44
- protected:
45
- void returnAllocation(DeviceMemoryReservation& m) override;
41
+ size_t getSizeAvailable() const;
42
+ std::string toString() const;
46
43
 
47
44
  protected:
48
45
  /// Previous allocation ranges and the streams for which
@@ -60,10 +57,8 @@ class StackDeviceMemory : public DeviceMemory {
60
57
 
61
58
  struct Stack {
62
59
  /// Constructor that allocates memory via cudaMalloc
63
- Stack(int device, size_t size);
60
+ Stack(GpuResources* res, int device, size_t size);
64
61
 
65
- /// Constructor that references a pre-allocated region of memory
66
- Stack(int device, void* p, size_t size, bool isOwner);
67
62
  ~Stack();
68
63
 
69
64
  /// Returns how much size is available for an allocation without
@@ -80,23 +75,23 @@ class StackDeviceMemory : public DeviceMemory {
80
75
  /// Returns the stack state
81
76
  std::string toString() const;
82
77
 
83
- /// Returns the high-water mark of cudaMalloc activity
84
- size_t getHighWaterCudaMalloc() const;
78
+ /// Our GpuResources object
79
+ GpuResources* res_;
85
80
 
86
81
  /// Device this allocation is on
87
82
  int device_;
88
83
 
89
- /// Do we own our region of memory?
90
- bool isOwner_;
84
+ /// Where our temporary memory buffer is allocated; we allocate starting 16
85
+ /// bytes into this
86
+ char* alloc_;
87
+
88
+ /// Total size of our allocation
89
+ size_t allocSize_;
91
90
 
92
- /// Where our allocation begins and ends
93
- /// [start_, end_) is valid
91
+ /// Our temporary memory region; [start_, end_) is valid
94
92
  char* start_;
95
93
  char* end_;
96
94
 
97
- /// Total size end_ - start_
98
- size_t size_;
99
-
100
95
  /// Stack head within [start, end)
101
96
  char* head_;
102
97
 
@@ -104,19 +99,9 @@ class StackDeviceMemory : public DeviceMemory {
104
99
  /// possible synchronization purposes
105
100
  std::list<Range> lastUsers_;
106
101
 
107
- /// How much cudaMalloc memory is currently outstanding?
108
- size_t mallocCurrent_;
109
-
110
102
  /// What's the high water mark in terms of memory used from the
111
103
  /// temporary buffer?
112
104
  size_t highWaterMemoryUsed_;
113
-
114
- /// What's the high water mark in terms of memory allocated via
115
- /// cudaMalloc?
116
- size_t highWaterMalloc_;
117
-
118
- /// Whether or not a warning upon cudaMalloc is generated
119
- bool cudaMallocWarning_;
120
105
  };
121
106
 
122
107
  /// Our device
@@ -7,6 +7,7 @@
7
7
 
8
8
  // -*- c++ -*-
9
9
 
10
+ #include <algorithm>
10
11
  #include <cstring>
11
12
 
12
13
  #include <faiss/impl/AuxIndexStructures.h>
@@ -21,6 +21,8 @@
21
21
  #include <mutex>
22
22
 
23
23
  #include <faiss/Index.h>
24
+ #include <faiss/impl/platform_macros.h>
25
+
24
26
 
25
27
  namespace faiss {
26
28
 
@@ -218,7 +220,7 @@ struct DistanceComputer {
218
220
  * Interrupt callback
219
221
  ***********************************************************/
220
222
 
221
- struct InterruptCallback {
223
+ struct FAISS_API InterruptCallback {
222
224
  virtual bool want_interrupt () = 0;
223
225
  virtual ~InterruptCallback() {}
224
226