faiss 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
  6. data/vendor/faiss/c_api/AutoTune_c.h +2 -0
  7. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
  8. data/vendor/faiss/c_api/IndexShards_c.h +1 -4
  9. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
  10. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
  11. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
  12. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
  13. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
  14. data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
  15. data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
  16. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
  17. data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
  18. data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
  19. data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
  20. data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
  21. data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
  22. data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
  23. data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
  24. data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
  25. data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
  26. data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
  27. data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
  28. data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
  29. data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
  30. data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
  31. data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
  32. data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
  33. data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
  34. data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
  35. data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
  36. data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
  37. data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
  38. data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
  39. data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
  40. data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
  41. data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
  42. data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
  43. data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
  44. data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
  45. data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
  46. data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
  47. data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
  48. data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
  49. data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
  50. data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
  51. data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
  52. data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
  53. data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
  54. data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
  55. data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
  56. data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
  57. data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
  58. data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
  59. data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
  60. data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
  61. data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
  62. data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
  63. data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
  64. data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
  65. data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
  66. data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
  67. data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
  68. data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
  69. data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
  70. data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
  71. data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
  72. data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
  73. data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
  74. data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
  75. data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
  76. data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
  77. data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
  78. data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
  79. data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
  80. data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
  81. data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
  82. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
  83. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
  84. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
  85. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
  86. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
  87. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
  88. data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
  89. data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
  90. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
  91. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
  92. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
  93. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
  94. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
  95. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
  96. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
  97. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
  98. data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
  99. data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
  100. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
  101. data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
  102. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
  103. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
  104. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
  105. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
  106. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
  107. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
  108. data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
  109. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
  110. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
  111. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
  112. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
  113. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
  114. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
  115. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
  116. data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
  117. data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
  118. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
  119. data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
  120. data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
  121. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
  122. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
  123. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
  124. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
  125. data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
  126. data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
  127. data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
  128. data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
  129. data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
  130. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
  131. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
  132. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
  133. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
  134. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
  135. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
  136. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
  137. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
  138. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
  139. data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
  140. data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
  141. data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
  142. data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
  143. data/vendor/faiss/faiss/impl/io_macros.h +57 -0
  144. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
  145. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
  146. data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
  147. data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
  148. data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
  149. data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
  150. data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
  151. data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
  152. data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
  153. data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
  154. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
  155. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
  156. data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
  157. data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
  158. data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
  159. data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
  160. data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
  161. data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
  162. data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
  163. data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
  164. data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
  165. data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
  166. data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
  167. data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
  168. data/vendor/faiss/misc/test_blas.cpp +4 -1
  169. data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
  170. data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
  171. data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
  172. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
  173. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
  174. data/vendor/faiss/tests/test_merge.cpp +6 -3
  175. data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
  176. data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
  177. data/vendor/faiss/tests/test_params_override.cpp +7 -2
  178. data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
  179. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
  180. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
  181. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
  182. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
  183. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
  184. metadata +154 -153
  185. data/vendor/faiss/gpu/GpuResources.cpp +0 -52
  186. data/vendor/faiss/gpu/GpuResources.h +0 -73
  187. data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
  188. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
  189. data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
  190. data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
  191. data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
  192. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -1,71 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
- #pragma once
10
-
11
- #include <cuda_runtime.h>
12
- #include <string>
13
-
14
- namespace faiss { namespace gpu {
15
-
16
- class DeviceMemory;
17
-
18
- class DeviceMemoryReservation {
19
- public:
20
- DeviceMemoryReservation();
21
- DeviceMemoryReservation(DeviceMemory* state,
22
- int device, void* p, size_t size,
23
- cudaStream_t stream);
24
- DeviceMemoryReservation(DeviceMemoryReservation&& m) noexcept;
25
- ~DeviceMemoryReservation();
26
-
27
- DeviceMemoryReservation& operator=(DeviceMemoryReservation&& m);
28
-
29
- int device() { return device_; }
30
- void* get() { return data_; }
31
- size_t size() { return size_; }
32
- cudaStream_t stream() { return stream_; }
33
-
34
- private:
35
- DeviceMemory* state_;
36
-
37
- int device_;
38
- void* data_;
39
- size_t size_;
40
- cudaStream_t stream_;
41
- };
42
-
43
- /// Manages temporary memory allocations on a GPU device
44
- class DeviceMemory {
45
- public:
46
- virtual ~DeviceMemory();
47
-
48
- /// Returns the device we are managing memory for
49
- virtual int getDevice() const = 0;
50
-
51
- /// Obtains a temporary memory allocation for our device,
52
- /// whose usage is ordered with respect to the given stream.
53
- virtual DeviceMemoryReservation getMemory(cudaStream_t stream,
54
- size_t size) = 0;
55
-
56
- /// Returns the current size available without calling cudaMalloc
57
- virtual size_t getSizeAvailable() const = 0;
58
-
59
- /// Returns a string containing our current memory manager state
60
- virtual std::string toString() const = 0;
61
-
62
- /// Returns the high-water mark of cudaMalloc allocations for our
63
- /// device
64
- virtual size_t getHighWaterCudaMalloc() const = 0;
65
-
66
- protected:
67
- friend class DeviceMemoryReservation;
68
- virtual void returnAllocation(DeviceMemoryReservation& m) = 0;
69
- };
70
-
71
- } } // namespace
@@ -1,89 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
- #include <faiss/gpu/utils/MemorySpace.h>
10
- #include <faiss/impl/FaissAssert.h>
11
- #include <cuda_runtime.h>
12
-
13
- namespace faiss { namespace gpu {
14
-
15
- /// Allocates CUDA memory for a given memory space
16
- void allocMemorySpaceV(MemorySpace space, void** p, size_t size) {
17
- switch (space) {
18
- case MemorySpace::Device:
19
- {
20
- auto err = cudaMalloc(p, size);
21
-
22
- // Throw if we fail to allocate
23
- FAISS_THROW_IF_NOT_FMT(
24
- err == cudaSuccess,
25
- "failed to cudaMalloc %zu bytes (error %d %s)",
26
- size, (int) err, cudaGetErrorString(err));
27
- }
28
- break;
29
- case MemorySpace::Unified:
30
- {
31
- #ifdef FAISS_UNIFIED_MEM
32
- auto err = cudaMallocManaged(p, size);
33
-
34
- // Throw if we fail to allocate
35
- FAISS_THROW_IF_NOT_FMT(
36
- err == cudaSuccess,
37
- "failed to cudaMallocManaged %zu bytes (error %d %s)",
38
- size, (int) err, cudaGetErrorString(err));
39
- #else
40
- FAISS_THROW_MSG("Attempting to allocate via cudaMallocManaged "
41
- "without CUDA 8+ support");
42
- #endif
43
- }
44
- break;
45
- case MemorySpace::HostPinned:
46
- {
47
- auto err = cudaHostAlloc(p, size, cudaHostAllocDefault);
48
-
49
- // Throw if we fail to allocate
50
- FAISS_THROW_IF_NOT_FMT(
51
- err == cudaSuccess,
52
- "failed to cudaHostAlloc %zu bytes (error %d %s)",
53
- size, (int) err, cudaGetErrorString(err));
54
- }
55
- break;
56
- default:
57
- FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
58
- break;
59
- }
60
- }
61
-
62
- // We'll allow allocation to fail, but free should always succeed and be a
63
- // fatal error if it doesn't free
64
- void freeMemorySpace(MemorySpace space, void* p) {
65
- switch (space) {
66
- case MemorySpace::Device:
67
- case MemorySpace::Unified:
68
- {
69
- auto err = cudaFree(p);
70
- FAISS_ASSERT_FMT(err == cudaSuccess,
71
- "Failed to cudaFree pointer %p (error %d %s)",
72
- p, (int) err, cudaGetErrorString(err));
73
- }
74
- break;
75
- case MemorySpace::HostPinned:
76
- {
77
- auto err = cudaFreeHost(p);
78
- FAISS_ASSERT_FMT(err == cudaSuccess,
79
- "Failed to cudaFreeHost pointer %p (error %d %s)",
80
- p, (int) err, cudaGetErrorString(err));
81
- }
82
- break;
83
- default:
84
- FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
85
- break;
86
- }
87
- }
88
-
89
- } }
@@ -1,44 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
- #pragma once
10
-
11
- #include <cuda.h>
12
-
13
- #if CUDA_VERSION >= 8000
14
- // Whether or not we enable usage of CUDA Unified Memory
15
- #define FAISS_UNIFIED_MEM 1
16
- #endif
17
-
18
- namespace faiss { namespace gpu {
19
-
20
- enum MemorySpace {
21
- /// Managed using cudaMalloc/cudaFree
22
- Device = 1,
23
- /// Managed using cudaMallocManaged/cudaFree
24
- Unified = 2,
25
- /// Managed using cudaHostAlloc/cudaFreeHost
26
- HostPinned = 3,
27
- };
28
-
29
- /// All memory allocations and de-allocations come through these functions
30
-
31
- /// Allocates CUDA memory for a given memory space (void pointer)
32
- /// Throws a FaissException if we are unable to allocate the memory
33
- void allocMemorySpaceV(MemorySpace space, void** p, size_t size);
34
-
35
- template <typename T>
36
- inline void allocMemorySpace(MemorySpace space, T** p, size_t size) {
37
- allocMemorySpaceV(space, (void**)(void*) p, size);
38
- }
39
-
40
- /// Frees CUDA memory for a given memory space
41
- /// Asserts if we are unable to free the region
42
- void freeMemorySpace(MemorySpace space, void* p);
43
-
44
- } }
@@ -1,239 +0,0 @@
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
- #include <faiss/gpu/utils/StackDeviceMemory.h>
10
- #include <faiss/gpu/utils/DeviceUtils.h>
11
- #include <faiss/gpu/utils/MemorySpace.h>
12
- #include <faiss/gpu/utils/StaticUtils.h>
13
- #include <faiss/impl/FaissAssert.h>
14
- #include <stdio.h>
15
- #include <sstream>
16
-
17
- namespace faiss { namespace gpu {
18
-
19
- StackDeviceMemory::Stack::Stack(int d, size_t sz)
20
- : device_(d),
21
- isOwner_(true),
22
- start_(nullptr),
23
- end_(nullptr),
24
- size_(sz),
25
- head_(nullptr),
26
- mallocCurrent_(0),
27
- highWaterMemoryUsed_(0),
28
- highWaterMalloc_(0),
29
- cudaMallocWarning_(true) {
30
- DeviceScope s(device_);
31
-
32
- allocMemorySpace(MemorySpace::Device, &start_, size_);
33
-
34
- head_ = start_;
35
- end_ = start_ + size_;
36
- }
37
-
38
- StackDeviceMemory::Stack::Stack(int d, void* p, size_t sz, bool isOwner)
39
- : device_(d),
40
- isOwner_(isOwner),
41
- start_((char*) p),
42
- end_(((char*) p) + sz),
43
- size_(sz),
44
- head_((char*) p),
45
- mallocCurrent_(0),
46
- highWaterMemoryUsed_(0),
47
- highWaterMalloc_(0),
48
- cudaMallocWarning_(true) {
49
- }
50
-
51
- StackDeviceMemory::Stack::~Stack() {
52
- if (isOwner_) {
53
- DeviceScope s(device_);
54
-
55
- freeMemorySpace(MemorySpace::Device, start_);
56
- }
57
- }
58
-
59
- size_t
60
- StackDeviceMemory::Stack::getSizeAvailable() const {
61
- return (end_ - head_);
62
- }
63
-
64
- char*
65
- StackDeviceMemory::Stack::getAlloc(size_t size,
66
- cudaStream_t stream) {
67
- if (size > (end_ - head_)) {
68
- // Too large for our stack
69
- DeviceScope s(device_);
70
-
71
- if (cudaMallocWarning_) {
72
- // Print our requested size before we attempt the allocation
73
- fprintf(stderr, "WARN: increase temp memory to avoid cudaMalloc, "
74
- "or decrease query/add size (alloc %zu B, highwater %zu B)\n",
75
- size, highWaterMalloc_);
76
- }
77
-
78
- char* p = nullptr;
79
- allocMemorySpace(MemorySpace::Device, &p, size);
80
-
81
- mallocCurrent_ += size;
82
- highWaterMalloc_ = std::max(highWaterMalloc_, mallocCurrent_);
83
-
84
- return p;
85
- } else {
86
- // We can make the allocation out of our stack
87
- // Find all the ranges that we overlap that may have been
88
- // previously allocated; our allocation will be [head, endAlloc)
89
- char* startAlloc = head_;
90
- char* endAlloc = head_ + size;
91
-
92
- while (lastUsers_.size() > 0) {
93
- auto& prevUser = lastUsers_.back();
94
-
95
- // Because there is a previous user, we must overlap it
96
- FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
97
-
98
- if (stream != prevUser.stream_) {
99
- // Synchronization required
100
- // FIXME
101
- FAISS_ASSERT(false);
102
- }
103
-
104
- if (endAlloc < prevUser.end_) {
105
- // Update the previous user info
106
- prevUser.start_ = endAlloc;
107
-
108
- break;
109
- }
110
-
111
- // If we're the exact size of the previous request, then we
112
- // don't need to continue
113
- bool done = (prevUser.end_ == endAlloc);
114
-
115
- lastUsers_.pop_back();
116
-
117
- if (done) {
118
- break;
119
- }
120
- }
121
-
122
- head_ = endAlloc;
123
- FAISS_ASSERT(head_ <= end_);
124
-
125
- highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
126
- (size_t) (head_ - start_));
127
- return startAlloc;
128
- }
129
- }
130
-
131
- void
132
- StackDeviceMemory::Stack::returnAlloc(char* p,
133
- size_t size,
134
- cudaStream_t stream) {
135
- if (p < start_ || p >= end_) {
136
- // This is not on our stack; it was a one-off allocation
137
- DeviceScope s(device_);
138
-
139
- freeMemorySpace(MemorySpace::Device, p);
140
-
141
- FAISS_ASSERT(mallocCurrent_ >= size);
142
- mallocCurrent_ -= size;
143
- } else {
144
- // This is on our stack
145
- // Allocations should be freed in the reverse order they are made
146
- FAISS_ASSERT(p + size == head_);
147
-
148
- head_ = p;
149
- lastUsers_.push_back(Range(p, p + size, stream));
150
- }
151
- }
152
-
153
- std::string
154
- StackDeviceMemory::Stack::toString() const {
155
- std::stringstream s;
156
-
157
- s << "SDM device " << device_ << ": Total memory " << size_ << " ["
158
- << (void*) start_ << ", " << (void*) end_ << ")\n";
159
- s << " Available memory " << (size_t) (end_ - head_)
160
- << " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
161
- s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
162
- s << " High water cudaMalloc " << highWaterMalloc_ << "\n";
163
-
164
- int i = lastUsers_.size();
165
- for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
166
- s << i-- << ": size " << (size_t) (it->end_ - it->start_)
167
- << " stream " << it->stream_
168
- << " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
169
- }
170
-
171
- return s.str();
172
- }
173
-
174
- size_t
175
- StackDeviceMemory::Stack::getHighWaterCudaMalloc() const {
176
- return highWaterMalloc_;
177
- }
178
-
179
- StackDeviceMemory::StackDeviceMemory(int device, size_t allocPerDevice)
180
- : device_(device),
181
- stack_(device, allocPerDevice) {
182
- }
183
-
184
- StackDeviceMemory::StackDeviceMemory(int device,
185
- void* p, size_t size, bool isOwner)
186
- : device_(device),
187
- stack_(device, p, size, isOwner) {
188
- }
189
-
190
- StackDeviceMemory::~StackDeviceMemory() {
191
- }
192
-
193
- void
194
- StackDeviceMemory::setCudaMallocWarning(bool b) {
195
- stack_.cudaMallocWarning_ = b;
196
- }
197
-
198
- int
199
- StackDeviceMemory::getDevice() const {
200
- return device_;
201
- }
202
-
203
- DeviceMemoryReservation
204
- StackDeviceMemory::getMemory(cudaStream_t stream, size_t size) {
205
- // We guarantee 16 byte alignment for allocations, so bump up `size`
206
- // to the next highest multiple of 16
207
- size = utils::roundUp(size, (size_t) 16);
208
-
209
- return DeviceMemoryReservation(this,
210
- device_,
211
- stack_.getAlloc(size, stream),
212
- size,
213
- stream);
214
- }
215
-
216
- size_t
217
- StackDeviceMemory::getSizeAvailable() const {
218
- return stack_.getSizeAvailable();
219
- }
220
-
221
- std::string
222
- StackDeviceMemory::toString() const {
223
- return stack_.toString();
224
- }
225
-
226
- size_t
227
- StackDeviceMemory::getHighWaterCudaMalloc() const {
228
- return stack_.getHighWaterCudaMalloc();
229
- }
230
-
231
- void
232
- StackDeviceMemory::returnAllocation(DeviceMemoryReservation& m) {
233
- FAISS_ASSERT(m.get());
234
- FAISS_ASSERT(device_ == m.device());
235
-
236
- stack_.returnAlloc((char*) m.get(), m.size(), m.stream());
237
- }
238
-
239
- } } // namespace