faiss 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
  6. data/vendor/faiss/c_api/AutoTune_c.h +2 -0
  7. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
  8. data/vendor/faiss/c_api/IndexShards_c.h +1 -4
  9. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
  10. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
  11. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
  12. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
  13. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
  14. data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
  15. data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
  16. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
  17. data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
  18. data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
  19. data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
  20. data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
  21. data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
  22. data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
  23. data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
  24. data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
  25. data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
  26. data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
  27. data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
  28. data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
  29. data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
  30. data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
  31. data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
  32. data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
  33. data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
  34. data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
  35. data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
  36. data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
  37. data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
  38. data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
  39. data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
  40. data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
  41. data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
  42. data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
  43. data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
  44. data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
  45. data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
  46. data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
  47. data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
  48. data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
  49. data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
  50. data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
  51. data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
  52. data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
  53. data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
  54. data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
  55. data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
  56. data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
  57. data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
  58. data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
  59. data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
  60. data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
  61. data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
  62. data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
  63. data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
  64. data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
  65. data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
  66. data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
  67. data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
  68. data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
  69. data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
  70. data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
  71. data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
  72. data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
  73. data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
  74. data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
  75. data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
  76. data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
  77. data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
  78. data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
  79. data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
  80. data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
  81. data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
  82. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
  83. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
  84. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
  85. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
  86. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
  87. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
  88. data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
  89. data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
  90. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
  91. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
  92. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
  93. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
  94. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
  95. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
  96. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
  97. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
  98. data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
  99. data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
  100. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
  101. data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
  102. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
  103. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
  104. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
  105. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
  106. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
  107. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
  108. data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
  109. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
  110. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
  111. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
  112. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
  113. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
  114. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
  115. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
  116. data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
  117. data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
  118. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
  119. data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
  120. data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
  121. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
  122. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
  123. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
  124. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
  125. data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
  126. data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
  127. data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
  128. data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
  129. data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
  130. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
  131. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
  132. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
  133. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
  134. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
  135. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
  136. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
  137. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
  138. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
  139. data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
  140. data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
  141. data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
  142. data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
  143. data/vendor/faiss/faiss/impl/io_macros.h +57 -0
  144. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
  145. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
  146. data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
  147. data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
  148. data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
  149. data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
  150. data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
  151. data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
  152. data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
  153. data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
  154. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
  155. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
  156. data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
  157. data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
  158. data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
  159. data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
  160. data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
  161. data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
  162. data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
  163. data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
  164. data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
  165. data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
  166. data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
  167. data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
  168. data/vendor/faiss/misc/test_blas.cpp +4 -1
  169. data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
  170. data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
  171. data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
  172. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
  173. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
  174. data/vendor/faiss/tests/test_merge.cpp +6 -3
  175. data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
  176. data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
  177. data/vendor/faiss/tests/test_params_override.cpp +7 -2
  178. data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
  179. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
  180. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
  181. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
  182. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
  183. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
  184. metadata +154 -153
  185. data/vendor/faiss/gpu/GpuResources.cpp +0 -52
  186. data/vendor/faiss/gpu/GpuResources.h +0 -73
  187. data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
  188. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
  189. data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
  190. data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
  191. data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
  192. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -1,71 +0,0 @@ data/vendor/faiss/gpu/utils/DeviceMemory.h (file removed)
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
- #pragma once
10
-
11
- #include <cuda_runtime.h>
12
- #include <string>
13
-
14
- namespace faiss { namespace gpu {
15
-
16
- class DeviceMemory;
17
-
18
- class DeviceMemoryReservation {
19
- public:
20
- DeviceMemoryReservation();
21
- DeviceMemoryReservation(DeviceMemory* state,
22
- int device, void* p, size_t size,
23
- cudaStream_t stream);
24
- DeviceMemoryReservation(DeviceMemoryReservation&& m) noexcept;
25
- ~DeviceMemoryReservation();
26
-
27
- DeviceMemoryReservation& operator=(DeviceMemoryReservation&& m);
28
-
29
- int device() { return device_; }
30
- void* get() { return data_; }
31
- size_t size() { return size_; }
32
- cudaStream_t stream() { return stream_; }
33
-
34
- private:
35
- DeviceMemory* state_;
36
-
37
- int device_;
38
- void* data_;
39
- size_t size_;
40
- cudaStream_t stream_;
41
- };
42
-
43
- /// Manages temporary memory allocations on a GPU device
44
- class DeviceMemory {
45
- public:
46
- virtual ~DeviceMemory();
47
-
48
- /// Returns the device we are managing memory for
49
- virtual int getDevice() const = 0;
50
-
51
- /// Obtains a temporary memory allocation for our device,
52
- /// whose usage is ordered with respect to the given stream.
53
- virtual DeviceMemoryReservation getMemory(cudaStream_t stream,
54
- size_t size) = 0;
55
-
56
- /// Returns the current size available without calling cudaMalloc
57
- virtual size_t getSizeAvailable() const = 0;
58
-
59
- /// Returns a string containing our current memory manager state
60
- virtual std::string toString() const = 0;
61
-
62
- /// Returns the high-water mark of cudaMalloc allocations for our
63
- /// device
64
- virtual size_t getHighWaterCudaMalloc() const = 0;
65
-
66
- protected:
67
- friend class DeviceMemoryReservation;
68
- virtual void returnAllocation(DeviceMemoryReservation& m) = 0;
69
- };
70
-
71
- } } // namespace
@@ -1,89 +0,0 @@ data/vendor/faiss/gpu/utils/MemorySpace.cpp (file removed)
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
- #include <faiss/gpu/utils/MemorySpace.h>
10
- #include <faiss/impl/FaissAssert.h>
11
- #include <cuda_runtime.h>
12
-
13
- namespace faiss { namespace gpu {
14
-
15
- /// Allocates CUDA memory for a given memory space
16
- void allocMemorySpaceV(MemorySpace space, void** p, size_t size) {
17
- switch (space) {
18
- case MemorySpace::Device:
19
- {
20
- auto err = cudaMalloc(p, size);
21
-
22
- // Throw if we fail to allocate
23
- FAISS_THROW_IF_NOT_FMT(
24
- err == cudaSuccess,
25
- "failed to cudaMalloc %zu bytes (error %d %s)",
26
- size, (int) err, cudaGetErrorString(err));
27
- }
28
- break;
29
- case MemorySpace::Unified:
30
- {
31
- #ifdef FAISS_UNIFIED_MEM
32
- auto err = cudaMallocManaged(p, size);
33
-
34
- // Throw if we fail to allocate
35
- FAISS_THROW_IF_NOT_FMT(
36
- err == cudaSuccess,
37
- "failed to cudaMallocManaged %zu bytes (error %d %s)",
38
- size, (int) err, cudaGetErrorString(err));
39
- #else
40
- FAISS_THROW_MSG("Attempting to allocate via cudaMallocManaged "
41
- "without CUDA 8+ support");
42
- #endif
43
- }
44
- break;
45
- case MemorySpace::HostPinned:
46
- {
47
- auto err = cudaHostAlloc(p, size, cudaHostAllocDefault);
48
-
49
- // Throw if we fail to allocate
50
- FAISS_THROW_IF_NOT_FMT(
51
- err == cudaSuccess,
52
- "failed to cudaHostAlloc %zu bytes (error %d %s)",
53
- size, (int) err, cudaGetErrorString(err));
54
- }
55
- break;
56
- default:
57
- FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
58
- break;
59
- }
60
- }
61
-
62
- // We'll allow allocation to fail, but free should always succeed and be a
63
- // fatal error if it doesn't free
64
- void freeMemorySpace(MemorySpace space, void* p) {
65
- switch (space) {
66
- case MemorySpace::Device:
67
- case MemorySpace::Unified:
68
- {
69
- auto err = cudaFree(p);
70
- FAISS_ASSERT_FMT(err == cudaSuccess,
71
- "Failed to cudaFree pointer %p (error %d %s)",
72
- p, (int) err, cudaGetErrorString(err));
73
- }
74
- break;
75
- case MemorySpace::HostPinned:
76
- {
77
- auto err = cudaFreeHost(p);
78
- FAISS_ASSERT_FMT(err == cudaSuccess,
79
- "Failed to cudaFreeHost pointer %p (error %d %s)",
80
- p, (int) err, cudaGetErrorString(err));
81
- }
82
- break;
83
- default:
84
- FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
85
- break;
86
- }
87
- }
88
-
89
- } }
@@ -1,44 +0,0 @@ data/vendor/faiss/gpu/utils/MemorySpace.h (file removed)
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
- #pragma once
10
-
11
- #include <cuda.h>
12
-
13
- #if CUDA_VERSION >= 8000
14
- // Whether or not we enable usage of CUDA Unified Memory
15
- #define FAISS_UNIFIED_MEM 1
16
- #endif
17
-
18
- namespace faiss { namespace gpu {
19
-
20
- enum MemorySpace {
21
- /// Managed using cudaMalloc/cudaFree
22
- Device = 1,
23
- /// Managed using cudaMallocManaged/cudaFree
24
- Unified = 2,
25
- /// Managed using cudaHostAlloc/cudaFreeHost
26
- HostPinned = 3,
27
- };
28
-
29
- /// All memory allocations and de-allocations come through these functions
30
-
31
- /// Allocates CUDA memory for a given memory space (void pointer)
32
- /// Throws a FaissException if we are unable to allocate the memory
33
- void allocMemorySpaceV(MemorySpace space, void** p, size_t size);
34
-
35
- template <typename T>
36
- inline void allocMemorySpace(MemorySpace space, T** p, size_t size) {
37
- allocMemorySpaceV(space, (void**)(void*) p, size);
38
- }
39
-
40
- /// Frees CUDA memory for a given memory space
41
- /// Asserts if we are unable to free the region
42
- void freeMemorySpace(MemorySpace space, void* p);
43
-
44
- } }
@@ -1,239 +0,0 @@ data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp (file removed)
1
- /**
2
- * Copyright (c) Facebook, Inc. and its affiliates.
3
- *
4
- * This source code is licensed under the MIT license found in the
5
- * LICENSE file in the root directory of this source tree.
6
- */
7
-
8
-
9
- #include <faiss/gpu/utils/StackDeviceMemory.h>
10
- #include <faiss/gpu/utils/DeviceUtils.h>
11
- #include <faiss/gpu/utils/MemorySpace.h>
12
- #include <faiss/gpu/utils/StaticUtils.h>
13
- #include <faiss/impl/FaissAssert.h>
14
- #include <stdio.h>
15
- #include <sstream>
16
-
17
- namespace faiss { namespace gpu {
18
-
19
- StackDeviceMemory::Stack::Stack(int d, size_t sz)
20
- : device_(d),
21
- isOwner_(true),
22
- start_(nullptr),
23
- end_(nullptr),
24
- size_(sz),
25
- head_(nullptr),
26
- mallocCurrent_(0),
27
- highWaterMemoryUsed_(0),
28
- highWaterMalloc_(0),
29
- cudaMallocWarning_(true) {
30
- DeviceScope s(device_);
31
-
32
- allocMemorySpace(MemorySpace::Device, &start_, size_);
33
-
34
- head_ = start_;
35
- end_ = start_ + size_;
36
- }
37
-
38
- StackDeviceMemory::Stack::Stack(int d, void* p, size_t sz, bool isOwner)
39
- : device_(d),
40
- isOwner_(isOwner),
41
- start_((char*) p),
42
- end_(((char*) p) + sz),
43
- size_(sz),
44
- head_((char*) p),
45
- mallocCurrent_(0),
46
- highWaterMemoryUsed_(0),
47
- highWaterMalloc_(0),
48
- cudaMallocWarning_(true) {
49
- }
50
-
51
- StackDeviceMemory::Stack::~Stack() {
52
- if (isOwner_) {
53
- DeviceScope s(device_);
54
-
55
- freeMemorySpace(MemorySpace::Device, start_);
56
- }
57
- }
58
-
59
- size_t
60
- StackDeviceMemory::Stack::getSizeAvailable() const {
61
- return (end_ - head_);
62
- }
63
-
64
- char*
65
- StackDeviceMemory::Stack::getAlloc(size_t size,
66
- cudaStream_t stream) {
67
- if (size > (end_ - head_)) {
68
- // Too large for our stack
69
- DeviceScope s(device_);
70
-
71
- if (cudaMallocWarning_) {
72
- // Print our requested size before we attempt the allocation
73
- fprintf(stderr, "WARN: increase temp memory to avoid cudaMalloc, "
74
- "or decrease query/add size (alloc %zu B, highwater %zu B)\n",
75
- size, highWaterMalloc_);
76
- }
77
-
78
- char* p = nullptr;
79
- allocMemorySpace(MemorySpace::Device, &p, size);
80
-
81
- mallocCurrent_ += size;
82
- highWaterMalloc_ = std::max(highWaterMalloc_, mallocCurrent_);
83
-
84
- return p;
85
- } else {
86
- // We can make the allocation out of our stack
87
- // Find all the ranges that we overlap that may have been
88
- // previously allocated; our allocation will be [head, endAlloc)
89
- char* startAlloc = head_;
90
- char* endAlloc = head_ + size;
91
-
92
- while (lastUsers_.size() > 0) {
93
- auto& prevUser = lastUsers_.back();
94
-
95
- // Because there is a previous user, we must overlap it
96
- FAISS_ASSERT(prevUser.start_ <= endAlloc && prevUser.end_ >= startAlloc);
97
-
98
- if (stream != prevUser.stream_) {
99
- // Synchronization required
100
- // FIXME
101
- FAISS_ASSERT(false);
102
- }
103
-
104
- if (endAlloc < prevUser.end_) {
105
- // Update the previous user info
106
- prevUser.start_ = endAlloc;
107
-
108
- break;
109
- }
110
-
111
- // If we're the exact size of the previous request, then we
112
- // don't need to continue
113
- bool done = (prevUser.end_ == endAlloc);
114
-
115
- lastUsers_.pop_back();
116
-
117
- if (done) {
118
- break;
119
- }
120
- }
121
-
122
- head_ = endAlloc;
123
- FAISS_ASSERT(head_ <= end_);
124
-
125
- highWaterMemoryUsed_ = std::max(highWaterMemoryUsed_,
126
- (size_t) (head_ - start_));
127
- return startAlloc;
128
- }
129
- }
130
-
131
- void
132
- StackDeviceMemory::Stack::returnAlloc(char* p,
133
- size_t size,
134
- cudaStream_t stream) {
135
- if (p < start_ || p >= end_) {
136
- // This is not on our stack; it was a one-off allocation
137
- DeviceScope s(device_);
138
-
139
- freeMemorySpace(MemorySpace::Device, p);
140
-
141
- FAISS_ASSERT(mallocCurrent_ >= size);
142
- mallocCurrent_ -= size;
143
- } else {
144
- // This is on our stack
145
- // Allocations should be freed in the reverse order they are made
146
- FAISS_ASSERT(p + size == head_);
147
-
148
- head_ = p;
149
- lastUsers_.push_back(Range(p, p + size, stream));
150
- }
151
- }
152
-
153
- std::string
154
- StackDeviceMemory::Stack::toString() const {
155
- std::stringstream s;
156
-
157
- s << "SDM device " << device_ << ": Total memory " << size_ << " ["
158
- << (void*) start_ << ", " << (void*) end_ << ")\n";
159
- s << " Available memory " << (size_t) (end_ - head_)
160
- << " [" << (void*) head_ << ", " << (void*) end_ << ")\n";
161
- s << " High water temp alloc " << highWaterMemoryUsed_ << "\n";
162
- s << " High water cudaMalloc " << highWaterMalloc_ << "\n";
163
-
164
- int i = lastUsers_.size();
165
- for (auto it = lastUsers_.rbegin(); it != lastUsers_.rend(); ++it) {
166
- s << i-- << ": size " << (size_t) (it->end_ - it->start_)
167
- << " stream " << it->stream_
168
- << " [" << (void*) it->start_ << ", " << (void*) it->end_ << ")\n";
169
- }
170
-
171
- return s.str();
172
- }
173
-
174
- size_t
175
- StackDeviceMemory::Stack::getHighWaterCudaMalloc() const {
176
- return highWaterMalloc_;
177
- }
178
-
179
- StackDeviceMemory::StackDeviceMemory(int device, size_t allocPerDevice)
180
- : device_(device),
181
- stack_(device, allocPerDevice) {
182
- }
183
-
184
- StackDeviceMemory::StackDeviceMemory(int device,
185
- void* p, size_t size, bool isOwner)
186
- : device_(device),
187
- stack_(device, p, size, isOwner) {
188
- }
189
-
190
- StackDeviceMemory::~StackDeviceMemory() {
191
- }
192
-
193
- void
194
- StackDeviceMemory::setCudaMallocWarning(bool b) {
195
- stack_.cudaMallocWarning_ = b;
196
- }
197
-
198
- int
199
- StackDeviceMemory::getDevice() const {
200
- return device_;
201
- }
202
-
203
- DeviceMemoryReservation
204
- StackDeviceMemory::getMemory(cudaStream_t stream, size_t size) {
205
- // We guarantee 16 byte alignment for allocations, so bump up `size`
206
- // to the next highest multiple of 16
207
- size = utils::roundUp(size, (size_t) 16);
208
-
209
- return DeviceMemoryReservation(this,
210
- device_,
211
- stack_.getAlloc(size, stream),
212
- size,
213
- stream);
214
- }
215
-
216
- size_t
217
- StackDeviceMemory::getSizeAvailable() const {
218
- return stack_.getSizeAvailable();
219
- }
220
-
221
- std::string
222
- StackDeviceMemory::toString() const {
223
- return stack_.toString();
224
- }
225
-
226
- size_t
227
- StackDeviceMemory::getHighWaterCudaMalloc() const {
228
- return stack_.getHighWaterCudaMalloc();
229
- }
230
-
231
- void
232
- StackDeviceMemory::returnAllocation(DeviceMemoryReservation& m) {
233
- FAISS_ASSERT(m.get());
234
- FAISS_ASSERT(device_ == m.device());
235
-
236
- stack_.returnAlloc((char*) m.get(), m.size(), m.stream());
237
- }
238
-
239
- } } // namespace