faiss 0.2.0 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (215)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +7 -7
  5. data/ext/faiss/extconf.rb +6 -3
  6. data/ext/faiss/numo.hpp +4 -4
  7. data/ext/faiss/utils.cpp +1 -1
  8. data/ext/faiss/utils.h +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  11. data/vendor/faiss/faiss/AutoTune.h +55 -56
  12. data/vendor/faiss/faiss/Clustering.cpp +365 -194
  13. data/vendor/faiss/faiss/Clustering.h +102 -35
  14. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  15. data/vendor/faiss/faiss/IVFlib.h +48 -51
  16. data/vendor/faiss/faiss/Index.cpp +85 -103
  17. data/vendor/faiss/faiss/Index.h +54 -48
  18. data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
  19. data/vendor/faiss/faiss/Index2Layer.h +22 -36
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  21. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
  22. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  23. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  24. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  25. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  26. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  27. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  28. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  29. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  30. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  31. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  32. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  33. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  34. data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
  35. data/vendor/faiss/faiss/IndexFlat.h +42 -59
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  39. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  40. data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
  41. data/vendor/faiss/faiss/IndexIVF.h +169 -118
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
  54. data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
  55. data/vendor/faiss/faiss/IndexLSH.h +20 -38
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -82
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
  69. data/vendor/faiss/faiss/IndexRefine.h +32 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
  73. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
  74. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  75. data/vendor/faiss/faiss/IndexShards.h +85 -73
  76. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  77. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  78. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  79. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  80. data/vendor/faiss/faiss/MetricType.h +7 -7
  81. data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
  82. data/vendor/faiss/faiss/VectorTransform.h +64 -89
  83. data/vendor/faiss/faiss/clone_index.cpp +78 -73
  84. data/vendor/faiss/faiss/clone_index.h +4 -9
  85. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  86. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  87. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
  88. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  89. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  90. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  91. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  92. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  93. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  94. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  95. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  96. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  97. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  101. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  108. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  110. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  112. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  113. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  114. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  115. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  116. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  121. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  122. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  124. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  125. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  126. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  128. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  129. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  130. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  135. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  136. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  137. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  138. data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
  139. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
  142. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  144. data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
  145. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  146. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  148. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  149. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  151. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
  153. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  154. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  156. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  157. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  158. data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
  159. data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
  160. data/vendor/faiss/faiss/impl/io.cpp +76 -95
  161. data/vendor/faiss/faiss/impl/io.h +31 -41
  162. data/vendor/faiss/faiss/impl/io_macros.h +60 -29
  163. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  164. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  165. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  166. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  167. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  171. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  172. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  173. data/vendor/faiss/faiss/index_factory.cpp +619 -397
  174. data/vendor/faiss/faiss/index_factory.h +8 -6
  175. data/vendor/faiss/faiss/index_io.h +23 -26
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  177. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  178. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  179. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  180. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  181. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  183. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  185. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  186. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  187. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  188. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  189. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  190. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  191. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  192. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  193. data/vendor/faiss/faiss/utils/distances.cpp +305 -312
  194. data/vendor/faiss/faiss/utils/distances.h +170 -122
  195. data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
  196. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  197. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  198. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  199. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  200. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  201. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  202. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  203. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  204. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  205. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  206. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  207. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  208. data/vendor/faiss/faiss/utils/random.h +13 -16
  209. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  210. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  211. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  212. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  214. data/vendor/faiss/faiss/utils/utils.h +54 -49
  215. metadata +29 -4
@@ -5,55 +5,59 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #pragma once
10
9
 
11
- #include <faiss/impl/FaissAssert.h>
12
- #include <cuda_runtime.h>
13
10
  #include <cublas_v2.h>
11
+ #include <cuda_runtime.h>
12
+ #include <faiss/impl/FaissAssert.h>
14
13
  #include <memory>
15
14
  #include <utility>
16
15
  #include <vector>
17
16
 
18
- namespace faiss { namespace gpu {
17
+ namespace faiss {
18
+ namespace gpu {
19
19
 
20
20
  class GpuResources;
21
21
 
22
22
  enum AllocType {
23
- /// Unknown allocation type or miscellaneous (not currently categorized)
24
- Other = 0,
25
-
26
- /// Primary data storage for GpuIndexFlat (the raw matrix of vectors and
27
- /// vector norms if needed)
28
- FlatData = 1,
29
-
30
- /// Primary data storage for GpuIndexIVF* (the storage for each individual IVF
31
- /// list)
32
- IVFLists = 2,
33
-
34
- /// Quantizer (PQ, SQ) dictionary information
35
- Quantizer = 3,
36
-
37
- /// For GpuIndexIVFPQ, "precomputed codes" for more efficient PQ lookup
38
- /// require the use of possibly large tables. These are marked separately from
39
- /// Quantizer as these can frequently be 100s - 1000s of MiB in size
40
- QuantizerPrecomputedCodes = 4,
41
-
42
- ///
43
- /// StandardGpuResources implementation specific types
44
- ///
45
-
46
- /// When using StandardGpuResources, temporary memory allocations
47
- /// (MemorySpace::Temporary) come out of a stack region of memory that is
48
- /// allocated up front for each gpu (e.g., 1.5 GiB upon initialization). This
49
- /// allocation by StandardGpuResources is marked with this AllocType.
50
- TemporaryMemoryBuffer = 10,
51
-
52
- /// When using StandardGpuResources, any MemorySpace::Temporary allocations
53
- /// that cannot be satisfied within the TemporaryMemoryBuffer region fall back
54
- /// to calling cudaMalloc which are sized to just the request at hand. These
55
- /// "overflow" temporary allocations are marked with this AllocType.
56
- TemporaryMemoryOverflow = 11,
23
+ /// Unknown allocation type or miscellaneous (not currently categorized)
24
+ Other = 0,
25
+
26
+ /// Primary data storage for GpuIndexFlat (the raw matrix of vectors and
27
+ /// vector norms if needed)
28
+ FlatData = 1,
29
+
30
+ /// Primary data storage for GpuIndexIVF* (the storage for each individual
31
+ /// IVF
32
+ /// list)
33
+ IVFLists = 2,
34
+
35
+ /// Quantizer (PQ, SQ) dictionary information
36
+ Quantizer = 3,
37
+
38
+ /// For GpuIndexIVFPQ, "precomputed codes" for more efficient PQ lookup
39
+ /// require the use of possibly large tables. These are marked separately
40
+ /// from
41
+ /// Quantizer as these can frequently be 100s - 1000s of MiB in size
42
+ QuantizerPrecomputedCodes = 4,
43
+
44
+ ///
45
+ /// StandardGpuResources implementation specific types
46
+ ///
47
+
48
+ /// When using StandardGpuResources, temporary memory allocations
49
+ /// (MemorySpace::Temporary) come out of a stack region of memory that is
50
+ /// allocated up front for each gpu (e.g., 1.5 GiB upon initialization).
51
+ /// This
52
+ /// allocation by StandardGpuResources is marked with this AllocType.
53
+ TemporaryMemoryBuffer = 10,
54
+
55
+ /// When using StandardGpuResources, any MemorySpace::Temporary allocations
56
+ /// that cannot be satisfied within the TemporaryMemoryBuffer region fall
57
+ /// back
58
+ /// to calling cudaMalloc which are sized to just the request at hand. These
59
+ /// "overflow" temporary allocations are marked with this AllocType.
60
+ TemporaryMemoryOverflow = 11,
57
61
  };
58
62
 
59
63
  /// Convert an AllocType to string
@@ -61,16 +65,17 @@ std::string allocTypeToString(AllocType t);
61
65
 
62
66
  /// Memory regions accessible to the GPU
63
67
  enum MemorySpace {
64
- /// Temporary device memory (guaranteed to no longer be used upon exit of a
65
- /// top-level index call, and where the streams using it have completed GPU
66
- /// work). Typically backed by Device memory (cudaMalloc/cudaFree).
67
- Temporary = 0,
68
+ /// Temporary device memory (guaranteed to no longer be used upon exit of a
69
+ /// top-level index call, and where the streams using it have completed GPU
70
+ /// work). Typically backed by Device memory (cudaMalloc/cudaFree).
71
+ Temporary = 0,
68
72
 
69
- /// Managed using cudaMalloc/cudaFree (typical GPU device memory)
70
- Device = 1,
73
+ /// Managed using cudaMalloc/cudaFree (typical GPU device memory)
74
+ Device = 1,
71
75
 
72
- /// Managed using cudaMallocManaged/cudaFree (typical Unified CPU/GPU memory)
73
- Unified = 2,
76
+ /// Managed using cudaMallocManaged/cudaFree (typical Unified CPU/GPU
77
+ /// memory)
78
+ Unified = 2,
74
79
  };
75
80
 
76
81
  /// Convert a MemorySpace to string
@@ -78,44 +83,36 @@ std::string memorySpaceToString(MemorySpace s);
78
83
 
79
84
  /// Information on what/where an allocation is
80
85
  struct AllocInfo {
81
- inline AllocInfo()
82
- : type(AllocType::Other),
83
- device(0),
84
- space(MemorySpace::Device),
85
- stream(nullptr) {
86
- }
87
-
88
- inline AllocInfo(AllocType at,
89
- int dev,
90
- MemorySpace sp,
91
- cudaStream_t st)
92
- : type(at),
93
- device(dev),
94
- space(sp),
95
- stream(st) {
96
- }
97
-
98
- /// Returns a string representation of this info
99
- std::string toString() const;
100
-
101
- /// The internal category of the allocation
102
- AllocType type;
103
-
104
- /// The device on which the allocation is happening
105
- int device;
106
-
107
- /// The memory space of the allocation
108
- MemorySpace space;
109
-
110
- /// The stream on which new work on the memory will be ordered (e.g., if a
111
- /// piece of memory cached and to be returned for this call was last used on
112
- /// stream 3 and a new memory request is for stream 4, the memory manager will
113
- /// synchronize stream 4 to wait for the completion of stream 3 via events or
114
- /// other stream synchronization.
115
- ///
116
- /// The memory manager guarantees that the returned memory is free to use
117
- /// without data races on this stream specified.
118
- cudaStream_t stream;
86
+ inline AllocInfo()
87
+ : type(AllocType::Other),
88
+ device(0),
89
+ space(MemorySpace::Device),
90
+ stream(nullptr) {}
91
+
92
+ inline AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
93
+ : type(at), device(dev), space(sp), stream(st) {}
94
+
95
+ /// Returns a string representation of this info
96
+ std::string toString() const;
97
+
98
+ /// The internal category of the allocation
99
+ AllocType type;
100
+
101
+ /// The device on which the allocation is happening
102
+ int device;
103
+
104
+ /// The memory space of the allocation
105
+ MemorySpace space;
106
+
107
+ /// The stream on which new work on the memory will be ordered (e.g., if a
108
+ /// piece of memory cached and to be returned for this call was last used on
109
+ /// stream 3 and a new memory request is for stream 4, the memory manager
110
+ /// will synchronize stream 4 to wait for the completion of stream 3 via
111
+ /// events or other stream synchronization.
112
+ ///
113
+ /// The memory manager guarantees that the returned memory is free to use
114
+ /// without data races on this stream specified.
115
+ cudaStream_t stream;
119
116
  };
120
117
 
121
118
  /// Create an AllocInfo for the current device with MemorySpace::Device
@@ -129,140 +126,139 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
129
126
 
130
127
  /// Information on what/where an allocation is, along with how big it should be
131
128
  struct AllocRequest : public AllocInfo {
132
- inline AllocRequest()
133
- : AllocInfo(),
134
- size(0) {
135
- }
136
-
137
- inline AllocRequest(const AllocInfo& info,
138
- size_t sz)
139
- : AllocInfo(info),
140
- size(sz) {
141
- }
142
-
143
- inline AllocRequest(AllocType at,
144
- int dev,
145
- MemorySpace sp,
146
- cudaStream_t st,
147
- size_t sz)
148
- : AllocInfo(at, dev, sp, st),
149
- size(sz) {
150
- }
151
-
152
- /// Returns a string representation of this request
153
- std::string toString() const;
154
-
155
- /// The size in bytes of the allocation
156
- size_t size;
129
+ inline AllocRequest() : AllocInfo(), size(0) {}
130
+
131
+ inline AllocRequest(const AllocInfo& info, size_t sz)
132
+ : AllocInfo(info), size(sz) {}
133
+
134
+ inline AllocRequest(
135
+ AllocType at,
136
+ int dev,
137
+ MemorySpace sp,
138
+ cudaStream_t st,
139
+ size_t sz)
140
+ : AllocInfo(at, dev, sp, st), size(sz) {}
141
+
142
+ /// Returns a string representation of this request
143
+ std::string toString() const;
144
+
145
+ /// The size in bytes of the allocation
146
+ size_t size;
157
147
  };
158
148
 
159
149
  /// A RAII object that manages a temporary memory request
160
150
  struct GpuMemoryReservation {
161
- GpuMemoryReservation();
162
- GpuMemoryReservation(GpuResources* r,
163
- int dev,
164
- cudaStream_t str,
165
- void* p,
166
- size_t sz);
167
- GpuMemoryReservation(GpuMemoryReservation&& m) noexcept;
168
- ~GpuMemoryReservation();
169
-
170
- GpuMemoryReservation& operator=(GpuMemoryReservation&& m);
171
-
172
- inline void* get() { return data; }
173
-
174
- void release();
175
-
176
- GpuResources* res;
177
- int device;
178
- cudaStream_t stream;
179
- void* data;
180
- size_t size;
151
+ GpuMemoryReservation();
152
+ GpuMemoryReservation(
153
+ GpuResources* r,
154
+ int dev,
155
+ cudaStream_t str,
156
+ void* p,
157
+ size_t sz);
158
+ GpuMemoryReservation(GpuMemoryReservation&& m) noexcept;
159
+ ~GpuMemoryReservation();
160
+
161
+ GpuMemoryReservation& operator=(GpuMemoryReservation&& m);
162
+
163
+ inline void* get() {
164
+ return data;
165
+ }
166
+
167
+ void release();
168
+
169
+ GpuResources* res;
170
+ int device;
171
+ cudaStream_t stream;
172
+ void* data;
173
+ size_t size;
181
174
  };
182
175
 
183
176
  /// Base class of GPU-side resource provider; hides provision of
184
177
  /// cuBLAS handles, CUDA streams and all device memory allocation performed
185
178
  class GpuResources {
186
- public:
187
- virtual ~GpuResources();
179
+ public:
180
+ virtual ~GpuResources();
188
181
 
189
- /// Call to pre-allocate resources for a particular device. If this is
190
- /// not called, then resources will be allocated at the first time
191
- /// of demand
192
- virtual void initializeForDevice(int device) = 0;
182
+ /// Call to pre-allocate resources for a particular device. If this is
183
+ /// not called, then resources will be allocated at the first time
184
+ /// of demand
185
+ virtual void initializeForDevice(int device) = 0;
193
186
 
194
- /// Returns the cuBLAS handle that we use for the given device
195
- virtual cublasHandle_t getBlasHandle(int device) = 0;
187
+ /// Returns the cuBLAS handle that we use for the given device
188
+ virtual cublasHandle_t getBlasHandle(int device) = 0;
196
189
 
197
- /// Returns the stream that we order all computation on for the
198
- /// given device
199
- virtual cudaStream_t getDefaultStream(int device) = 0;
190
+ /// Returns the stream that we order all computation on for the
191
+ /// given device
192
+ virtual cudaStream_t getDefaultStream(int device) = 0;
200
193
 
201
- /// Overrides the default stream for a device to the user-supplied stream. The
202
- /// resources object does not own this stream (i.e., it will not destroy it).
203
- virtual void setDefaultStream(int device, cudaStream_t stream) = 0;
194
+ /// Overrides the default stream for a device to the user-supplied stream.
195
+ /// The resources object does not own this stream (i.e., it will not destroy
196
+ /// it).
197
+ virtual void setDefaultStream(int device, cudaStream_t stream) = 0;
204
198
 
205
- /// Returns the set of alternative streams that we use for the given device
206
- virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
199
+ /// Returns the set of alternative streams that we use for the given device
200
+ virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
207
201
 
208
- /// Memory management
209
- /// Returns an allocation from the given memory space, ordered with respect to
210
- /// the given stream (i.e., the first user will be a kernel in this stream).
211
- /// All allocations are sized internally to be the next highest multiple of 16
212
- /// bytes, and all allocations returned are guaranteed to be 16 byte aligned.
213
- virtual void* allocMemory(const AllocRequest& req) = 0;
202
+ /// Memory management
203
+ /// Returns an allocation from the given memory space, ordered with respect
204
+ /// to the given stream (i.e., the first user will be a kernel in this
205
+ /// stream). All allocations are sized internally to be the next highest
206
+ /// multiple of 16 bytes, and all allocations returned are guaranteed to be
207
+ /// 16 byte aligned.
208
+ virtual void* allocMemory(const AllocRequest& req) = 0;
214
209
 
215
- /// Returns a previous allocation
216
- virtual void deallocMemory(int device, void* in) = 0;
210
+ /// Returns a previous allocation
211
+ virtual void deallocMemory(int device, void* in) = 0;
217
212
 
218
- /// For MemorySpace::Temporary, how much space is immediately available
219
- /// without cudaMalloc allocation?
220
- virtual size_t getTempMemoryAvailable(int device) const = 0;
213
+ /// For MemorySpace::Temporary, how much space is immediately available
214
+ /// without cudaMalloc allocation?
215
+ virtual size_t getTempMemoryAvailable(int device) const = 0;
221
216
 
222
- /// Returns the available CPU pinned memory buffer
223
- virtual std::pair<void*, size_t> getPinnedMemory() = 0;
217
+ /// Returns the available CPU pinned memory buffer
218
+ virtual std::pair<void*, size_t> getPinnedMemory() = 0;
224
219
 
225
- /// Returns the stream on which we perform async CPU <-> GPU copies
226
- virtual cudaStream_t getAsyncCopyStream(int device) = 0;
220
+ /// Returns the stream on which we perform async CPU <-> GPU copies
221
+ virtual cudaStream_t getAsyncCopyStream(int device) = 0;
227
222
 
228
- ///
229
- /// Functions provided by default
230
- ///
223
+ ///
224
+ /// Functions provided by default
225
+ ///
231
226
 
232
- /// Calls getBlasHandle with the current device
233
- cublasHandle_t getBlasHandleCurrentDevice();
227
+ /// Calls getBlasHandle with the current device
228
+ cublasHandle_t getBlasHandleCurrentDevice();
234
229
 
235
- /// Calls getDefaultStream with the current device
236
- cudaStream_t getDefaultStreamCurrentDevice();
230
+ /// Calls getDefaultStream with the current device
231
+ cudaStream_t getDefaultStreamCurrentDevice();
237
232
 
238
- /// Calls getTempMemoryAvailable with the current device
239
- size_t getTempMemoryAvailableCurrentDevice() const;
233
+ /// Calls getTempMemoryAvailable with the current device
234
+ size_t getTempMemoryAvailableCurrentDevice() const;
240
235
 
241
- /// Returns a temporary memory allocation via a RAII object
242
- GpuMemoryReservation allocMemoryHandle(const AllocRequest& req);
236
+ /// Returns a temporary memory allocation via a RAII object
237
+ GpuMemoryReservation allocMemoryHandle(const AllocRequest& req);
243
238
 
244
- /// Synchronizes the CPU with respect to the default stream for the
245
- /// given device
246
- // equivalent to cudaStreamSynchronize(getDefaultStream(device))
247
- void syncDefaultStream(int device);
239
+ /// Synchronizes the CPU with respect to the default stream for the
240
+ /// given device
241
+ // equivalent to cudaStreamSynchronize(getDefaultStream(device))
242
+ void syncDefaultStream(int device);
248
243
 
249
- /// Calls syncDefaultStream for the current device
250
- void syncDefaultStreamCurrentDevice();
244
+ /// Calls syncDefaultStream for the current device
245
+ void syncDefaultStreamCurrentDevice();
251
246
 
252
- /// Calls getAlternateStreams for the current device
253
- std::vector<cudaStream_t> getAlternateStreamsCurrentDevice();
247
+ /// Calls getAlternateStreams for the current device
248
+ std::vector<cudaStream_t> getAlternateStreamsCurrentDevice();
254
249
 
255
- /// Calls getAsyncCopyStream for the current device
256
- cudaStream_t getAsyncCopyStreamCurrentDevice();
250
+ /// Calls getAsyncCopyStream for the current device
251
+ cudaStream_t getAsyncCopyStreamCurrentDevice();
257
252
  };
258
253
 
259
254
  /// Interface for a provider of a shared resources object
260
255
  class GpuResourcesProvider {
261
- public:
262
- virtual ~GpuResourcesProvider();
256
+ public:
257
+ virtual ~GpuResourcesProvider();
263
258
 
264
- /// Returns the shared resources object
265
- virtual std::shared_ptr<GpuResources> getResources() = 0;
259
+ /// Returns the shared resources object
260
+ virtual std::shared_ptr<GpuResources> getResources() = 0;
266
261
  };
267
262
 
268
- } } // namespace
263
+ } // namespace gpu
264
+ } // namespace faiss