faiss 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +7 -7
  5. data/ext/faiss/extconf.rb +6 -3
  6. data/ext/faiss/numo.hpp +4 -4
  7. data/ext/faiss/utils.cpp +1 -1
  8. data/ext/faiss/utils.h +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  11. data/vendor/faiss/faiss/AutoTune.h +55 -56
  12. data/vendor/faiss/faiss/Clustering.cpp +365 -194
  13. data/vendor/faiss/faiss/Clustering.h +102 -35
  14. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  15. data/vendor/faiss/faiss/IVFlib.h +48 -51
  16. data/vendor/faiss/faiss/Index.cpp +85 -103
  17. data/vendor/faiss/faiss/Index.h +54 -48
  18. data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
  19. data/vendor/faiss/faiss/Index2Layer.h +22 -36
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  21. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
  22. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  23. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  24. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  25. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  26. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  27. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  28. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  29. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  30. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  31. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  32. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  33. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  34. data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
  35. data/vendor/faiss/faiss/IndexFlat.h +42 -59
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  39. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  40. data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
  41. data/vendor/faiss/faiss/IndexIVF.h +169 -118
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
  54. data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
  55. data/vendor/faiss/faiss/IndexLSH.h +20 -38
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -82
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
  69. data/vendor/faiss/faiss/IndexRefine.h +32 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
  73. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
  74. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  75. data/vendor/faiss/faiss/IndexShards.h +85 -73
  76. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  77. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  78. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  79. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  80. data/vendor/faiss/faiss/MetricType.h +7 -7
  81. data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
  82. data/vendor/faiss/faiss/VectorTransform.h +64 -89
  83. data/vendor/faiss/faiss/clone_index.cpp +78 -73
  84. data/vendor/faiss/faiss/clone_index.h +4 -9
  85. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  86. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  87. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
  88. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  89. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  90. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  91. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  92. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  93. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  94. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  95. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  96. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  97. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  101. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  108. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  110. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  112. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  113. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  114. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  115. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  116. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  121. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  122. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  124. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  125. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  126. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  128. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  129. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  130. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  135. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  136. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  137. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  138. data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
  139. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
  142. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  144. data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
  145. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  146. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  148. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  149. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  151. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
  153. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  154. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  156. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  157. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  158. data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
  159. data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
  160. data/vendor/faiss/faiss/impl/io.cpp +76 -95
  161. data/vendor/faiss/faiss/impl/io.h +31 -41
  162. data/vendor/faiss/faiss/impl/io_macros.h +60 -29
  163. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  164. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  165. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  166. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  167. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  171. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  172. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  173. data/vendor/faiss/faiss/index_factory.cpp +619 -397
  174. data/vendor/faiss/faiss/index_factory.h +8 -6
  175. data/vendor/faiss/faiss/index_io.h +23 -26
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  177. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  178. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  179. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  180. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  181. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  183. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  185. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  186. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  187. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  188. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  189. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  190. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  191. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  192. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  193. data/vendor/faiss/faiss/utils/distances.cpp +305 -312
  194. data/vendor/faiss/faiss/utils/distances.h +170 -122
  195. data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
  196. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  197. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  198. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  199. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  200. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  201. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  202. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  203. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  204. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  205. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  206. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  207. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  208. data/vendor/faiss/faiss/utils/random.h +13 -16
  209. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  210. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  211. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  212. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  214. data/vendor/faiss/faiss/utils/utils.h +54 -49
  215. metadata +29 -4
@@ -5,55 +5,59 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #pragma once
10
9
 
11
- #include <faiss/impl/FaissAssert.h>
12
- #include <cuda_runtime.h>
13
10
  #include <cublas_v2.h>
11
+ #include <cuda_runtime.h>
12
+ #include <faiss/impl/FaissAssert.h>
14
13
  #include <memory>
15
14
  #include <utility>
16
15
  #include <vector>
17
16
 
18
- namespace faiss { namespace gpu {
17
+ namespace faiss {
18
+ namespace gpu {
19
19
 
20
20
  class GpuResources;
21
21
 
22
22
  enum AllocType {
23
- /// Unknown allocation type or miscellaneous (not currently categorized)
24
- Other = 0,
25
-
26
- /// Primary data storage for GpuIndexFlat (the raw matrix of vectors and
27
- /// vector norms if needed)
28
- FlatData = 1,
29
-
30
- /// Primary data storage for GpuIndexIVF* (the storage for each individual IVF
31
- /// list)
32
- IVFLists = 2,
33
-
34
- /// Quantizer (PQ, SQ) dictionary information
35
- Quantizer = 3,
36
-
37
- /// For GpuIndexIVFPQ, "precomputed codes" for more efficient PQ lookup
38
- /// require the use of possibly large tables. These are marked separately from
39
- /// Quantizer as these can frequently be 100s - 1000s of MiB in size
40
- QuantizerPrecomputedCodes = 4,
41
-
42
- ///
43
- /// StandardGpuResources implementation specific types
44
- ///
45
-
46
- /// When using StandardGpuResources, temporary memory allocations
47
- /// (MemorySpace::Temporary) come out of a stack region of memory that is
48
- /// allocated up front for each gpu (e.g., 1.5 GiB upon initialization). This
49
- /// allocation by StandardGpuResources is marked with this AllocType.
50
- TemporaryMemoryBuffer = 10,
51
-
52
- /// When using StandardGpuResources, any MemorySpace::Temporary allocations
53
- /// that cannot be satisfied within the TemporaryMemoryBuffer region fall back
54
- /// to calling cudaMalloc which are sized to just the request at hand. These
55
- /// "overflow" temporary allocations are marked with this AllocType.
56
- TemporaryMemoryOverflow = 11,
23
+ /// Unknown allocation type or miscellaneous (not currently categorized)
24
+ Other = 0,
25
+
26
+ /// Primary data storage for GpuIndexFlat (the raw matrix of vectors and
27
+ /// vector norms if needed)
28
+ FlatData = 1,
29
+
30
+ /// Primary data storage for GpuIndexIVF* (the storage for each individual
31
+ /// IVF
32
+ /// list)
33
+ IVFLists = 2,
34
+
35
+ /// Quantizer (PQ, SQ) dictionary information
36
+ Quantizer = 3,
37
+
38
+ /// For GpuIndexIVFPQ, "precomputed codes" for more efficient PQ lookup
39
+ /// require the use of possibly large tables. These are marked separately
40
+ /// from
41
+ /// Quantizer as these can frequently be 100s - 1000s of MiB in size
42
+ QuantizerPrecomputedCodes = 4,
43
+
44
+ ///
45
+ /// StandardGpuResources implementation specific types
46
+ ///
47
+
48
+ /// When using StandardGpuResources, temporary memory allocations
49
+ /// (MemorySpace::Temporary) come out of a stack region of memory that is
50
+ /// allocated up front for each gpu (e.g., 1.5 GiB upon initialization).
51
+ /// This
52
+ /// allocation by StandardGpuResources is marked with this AllocType.
53
+ TemporaryMemoryBuffer = 10,
54
+
55
+ /// When using StandardGpuResources, any MemorySpace::Temporary allocations
56
+ /// that cannot be satisfied within the TemporaryMemoryBuffer region fall
57
+ /// back
58
+ /// to calling cudaMalloc which are sized to just the request at hand. These
59
+ /// "overflow" temporary allocations are marked with this AllocType.
60
+ TemporaryMemoryOverflow = 11,
57
61
  };
58
62
 
59
63
  /// Convert an AllocType to string
@@ -61,16 +65,17 @@ std::string allocTypeToString(AllocType t);
61
65
 
62
66
  /// Memory regions accessible to the GPU
63
67
  enum MemorySpace {
64
- /// Temporary device memory (guaranteed to no longer be used upon exit of a
65
- /// top-level index call, and where the streams using it have completed GPU
66
- /// work). Typically backed by Device memory (cudaMalloc/cudaFree).
67
- Temporary = 0,
68
+ /// Temporary device memory (guaranteed to no longer be used upon exit of a
69
+ /// top-level index call, and where the streams using it have completed GPU
70
+ /// work). Typically backed by Device memory (cudaMalloc/cudaFree).
71
+ Temporary = 0,
68
72
 
69
- /// Managed using cudaMalloc/cudaFree (typical GPU device memory)
70
- Device = 1,
73
+ /// Managed using cudaMalloc/cudaFree (typical GPU device memory)
74
+ Device = 1,
71
75
 
72
- /// Managed using cudaMallocManaged/cudaFree (typical Unified CPU/GPU memory)
73
- Unified = 2,
76
+ /// Managed using cudaMallocManaged/cudaFree (typical Unified CPU/GPU
77
+ /// memory)
78
+ Unified = 2,
74
79
  };
75
80
 
76
81
  /// Convert a MemorySpace to string
@@ -78,44 +83,36 @@ std::string memorySpaceToString(MemorySpace s);
78
83
 
79
84
  /// Information on what/where an allocation is
80
85
  struct AllocInfo {
81
- inline AllocInfo()
82
- : type(AllocType::Other),
83
- device(0),
84
- space(MemorySpace::Device),
85
- stream(nullptr) {
86
- }
87
-
88
- inline AllocInfo(AllocType at,
89
- int dev,
90
- MemorySpace sp,
91
- cudaStream_t st)
92
- : type(at),
93
- device(dev),
94
- space(sp),
95
- stream(st) {
96
- }
97
-
98
- /// Returns a string representation of this info
99
- std::string toString() const;
100
-
101
- /// The internal category of the allocation
102
- AllocType type;
103
-
104
- /// The device on which the allocation is happening
105
- int device;
106
-
107
- /// The memory space of the allocation
108
- MemorySpace space;
109
-
110
- /// The stream on which new work on the memory will be ordered (e.g., if a
111
- /// piece of memory cached and to be returned for this call was last used on
112
- /// stream 3 and a new memory request is for stream 4, the memory manager will
113
- /// synchronize stream 4 to wait for the completion of stream 3 via events or
114
- /// other stream synchronization.
115
- ///
116
- /// The memory manager guarantees that the returned memory is free to use
117
- /// without data races on this stream specified.
118
- cudaStream_t stream;
86
+ inline AllocInfo()
87
+ : type(AllocType::Other),
88
+ device(0),
89
+ space(MemorySpace::Device),
90
+ stream(nullptr) {}
91
+
92
+ inline AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
93
+ : type(at), device(dev), space(sp), stream(st) {}
94
+
95
+ /// Returns a string representation of this info
96
+ std::string toString() const;
97
+
98
+ /// The internal category of the allocation
99
+ AllocType type;
100
+
101
+ /// The device on which the allocation is happening
102
+ int device;
103
+
104
+ /// The memory space of the allocation
105
+ MemorySpace space;
106
+
107
+ /// The stream on which new work on the memory will be ordered (e.g., if a
108
+ /// piece of memory cached and to be returned for this call was last used on
109
+ /// stream 3 and a new memory request is for stream 4, the memory manager
110
+ /// will synchronize stream 4 to wait for the completion of stream 3 via
111
+ /// events or other stream synchronization.
112
+ ///
113
+ /// The memory manager guarantees that the returned memory is free to use
114
+ /// without data races on this stream specified.
115
+ cudaStream_t stream;
119
116
  };
120
117
 
121
118
  /// Create an AllocInfo for the current device with MemorySpace::Device
@@ -129,140 +126,139 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
129
126
 
130
127
  /// Information on what/where an allocation is, along with how big it should be
131
128
  struct AllocRequest : public AllocInfo {
132
- inline AllocRequest()
133
- : AllocInfo(),
134
- size(0) {
135
- }
136
-
137
- inline AllocRequest(const AllocInfo& info,
138
- size_t sz)
139
- : AllocInfo(info),
140
- size(sz) {
141
- }
142
-
143
- inline AllocRequest(AllocType at,
144
- int dev,
145
- MemorySpace sp,
146
- cudaStream_t st,
147
- size_t sz)
148
- : AllocInfo(at, dev, sp, st),
149
- size(sz) {
150
- }
151
-
152
- /// Returns a string representation of this request
153
- std::string toString() const;
154
-
155
- /// The size in bytes of the allocation
156
- size_t size;
129
+ inline AllocRequest() : AllocInfo(), size(0) {}
130
+
131
+ inline AllocRequest(const AllocInfo& info, size_t sz)
132
+ : AllocInfo(info), size(sz) {}
133
+
134
+ inline AllocRequest(
135
+ AllocType at,
136
+ int dev,
137
+ MemorySpace sp,
138
+ cudaStream_t st,
139
+ size_t sz)
140
+ : AllocInfo(at, dev, sp, st), size(sz) {}
141
+
142
+ /// Returns a string representation of this request
143
+ std::string toString() const;
144
+
145
+ /// The size in bytes of the allocation
146
+ size_t size;
157
147
  };
158
148
 
159
149
  /// A RAII object that manages a temporary memory request
160
150
  struct GpuMemoryReservation {
161
- GpuMemoryReservation();
162
- GpuMemoryReservation(GpuResources* r,
163
- int dev,
164
- cudaStream_t str,
165
- void* p,
166
- size_t sz);
167
- GpuMemoryReservation(GpuMemoryReservation&& m) noexcept;
168
- ~GpuMemoryReservation();
169
-
170
- GpuMemoryReservation& operator=(GpuMemoryReservation&& m);
171
-
172
- inline void* get() { return data; }
173
-
174
- void release();
175
-
176
- GpuResources* res;
177
- int device;
178
- cudaStream_t stream;
179
- void* data;
180
- size_t size;
151
+ GpuMemoryReservation();
152
+ GpuMemoryReservation(
153
+ GpuResources* r,
154
+ int dev,
155
+ cudaStream_t str,
156
+ void* p,
157
+ size_t sz);
158
+ GpuMemoryReservation(GpuMemoryReservation&& m) noexcept;
159
+ ~GpuMemoryReservation();
160
+
161
+ GpuMemoryReservation& operator=(GpuMemoryReservation&& m);
162
+
163
+ inline void* get() {
164
+ return data;
165
+ }
166
+
167
+ void release();
168
+
169
+ GpuResources* res;
170
+ int device;
171
+ cudaStream_t stream;
172
+ void* data;
173
+ size_t size;
181
174
  };
182
175
 
183
176
  /// Base class of GPU-side resource provider; hides provision of
184
177
  /// cuBLAS handles, CUDA streams and all device memory allocation performed
185
178
  class GpuResources {
186
- public:
187
- virtual ~GpuResources();
179
+ public:
180
+ virtual ~GpuResources();
188
181
 
189
- /// Call to pre-allocate resources for a particular device. If this is
190
- /// not called, then resources will be allocated at the first time
191
- /// of demand
192
- virtual void initializeForDevice(int device) = 0;
182
+ /// Call to pre-allocate resources for a particular device. If this is
183
+ /// not called, then resources will be allocated at the first time
184
+ /// of demand
185
+ virtual void initializeForDevice(int device) = 0;
193
186
 
194
- /// Returns the cuBLAS handle that we use for the given device
195
- virtual cublasHandle_t getBlasHandle(int device) = 0;
187
+ /// Returns the cuBLAS handle that we use for the given device
188
+ virtual cublasHandle_t getBlasHandle(int device) = 0;
196
189
 
197
- /// Returns the stream that we order all computation on for the
198
- /// given device
199
- virtual cudaStream_t getDefaultStream(int device) = 0;
190
+ /// Returns the stream that we order all computation on for the
191
+ /// given device
192
+ virtual cudaStream_t getDefaultStream(int device) = 0;
200
193
 
201
- /// Overrides the default stream for a device to the user-supplied stream. The
202
- /// resources object does not own this stream (i.e., it will not destroy it).
203
- virtual void setDefaultStream(int device, cudaStream_t stream) = 0;
194
+ /// Overrides the default stream for a device to the user-supplied stream.
195
+ /// The resources object does not own this stream (i.e., it will not destroy
196
+ /// it).
197
+ virtual void setDefaultStream(int device, cudaStream_t stream) = 0;
204
198
 
205
- /// Returns the set of alternative streams that we use for the given device
206
- virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
199
+ /// Returns the set of alternative streams that we use for the given device
200
+ virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
207
201
 
208
- /// Memory management
209
- /// Returns an allocation from the given memory space, ordered with respect to
210
- /// the given stream (i.e., the first user will be a kernel in this stream).
211
- /// All allocations are sized internally to be the next highest multiple of 16
212
- /// bytes, and all allocations returned are guaranteed to be 16 byte aligned.
213
- virtual void* allocMemory(const AllocRequest& req) = 0;
202
+ /// Memory management
203
+ /// Returns an allocation from the given memory space, ordered with respect
204
+ /// to the given stream (i.e., the first user will be a kernel in this
205
+ /// stream). All allocations are sized internally to be the next highest
206
+ /// multiple of 16 bytes, and all allocations returned are guaranteed to be
207
+ /// 16 byte aligned.
208
+ virtual void* allocMemory(const AllocRequest& req) = 0;
214
209
 
215
- /// Returns a previous allocation
216
- virtual void deallocMemory(int device, void* in) = 0;
210
+ /// Returns a previous allocation
211
+ virtual void deallocMemory(int device, void* in) = 0;
217
212
 
218
- /// For MemorySpace::Temporary, how much space is immediately available
219
- /// without cudaMalloc allocation?
220
- virtual size_t getTempMemoryAvailable(int device) const = 0;
213
+ /// For MemorySpace::Temporary, how much space is immediately available
214
+ /// without cudaMalloc allocation?
215
+ virtual size_t getTempMemoryAvailable(int device) const = 0;
221
216
 
222
- /// Returns the available CPU pinned memory buffer
223
- virtual std::pair<void*, size_t> getPinnedMemory() = 0;
217
+ /// Returns the available CPU pinned memory buffer
218
+ virtual std::pair<void*, size_t> getPinnedMemory() = 0;
224
219
 
225
- /// Returns the stream on which we perform async CPU <-> GPU copies
226
- virtual cudaStream_t getAsyncCopyStream(int device) = 0;
220
+ /// Returns the stream on which we perform async CPU <-> GPU copies
221
+ virtual cudaStream_t getAsyncCopyStream(int device) = 0;
227
222
 
228
- ///
229
- /// Functions provided by default
230
- ///
223
+ ///
224
+ /// Functions provided by default
225
+ ///
231
226
 
232
- /// Calls getBlasHandle with the current device
233
- cublasHandle_t getBlasHandleCurrentDevice();
227
+ /// Calls getBlasHandle with the current device
228
+ cublasHandle_t getBlasHandleCurrentDevice();
234
229
 
235
- /// Calls getDefaultStream with the current device
236
- cudaStream_t getDefaultStreamCurrentDevice();
230
+ /// Calls getDefaultStream with the current device
231
+ cudaStream_t getDefaultStreamCurrentDevice();
237
232
 
238
- /// Calls getTempMemoryAvailable with the current device
239
- size_t getTempMemoryAvailableCurrentDevice() const;
233
+ /// Calls getTempMemoryAvailable with the current device
234
+ size_t getTempMemoryAvailableCurrentDevice() const;
240
235
 
241
- /// Returns a temporary memory allocation via a RAII object
242
- GpuMemoryReservation allocMemoryHandle(const AllocRequest& req);
236
+ /// Returns a temporary memory allocation via a RAII object
237
+ GpuMemoryReservation allocMemoryHandle(const AllocRequest& req);
243
238
 
244
- /// Synchronizes the CPU with respect to the default stream for the
245
- /// given device
246
- // equivalent to cudaDeviceSynchronize(getDefaultStream(device))
247
- void syncDefaultStream(int device);
239
+ /// Synchronizes the CPU with respect to the default stream for the
240
+ /// given device
241
+ // equivalent to cudaDeviceSynchronize(getDefaultStream(device))
242
+ void syncDefaultStream(int device);
248
243
 
249
- /// Calls syncDefaultStream for the current device
250
- void syncDefaultStreamCurrentDevice();
244
+ /// Calls syncDefaultStream for the current device
245
+ void syncDefaultStreamCurrentDevice();
251
246
 
252
- /// Calls getAlternateStreams for the current device
253
- std::vector<cudaStream_t> getAlternateStreamsCurrentDevice();
247
+ /// Calls getAlternateStreams for the current device
248
+ std::vector<cudaStream_t> getAlternateStreamsCurrentDevice();
254
249
 
255
- /// Calls getAsyncCopyStream for the current device
256
- cudaStream_t getAsyncCopyStreamCurrentDevice();
250
+ /// Calls getAsyncCopyStream for the current device
251
+ cudaStream_t getAsyncCopyStreamCurrentDevice();
257
252
  };
258
253
 
259
254
  /// Interface for a provider of a shared resources object
260
255
  class GpuResourcesProvider {
261
- public:
262
- virtual ~GpuResourcesProvider();
256
+ public:
257
+ virtual ~GpuResourcesProvider();
263
258
 
264
- /// Returns the shared resources object
265
- virtual std::shared_ptr<GpuResources> getResources() = 0;
259
+ /// Returns the shared resources object
260
+ virtual std::shared_ptr<GpuResources> getResources() = 0;
266
261
  };
267
262
 
268
- } } // namespace
263
+ } // namespace gpu
264
+ } // namespace faiss