faiss 0.1.7 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/README.md +7 -7
  4. data/ext/faiss/ext.cpp +1 -1
  5. data/ext/faiss/extconf.rb +8 -2
  6. data/ext/faiss/index.cpp +102 -69
  7. data/ext/faiss/index_binary.cpp +24 -30
  8. data/ext/faiss/kmeans.cpp +20 -16
  9. data/ext/faiss/numo.hpp +867 -0
  10. data/ext/faiss/pca_matrix.cpp +13 -14
  11. data/ext/faiss/product_quantizer.cpp +23 -24
  12. data/ext/faiss/utils.cpp +10 -37
  13. data/ext/faiss/utils.h +2 -13
  14. data/lib/faiss/version.rb +1 -1
  15. data/lib/faiss.rb +0 -5
  16. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  17. data/vendor/faiss/faiss/AutoTune.h +55 -56
  18. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  19. data/vendor/faiss/faiss/Clustering.h +88 -35
  20. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  21. data/vendor/faiss/faiss/IVFlib.h +48 -51
  22. data/vendor/faiss/faiss/Index.cpp +85 -103
  23. data/vendor/faiss/faiss/Index.h +54 -48
  24. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  25. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  26. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  27. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  28. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  29. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  30. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  31. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  32. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  33. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  34. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  35. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  36. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  37. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  38. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  39. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  40. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  41. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  42. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  43. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  54. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  55. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  69. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  73. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  76. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  77. data/vendor/faiss/faiss/IndexShards.h +85 -73
  78. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  79. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  81. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  82. data/vendor/faiss/faiss/MetricType.h +7 -7
  83. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  84. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  85. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  86. data/vendor/faiss/faiss/clone_index.h +4 -9
  87. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  88. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  89. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  90. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  91. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  96. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  102. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  103. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  104. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  106. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  108. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  110. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  112. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  113. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  114. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  115. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  116. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  121. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  122. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  124. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  125. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  126. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  128. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  129. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  130. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  131. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  133. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  135. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  136. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  137. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  138. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  139. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  140. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  142. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  144. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  145. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  146. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  148. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  149. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  151. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  153. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  154. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  156. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  157. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  158. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  159. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  160. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  161. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  162. data/vendor/faiss/faiss/impl/io.h +31 -41
  163. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  164. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  165. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  166. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  167. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  171. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  172. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  173. data/vendor/faiss/faiss/index_factory.h +6 -7
  174. data/vendor/faiss/faiss/index_io.h +23 -26
  175. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  177. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  178. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  179. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  180. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  181. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  183. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  185. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  186. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  187. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  188. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  189. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  190. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  191. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  192. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  193. data/vendor/faiss/faiss/utils/distances.h +133 -118
  194. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  195. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  196. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  197. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  198. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  199. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  200. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  201. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  202. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  203. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  204. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  205. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  206. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  207. data/vendor/faiss/faiss/utils/random.h +13 -16
  208. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  209. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  210. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  211. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  212. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  213. data/vendor/faiss/faiss/utils/utils.h +53 -48
  214. metadata +26 -12
  215. data/lib/faiss/index.rb +0 -20
  216. data/lib/faiss/index_binary.rb +0 -20
  217. data/lib/faiss/kmeans.rb +0 -15
  218. data/lib/faiss/pca_matrix.rb +0 -15
  219. data/lib/faiss/product_quantizer.rb +0 -22
@@ -5,55 +5,59 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #pragma once
10
9
 
11
- #include <faiss/impl/FaissAssert.h>
12
- #include <cuda_runtime.h>
13
10
  #include <cublas_v2.h>
11
+ #include <cuda_runtime.h>
12
+ #include <faiss/impl/FaissAssert.h>
14
13
  #include <memory>
15
14
  #include <utility>
16
15
  #include <vector>
17
16
 
18
- namespace faiss { namespace gpu {
17
+ namespace faiss {
18
+ namespace gpu {
19
19
 
20
20
  class GpuResources;
21
21
 
22
22
  enum AllocType {
23
- /// Unknown allocation type or miscellaneous (not currently categorized)
24
- Other = 0,
25
-
26
- /// Primary data storage for GpuIndexFlat (the raw matrix of vectors and
27
- /// vector norms if needed)
28
- FlatData = 1,
29
-
30
- /// Primary data storage for GpuIndexIVF* (the storage for each individual IVF
31
- /// list)
32
- IVFLists = 2,
33
-
34
- /// Quantizer (PQ, SQ) dictionary information
35
- Quantizer = 3,
36
-
37
- /// For GpuIndexIVFPQ, "precomputed codes" for more efficient PQ lookup
38
- /// require the use of possibly large tables. These are marked separately from
39
- /// Quantizer as these can frequently be 100s - 1000s of MiB in size
40
- QuantizerPrecomputedCodes = 4,
41
-
42
- ///
43
- /// StandardGpuResources implementation specific types
44
- ///
45
-
46
- /// When using StandardGpuResources, temporary memory allocations
47
- /// (MemorySpace::Temporary) come out of a stack region of memory that is
48
- /// allocated up front for each gpu (e.g., 1.5 GiB upon initialization). This
49
- /// allocation by StandardGpuResources is marked with this AllocType.
50
- TemporaryMemoryBuffer = 10,
51
-
52
- /// When using StandardGpuResources, any MemorySpace::Temporary allocations
53
- /// that cannot be satisfied within the TemporaryMemoryBuffer region fall back
54
- /// to calling cudaMalloc which are sized to just the request at hand. These
55
- /// "overflow" temporary allocations are marked with this AllocType.
56
- TemporaryMemoryOverflow = 11,
23
+ /// Unknown allocation type or miscellaneous (not currently categorized)
24
+ Other = 0,
25
+
26
+ /// Primary data storage for GpuIndexFlat (the raw matrix of vectors and
27
+ /// vector norms if needed)
28
+ FlatData = 1,
29
+
30
+ /// Primary data storage for GpuIndexIVF* (the storage for each individual
31
+ /// IVF
32
+ /// list)
33
+ IVFLists = 2,
34
+
35
+ /// Quantizer (PQ, SQ) dictionary information
36
+ Quantizer = 3,
37
+
38
+ /// For GpuIndexIVFPQ, "precomputed codes" for more efficient PQ lookup
39
+ /// require the use of possibly large tables. These are marked separately
40
+ /// from
41
+ /// Quantizer as these can frequently be 100s - 1000s of MiB in size
42
+ QuantizerPrecomputedCodes = 4,
43
+
44
+ ///
45
+ /// StandardGpuResources implementation specific types
46
+ ///
47
+
48
+ /// When using StandardGpuResources, temporary memory allocations
49
+ /// (MemorySpace::Temporary) come out of a stack region of memory that is
50
+ /// allocated up front for each gpu (e.g., 1.5 GiB upon initialization).
51
+ /// This
52
+ /// allocation by StandardGpuResources is marked with this AllocType.
53
+ TemporaryMemoryBuffer = 10,
54
+
55
+ /// When using StandardGpuResources, any MemorySpace::Temporary allocations
56
+ /// that cannot be satisfied within the TemporaryMemoryBuffer region fall
57
+ /// back
58
+ /// to calling cudaMalloc which are sized to just the request at hand. These
59
+ /// "overflow" temporary allocations are marked with this AllocType.
60
+ TemporaryMemoryOverflow = 11,
57
61
  };
58
62
 
59
63
  /// Convert an AllocType to string
@@ -61,16 +65,17 @@ std::string allocTypeToString(AllocType t);
61
65
 
62
66
  /// Memory regions accessible to the GPU
63
67
  enum MemorySpace {
64
- /// Temporary device memory (guaranteed to no longer be used upon exit of a
65
- /// top-level index call, and where the streams using it have completed GPU
66
- /// work). Typically backed by Device memory (cudaMalloc/cudaFree).
67
- Temporary = 0,
68
+ /// Temporary device memory (guaranteed to no longer be used upon exit of a
69
+ /// top-level index call, and where the streams using it have completed GPU
70
+ /// work). Typically backed by Device memory (cudaMalloc/cudaFree).
71
+ Temporary = 0,
68
72
 
69
- /// Managed using cudaMalloc/cudaFree (typical GPU device memory)
70
- Device = 1,
73
+ /// Managed using cudaMalloc/cudaFree (typical GPU device memory)
74
+ Device = 1,
71
75
 
72
- /// Managed using cudaMallocManaged/cudaFree (typical Unified CPU/GPU memory)
73
- Unified = 2,
76
+ /// Managed using cudaMallocManaged/cudaFree (typical Unified CPU/GPU
77
+ /// memory)
78
+ Unified = 2,
74
79
  };
75
80
 
76
81
  /// Convert a MemorySpace to string
@@ -78,44 +83,36 @@ std::string memorySpaceToString(MemorySpace s);
78
83
 
79
84
  /// Information on what/where an allocation is
80
85
  struct AllocInfo {
81
- inline AllocInfo()
82
- : type(AllocType::Other),
83
- device(0),
84
- space(MemorySpace::Device),
85
- stream(nullptr) {
86
- }
87
-
88
- inline AllocInfo(AllocType at,
89
- int dev,
90
- MemorySpace sp,
91
- cudaStream_t st)
92
- : type(at),
93
- device(dev),
94
- space(sp),
95
- stream(st) {
96
- }
97
-
98
- /// Returns a string representation of this info
99
- std::string toString() const;
100
-
101
- /// The internal category of the allocation
102
- AllocType type;
103
-
104
- /// The device on which the allocation is happening
105
- int device;
106
-
107
- /// The memory space of the allocation
108
- MemorySpace space;
109
-
110
- /// The stream on which new work on the memory will be ordered (e.g., if a
111
- /// piece of memory cached and to be returned for this call was last used on
112
- /// stream 3 and a new memory request is for stream 4, the memory manager will
113
- /// synchronize stream 4 to wait for the completion of stream 3 via events or
114
- /// other stream synchronization.
115
- ///
116
- /// The memory manager guarantees that the returned memory is free to use
117
- /// without data races on this stream specified.
118
- cudaStream_t stream;
86
+ inline AllocInfo()
87
+ : type(AllocType::Other),
88
+ device(0),
89
+ space(MemorySpace::Device),
90
+ stream(nullptr) {}
91
+
92
+ inline AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
93
+ : type(at), device(dev), space(sp), stream(st) {}
94
+
95
+ /// Returns a string representation of this info
96
+ std::string toString() const;
97
+
98
+ /// The internal category of the allocation
99
+ AllocType type;
100
+
101
+ /// The device on which the allocation is happening
102
+ int device;
103
+
104
+ /// The memory space of the allocation
105
+ MemorySpace space;
106
+
107
+ /// The stream on which new work on the memory will be ordered (e.g., if a
108
+ /// piece of memory cached and to be returned for this call was last used on
109
+ /// stream 3 and a new memory request is for stream 4, the memory manager
110
+ /// will synchronize stream 4 to wait for the completion of stream 3 via
111
+ /// events or other stream synchronization.
112
+ ///
113
+ /// The memory manager guarantees that the returned memory is free to use
114
+ /// without data races on this stream specified.
115
+ cudaStream_t stream;
119
116
  };
120
117
 
121
118
  /// Create an AllocInfo for the current device with MemorySpace::Device
@@ -129,140 +126,139 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
129
126
 
130
127
  /// Information on what/where an allocation is, along with how big it should be
131
128
  struct AllocRequest : public AllocInfo {
132
- inline AllocRequest()
133
- : AllocInfo(),
134
- size(0) {
135
- }
136
-
137
- inline AllocRequest(const AllocInfo& info,
138
- size_t sz)
139
- : AllocInfo(info),
140
- size(sz) {
141
- }
142
-
143
- inline AllocRequest(AllocType at,
144
- int dev,
145
- MemorySpace sp,
146
- cudaStream_t st,
147
- size_t sz)
148
- : AllocInfo(at, dev, sp, st),
149
- size(sz) {
150
- }
151
-
152
- /// Returns a string representation of this request
153
- std::string toString() const;
154
-
155
- /// The size in bytes of the allocation
156
- size_t size;
129
+ inline AllocRequest() : AllocInfo(), size(0) {}
130
+
131
+ inline AllocRequest(const AllocInfo& info, size_t sz)
132
+ : AllocInfo(info), size(sz) {}
133
+
134
+ inline AllocRequest(
135
+ AllocType at,
136
+ int dev,
137
+ MemorySpace sp,
138
+ cudaStream_t st,
139
+ size_t sz)
140
+ : AllocInfo(at, dev, sp, st), size(sz) {}
141
+
142
+ /// Returns a string representation of this request
143
+ std::string toString() const;
144
+
145
+ /// The size in bytes of the allocation
146
+ size_t size;
157
147
  };
158
148
 
159
149
  /// A RAII object that manages a temporary memory request
160
150
  struct GpuMemoryReservation {
161
- GpuMemoryReservation();
162
- GpuMemoryReservation(GpuResources* r,
163
- int dev,
164
- cudaStream_t str,
165
- void* p,
166
- size_t sz);
167
- GpuMemoryReservation(GpuMemoryReservation&& m) noexcept;
168
- ~GpuMemoryReservation();
169
-
170
- GpuMemoryReservation& operator=(GpuMemoryReservation&& m);
171
-
172
- inline void* get() { return data; }
173
-
174
- void release();
175
-
176
- GpuResources* res;
177
- int device;
178
- cudaStream_t stream;
179
- void* data;
180
- size_t size;
151
+ GpuMemoryReservation();
152
+ GpuMemoryReservation(
153
+ GpuResources* r,
154
+ int dev,
155
+ cudaStream_t str,
156
+ void* p,
157
+ size_t sz);
158
+ GpuMemoryReservation(GpuMemoryReservation&& m) noexcept;
159
+ ~GpuMemoryReservation();
160
+
161
+ GpuMemoryReservation& operator=(GpuMemoryReservation&& m);
162
+
163
+ inline void* get() {
164
+ return data;
165
+ }
166
+
167
+ void release();
168
+
169
+ GpuResources* res;
170
+ int device;
171
+ cudaStream_t stream;
172
+ void* data;
173
+ size_t size;
181
174
  };
182
175
 
183
176
  /// Base class of GPU-side resource provider; hides provision of
184
177
  /// cuBLAS handles, CUDA streams and all device memory allocation
185
178
  class GpuResources {
186
- public:
187
- virtual ~GpuResources();
179
+ public:
180
+ virtual ~GpuResources();
188
181
 
189
- /// Call to pre-allocate resources for a particular device. If this is
190
- /// not called, then resources will be allocated at the first time
191
- /// of demand
192
- virtual void initializeForDevice(int device) = 0;
182
+ /// Call to pre-allocate resources for a particular device. If this is
183
+ /// not called, then resources will be allocated at the first time
184
+ /// of demand
185
+ virtual void initializeForDevice(int device) = 0;
193
186
 
194
- /// Returns the cuBLAS handle that we use for the given device
195
- virtual cublasHandle_t getBlasHandle(int device) = 0;
187
+ /// Returns the cuBLAS handle that we use for the given device
188
+ virtual cublasHandle_t getBlasHandle(int device) = 0;
196
189
 
197
- /// Returns the stream that we order all computation on for the
198
- /// given device
199
- virtual cudaStream_t getDefaultStream(int device) = 0;
190
+ /// Returns the stream that we order all computation on for the
191
+ /// given device
192
+ virtual cudaStream_t getDefaultStream(int device) = 0;
200
193
 
201
- /// Overrides the default stream for a device to the user-supplied stream. The
202
- /// resources object does not own this stream (i.e., it will not destroy it).
203
- virtual void setDefaultStream(int device, cudaStream_t stream) = 0;
194
+ /// Overrides the default stream for a device to the user-supplied stream.
195
+ /// The resources object does not own this stream (i.e., it will not destroy
196
+ /// it).
197
+ virtual void setDefaultStream(int device, cudaStream_t stream) = 0;
204
198
 
205
- /// Returns the set of alternative streams that we use for the given device
206
- virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
199
+ /// Returns the set of alternative streams that we use for the given device
200
+ virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
207
201
 
208
- /// Memory management
209
- /// Returns an allocation from the given memory space, ordered with respect to
210
- /// the given stream (i.e., the first user will be a kernel in this stream).
211
- /// All allocations are sized internally to be the next highest multiple of 16
212
- /// bytes, and all allocations returned are guaranteed to be 16 byte aligned.
213
- virtual void* allocMemory(const AllocRequest& req) = 0;
202
+ /// Memory management
203
+ /// Returns an allocation from the given memory space, ordered with respect
204
+ /// to the given stream (i.e., the first user will be a kernel in this
205
+ /// stream). All allocations are sized internally to be the next highest
206
+ /// multiple of 16 bytes, and all allocations returned are guaranteed to be
207
+ /// 16 byte aligned.
208
+ virtual void* allocMemory(const AllocRequest& req) = 0;
214
209
 
215
- /// Returns a previous allocation
216
- virtual void deallocMemory(int device, void* in) = 0;
210
+ /// Releases a previous allocation obtained from allocMemory
211
+ virtual void deallocMemory(int device, void* in) = 0;
217
212
 
218
- /// For MemorySpace::Temporary, how much space is immediately available
219
- /// without cudaMalloc allocation?
220
- virtual size_t getTempMemoryAvailable(int device) const = 0;
213
+ /// For MemorySpace::Temporary, how much space is immediately available
214
+ /// without cudaMalloc allocation?
215
+ virtual size_t getTempMemoryAvailable(int device) const = 0;
221
216
 
222
- /// Returns the available CPU pinned memory buffer
223
- virtual std::pair<void*, size_t> getPinnedMemory() = 0;
217
+ /// Returns the available CPU pinned memory buffer
218
+ virtual std::pair<void*, size_t> getPinnedMemory() = 0;
224
219
 
225
- /// Returns the stream on which we perform async CPU <-> GPU copies
226
- virtual cudaStream_t getAsyncCopyStream(int device) = 0;
220
+ /// Returns the stream on which we perform async CPU <-> GPU copies
221
+ virtual cudaStream_t getAsyncCopyStream(int device) = 0;
227
222
 
228
- ///
229
- /// Functions provided by default
230
- ///
223
+ ///
224
+ /// Functions provided by default
225
+ ///
231
226
 
232
- /// Calls getBlasHandle with the current device
233
- cublasHandle_t getBlasHandleCurrentDevice();
227
+ /// Calls getBlasHandle with the current device
228
+ cublasHandle_t getBlasHandleCurrentDevice();
234
229
 
235
- /// Calls getDefaultStream with the current device
236
- cudaStream_t getDefaultStreamCurrentDevice();
230
+ /// Calls getDefaultStream with the current device
231
+ cudaStream_t getDefaultStreamCurrentDevice();
237
232
 
238
- /// Calls getTempMemoryAvailable with the current device
239
- size_t getTempMemoryAvailableCurrentDevice() const;
233
+ /// Calls getTempMemoryAvailable with the current device
234
+ size_t getTempMemoryAvailableCurrentDevice() const;
240
235
 
241
- /// Returns a temporary memory allocation via a RAII object
242
- GpuMemoryReservation allocMemoryHandle(const AllocRequest& req);
236
+ /// Returns a temporary memory allocation via a RAII object
237
+ GpuMemoryReservation allocMemoryHandle(const AllocRequest& req);
243
238
 
244
- /// Synchronizes the CPU with respect to the default stream for the
245
- /// given device
246
- // equivalent to cudaDeviceSynchronize(getDefaultStream(device))
247
- void syncDefaultStream(int device);
239
+ /// Synchronizes the CPU with respect to the default stream for the
240
+ /// given device
241
+ // equivalent to cudaStreamSynchronize(getDefaultStream(device))
242
+ void syncDefaultStream(int device);
248
243
 
249
- /// Calls syncDefaultStream for the current device
250
- void syncDefaultStreamCurrentDevice();
244
+ /// Calls syncDefaultStream for the current device
245
+ void syncDefaultStreamCurrentDevice();
251
246
 
252
- /// Calls getAlternateStreams for the current device
253
- std::vector<cudaStream_t> getAlternateStreamsCurrentDevice();
247
+ /// Calls getAlternateStreams for the current device
248
+ std::vector<cudaStream_t> getAlternateStreamsCurrentDevice();
254
249
 
255
- /// Calls getAsyncCopyStream for the current device
256
- cudaStream_t getAsyncCopyStreamCurrentDevice();
250
+ /// Calls getAsyncCopyStream for the current device
251
+ cudaStream_t getAsyncCopyStreamCurrentDevice();
257
252
  };
258
253
 
259
254
  /// Interface for a provider of a shared resources object
260
255
  class GpuResourcesProvider {
261
- public:
262
- virtual ~GpuResourcesProvider();
256
+ public:
257
+ virtual ~GpuResourcesProvider();
263
258
 
264
- /// Returns the shared resources object
265
- virtual std::shared_ptr<GpuResources> getResources() = 0;
259
+ /// Returns the shared resources object
260
+ virtual std::shared_ptr<GpuResources> getResources() = 0;
266
261
  };
267
262
 
268
- } } // namespace
263
+ } // namespace gpu
264
+ } // namespace faiss