faiss 0.1.7 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/README.md +7 -7
  4. data/ext/faiss/ext.cpp +1 -1
  5. data/ext/faiss/extconf.rb +8 -2
  6. data/ext/faiss/index.cpp +102 -69
  7. data/ext/faiss/index_binary.cpp +24 -30
  8. data/ext/faiss/kmeans.cpp +20 -16
  9. data/ext/faiss/numo.hpp +867 -0
  10. data/ext/faiss/pca_matrix.cpp +13 -14
  11. data/ext/faiss/product_quantizer.cpp +23 -24
  12. data/ext/faiss/utils.cpp +10 -37
  13. data/ext/faiss/utils.h +2 -13
  14. data/lib/faiss/version.rb +1 -1
  15. data/lib/faiss.rb +0 -5
  16. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  17. data/vendor/faiss/faiss/AutoTune.h +55 -56
  18. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  19. data/vendor/faiss/faiss/Clustering.h +88 -35
  20. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  21. data/vendor/faiss/faiss/IVFlib.h +48 -51
  22. data/vendor/faiss/faiss/Index.cpp +85 -103
  23. data/vendor/faiss/faiss/Index.h +54 -48
  24. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  25. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  26. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  27. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  28. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  29. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  30. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  31. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  32. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  33. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  34. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  35. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  36. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  37. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  38. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  39. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  40. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  41. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  42. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  43. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  54. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  55. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  69. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  73. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  76. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  77. data/vendor/faiss/faiss/IndexShards.h +85 -73
  78. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  79. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  81. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  82. data/vendor/faiss/faiss/MetricType.h +7 -7
  83. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  84. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  85. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  86. data/vendor/faiss/faiss/clone_index.h +4 -9
  87. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  88. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  89. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  90. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  91. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  96. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  102. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  103. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  104. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  106. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  108. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  110. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  112. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  113. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  114. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  115. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  116. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  121. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  122. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  124. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  125. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  126. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  128. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  129. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  130. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  131. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  133. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  135. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  136. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  137. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  138. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  139. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  140. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  142. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  144. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  145. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  146. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  148. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  149. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  151. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  153. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  154. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  156. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  157. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  158. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  159. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  160. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  161. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  162. data/vendor/faiss/faiss/impl/io.h +31 -41
  163. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  164. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  165. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  166. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  167. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  171. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  172. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  173. data/vendor/faiss/faiss/index_factory.h +6 -7
  174. data/vendor/faiss/faiss/index_io.h +23 -26
  175. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  177. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  178. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  179. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  180. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  181. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  183. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  185. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  186. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  187. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  188. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  189. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  190. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  191. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  192. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  193. data/vendor/faiss/faiss/utils/distances.h +133 -118
  194. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  195. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  196. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  197. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  198. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  199. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  200. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  201. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  202. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  203. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  204. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  205. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  206. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  207. data/vendor/faiss/faiss/utils/random.h +13 -16
  208. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  209. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  210. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  211. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  212. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  213. data/vendor/faiss/faiss/utils/utils.h +53 -48
  214. metadata +26 -12
  215. data/lib/faiss/index.rb +0 -20
  216. data/lib/faiss/index_binary.rb +0 -20
  217. data/lib/faiss/kmeans.rb +0 -15
  218. data/lib/faiss/pca_matrix.rb +0 -15
  219. data/lib/faiss/product_quantizer.rb +0 -22
@@ -5,138 +5,138 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #pragma once
10
9
 
11
10
  #include <faiss/gpu/GpuResources.h>
12
- #include <faiss/gpu/utils/StackDeviceMemory.h>
13
11
  #include <faiss/gpu/utils/DeviceUtils.h>
12
+ #include <faiss/gpu/utils/StackDeviceMemory.h>
14
13
  #include <functional>
15
14
  #include <map>
16
15
  #include <unordered_map>
17
16
  #include <vector>
18
17
 
19
- namespace faiss { namespace gpu {
18
+ namespace faiss {
19
+ namespace gpu {
20
20
 
21
21
  /// Standard implementation of the GpuResources object that provides for a
22
22
  /// temporary memory manager
23
23
  class StandardGpuResourcesImpl : public GpuResources {
24
- public:
25
- StandardGpuResourcesImpl();
24
+ public:
25
+ StandardGpuResourcesImpl();
26
26
 
27
- ~StandardGpuResourcesImpl() override;
27
+ ~StandardGpuResourcesImpl() override;
28
28
 
29
- /// Disable allocation of temporary memory; all temporary memory
30
- /// requests will call cudaMalloc / cudaFree at the point of use
31
- void noTempMemory();
29
+ /// Disable allocation of temporary memory; all temporary memory
30
+ /// requests will call cudaMalloc / cudaFree at the point of use
31
+ void noTempMemory();
32
32
 
33
- /// Specify that we wish to use a certain fixed size of memory on
34
- /// all devices as temporary memory. This is the upper bound for the GPU
35
- /// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
36
- /// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
37
- /// To avoid any temporary memory allocation, pass 0.
38
- void setTempMemory(size_t size);
33
+ /// Specify that we wish to use a certain fixed size of memory on
34
+ /// all devices as temporary memory. This is the upper bound for the GPU
35
+ /// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
36
+ /// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
37
+ /// To avoid any temporary memory allocation, pass 0.
38
+ void setTempMemory(size_t size);
39
39
 
40
- /// Set amount of pinned memory to allocate, for async GPU <-> CPU
41
- /// transfers
42
- void setPinnedMemory(size_t size);
40
+ /// Set amount of pinned memory to allocate, for async GPU <-> CPU
41
+ /// transfers
42
+ void setPinnedMemory(size_t size);
43
43
 
44
- /// Called to change the stream for work ordering. We do not own `stream`;
45
- /// i.e., it will not be destroyed when the GpuResources object gets cleaned
46
- /// up.
47
- /// We are guaranteed that all Faiss GPU work is ordered with respect to
48
- /// this stream upon exit from an index or other Faiss GPU call.
49
- void setDefaultStream(int device, cudaStream_t stream) override;
44
+ /// Called to change the stream for work ordering. We do not own `stream`;
45
+ /// i.e., it will not be destroyed when the GpuResources object gets cleaned
46
+ /// up.
47
+ /// We are guaranteed that all Faiss GPU work is ordered with respect to
48
+ /// this stream upon exit from an index or other Faiss GPU call.
49
+ void setDefaultStream(int device, cudaStream_t stream) override;
50
50
 
51
- /// Revert the default stream to the original stream managed by this resources
52
- /// object, in case someone called `setDefaultStream`.
53
- void revertDefaultStream(int device);
51
+ /// Revert the default stream to the original stream managed by this
52
+ /// resources object, in case someone called `setDefaultStream`.
53
+ void revertDefaultStream(int device);
54
54
 
55
- /// Returns the stream for the given device on which all Faiss GPU work is
56
- /// ordered.
57
- /// We are guaranteed that all Faiss GPU work is ordered with respect to
58
- /// this stream upon exit from an index or other Faiss GPU call.
59
- cudaStream_t getDefaultStream(int device) override;
55
+ /// Returns the stream for the given device on which all Faiss GPU work is
56
+ /// ordered.
57
+ /// We are guaranteed that all Faiss GPU work is ordered with respect to
58
+ /// this stream upon exit from an index or other Faiss GPU call.
59
+ cudaStream_t getDefaultStream(int device) override;
60
60
 
61
- /// Called to change the work ordering streams to the null stream
62
- /// for all devices
63
- void setDefaultNullStreamAllDevices();
61
+ /// Called to change the work ordering streams to the null stream
62
+ /// for all devices
63
+ void setDefaultNullStreamAllDevices();
64
64
 
65
- /// If enabled, will print every GPU memory allocation and deallocation to
66
- /// standard output
67
- void setLogMemoryAllocations(bool enable);
65
+ /// If enabled, will print every GPU memory allocation and deallocation to
66
+ /// standard output
67
+ void setLogMemoryAllocations(bool enable);
68
68
 
69
- public:
70
- /// Internal system calls
69
+ public:
70
+ /// Internal system calls
71
71
 
72
- /// Initialize resources for this device
73
- void initializeForDevice(int device) override;
72
+ /// Initialize resources for this device
73
+ void initializeForDevice(int device) override;
74
74
 
75
- cublasHandle_t getBlasHandle(int device) override;
75
+ cublasHandle_t getBlasHandle(int device) override;
76
76
 
77
- std::vector<cudaStream_t> getAlternateStreams(int device) override;
77
+ std::vector<cudaStream_t> getAlternateStreams(int device) override;
78
78
 
79
- /// Allocate non-temporary GPU memory
80
- void* allocMemory(const AllocRequest& req) override;
79
+ /// Allocate non-temporary GPU memory
80
+ void* allocMemory(const AllocRequest& req) override;
81
81
 
82
- /// Returns a previous allocation
83
- void deallocMemory(int device, void* in) override;
82
+ /// Returns a previous allocation
83
+ void deallocMemory(int device, void* in) override;
84
84
 
85
- size_t getTempMemoryAvailable(int device) const override;
85
+ size_t getTempMemoryAvailable(int device) const override;
86
86
 
87
- /// Export a description of memory used for Python
88
- std::map<int, std::map<std::string, std::pair<int, size_t>>>
89
- getMemoryInfo() const;
87
+ /// Export a description of memory used for Python
88
+ std::map<int, std::map<std::string, std::pair<int, size_t>>> getMemoryInfo()
89
+ const;
90
90
 
91
- std::pair<void*, size_t> getPinnedMemory() override;
91
+ std::pair<void*, size_t> getPinnedMemory() override;
92
92
 
93
- cudaStream_t getAsyncCopyStream(int device) override;
93
+ cudaStream_t getAsyncCopyStream(int device) override;
94
94
 
95
- private:
96
- /// Have GPU resources been initialized for this device yet?
97
- bool isInitialized(int device) const;
95
+ private:
96
+ /// Have GPU resources been initialized for this device yet?
97
+ bool isInitialized(int device) const;
98
98
 
99
- /// Adjust the default temporary memory allocation based on the total GPU
100
- /// memory size
101
- static size_t getDefaultTempMemForGPU(int device, size_t requested);
99
+ /// Adjust the default temporary memory allocation based on the total GPU
100
+ /// memory size
101
+ static size_t getDefaultTempMemForGPU(int device, size_t requested);
102
102
 
103
- private:
104
- /// Set of currently outstanding memory allocations per device
105
- /// device -> (alloc request, allocated ptr)
106
- std::unordered_map<int, std::unordered_map<void*, AllocRequest>> allocs_;
103
+ private:
104
+ /// Set of currently outstanding memory allocations per device
105
+ /// device -> (alloc request, allocated ptr)
106
+ std::unordered_map<int, std::unordered_map<void*, AllocRequest>> allocs_;
107
107
 
108
- /// Temporary memory provider, per each device
109
- std::unordered_map<int, std::unique_ptr<StackDeviceMemory>> tempMemory_;
108
+ /// Temporary memory provider, per each device
109
+ std::unordered_map<int, std::unique_ptr<StackDeviceMemory>> tempMemory_;
110
110
 
111
- /// Our default stream that work is ordered on, one per each device
112
- std::unordered_map<int, cudaStream_t> defaultStreams_;
111
+ /// Our default stream that work is ordered on, one per each device
112
+ std::unordered_map<int, cudaStream_t> defaultStreams_;
113
113
 
114
- /// This contains particular streams as set by the user for
115
- /// ordering, if any
116
- std::unordered_map<int, cudaStream_t> userDefaultStreams_;
114
+ /// This contains particular streams as set by the user for
115
+ /// ordering, if any
116
+ std::unordered_map<int, cudaStream_t> userDefaultStreams_;
117
117
 
118
- /// Other streams we can use, per each device
119
- std::unordered_map<int, std::vector<cudaStream_t>> alternateStreams_;
118
+ /// Other streams we can use, per each device
119
+ std::unordered_map<int, std::vector<cudaStream_t>> alternateStreams_;
120
120
 
121
- /// Async copy stream to use for GPU <-> CPU pinned memory copies
122
- std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
121
+ /// Async copy stream to use for GPU <-> CPU pinned memory copies
122
+ std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
123
123
 
124
- /// cuBLAS handle for each device
125
- std::unordered_map<int, cublasHandle_t> blasHandles_;
124
+ /// cuBLAS handle for each device
125
+ std::unordered_map<int, cublasHandle_t> blasHandles_;
126
126
 
127
- /// Pinned memory allocation for use with this GPU
128
- void* pinnedMemAlloc_;
129
- size_t pinnedMemAllocSize_;
127
+ /// Pinned memory allocation for use with this GPU
128
+ void* pinnedMemAlloc_;
129
+ size_t pinnedMemAllocSize_;
130
130
 
131
- /// Another option is to use a specified amount of memory on all
132
- /// devices
133
- size_t tempMemSize_;
131
+ /// Another option is to use a specified amount of memory on all
132
+ /// devices
133
+ size_t tempMemSize_;
134
134
 
135
- /// Amount of pinned memory we should allocate
136
- size_t pinnedMemSize_;
135
+ /// Amount of pinned memory we should allocate
136
+ size_t pinnedMemSize_;
137
137
 
138
- /// Whether or not we log every GPU memory allocation and deallocation
139
- bool allocLogging_;
138
+ /// Whether or not we log every GPU memory allocation and deallocation
139
+ bool allocLogging_;
140
140
  };
141
141
 
142
142
  /// Default implementation of GpuResources that allocates a cuBLAS
@@ -144,61 +144,62 @@ class StandardGpuResourcesImpl : public GpuResources {
144
144
  /// Internally, the Faiss GPU code uses the instance managed by getResources,
145
145
  /// but this is the user-facing object that is internally reference counted.
146
146
  class StandardGpuResources : public GpuResourcesProvider {
147
- public:
148
- StandardGpuResources();
149
- ~StandardGpuResources() override;
147
+ public:
148
+ StandardGpuResources();
149
+ ~StandardGpuResources() override;
150
150
 
151
- std::shared_ptr<GpuResources> getResources() override;
151
+ std::shared_ptr<GpuResources> getResources() override;
152
152
 
153
- /// Disable allocation of temporary memory; all temporary memory
154
- /// requests will call cudaMalloc / cudaFree at the point of use
155
- void noTempMemory();
153
+ /// Disable allocation of temporary memory; all temporary memory
154
+ /// requests will call cudaMalloc / cudaFree at the point of use
155
+ void noTempMemory();
156
156
 
157
- /// Specify that we wish to use a certain fixed size of memory on
158
- /// all devices as temporary memory. This is the upper bound for the GPU
159
- /// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
160
- /// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
161
- /// To avoid any temporary memory allocation, pass 0.
162
- void setTempMemory(size_t size);
157
+ /// Specify that we wish to use a certain fixed size of memory on
158
+ /// all devices as temporary memory. This is the upper bound for the GPU
159
+ /// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
160
+ /// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
161
+ /// To avoid any temporary memory allocation, pass 0.
162
+ void setTempMemory(size_t size);
163
163
 
164
- /// Set amount of pinned memory to allocate, for async GPU <-> CPU
165
- /// transfers
166
- void setPinnedMemory(size_t size);
164
+ /// Set amount of pinned memory to allocate, for async GPU <-> CPU
165
+ /// transfers
166
+ void setPinnedMemory(size_t size);
167
167
 
168
- /// Called to change the stream for work ordering. We do not own `stream`;
169
- /// i.e., it will not be destroyed when the GpuResources object gets cleaned
170
- /// up.
171
- /// We are guaranteed that all Faiss GPU work is ordered with respect to
172
- /// this stream upon exit from an index or other Faiss GPU call.
173
- void setDefaultStream(int device, cudaStream_t stream);
168
+ /// Called to change the stream for work ordering. We do not own `stream`;
169
+ /// i.e., it will not be destroyed when the GpuResources object gets cleaned
170
+ /// up.
171
+ /// We are guaranteed that all Faiss GPU work is ordered with respect to
172
+ /// this stream upon exit from an index or other Faiss GPU call.
173
+ void setDefaultStream(int device, cudaStream_t stream);
174
174
 
175
- /// Revert the default stream to the original stream managed by this resources
176
- /// object, in case someone called `setDefaultStream`.
177
- void revertDefaultStream(int device);
175
+ /// Revert the default stream to the original stream managed by this
176
+ /// resources object, in case someone called `setDefaultStream`.
177
+ void revertDefaultStream(int device);
178
178
 
179
- /// Called to change the work ordering streams to the null stream
180
- /// for all devices
181
- void setDefaultNullStreamAllDevices();
179
+ /// Called to change the work ordering streams to the null stream
180
+ /// for all devices
181
+ void setDefaultNullStreamAllDevices();
182
182
 
183
- /// Export a description of memory used for Python
184
- std::map<int, std::map<std::string, std::pair<int, size_t>>>
185
- getMemoryInfo() const;
183
+ /// Export a description of memory used for Python
184
+ std::map<int, std::map<std::string, std::pair<int, size_t>>> getMemoryInfo()
185
+ const;
186
186
 
187
- /// Returns the current default stream
188
- cudaStream_t getDefaultStream(int device);
187
+ /// Returns the current default stream
188
+ cudaStream_t getDefaultStream(int device);
189
189
 
190
- /// Returns the current amount of temp memory available
191
- size_t getTempMemoryAvailable(int device) const;
190
+ /// Returns the current amount of temp memory available
191
+ size_t getTempMemoryAvailable(int device) const;
192
192
 
193
- /// Synchronize our default stream with the CPU
194
- void syncDefaultStreamCurrentDevice();
193
+ /// Synchronize our default stream with the CPU
194
+ void syncDefaultStreamCurrentDevice();
195
195
 
196
- /// If enabled, will print every GPU memory allocation and deallocation to
197
- /// standard output
198
- void setLogMemoryAllocations(bool enable);
196
+ /// If enabled, will print every GPU memory allocation and deallocation to
197
+ /// standard output
198
+ void setLogMemoryAllocations(bool enable);
199
199
 
200
- private:
201
- std::shared_ptr<StandardGpuResourcesImpl> res_;
200
+ private:
201
+ std::shared_ptr<StandardGpuResourcesImpl> res_;
202
202
  };
203
203
 
204
- } } // namespace
204
+ } // namespace gpu
205
+ } // namespace faiss