faiss 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +7 -7
  5. data/ext/faiss/extconf.rb +6 -3
  6. data/ext/faiss/numo.hpp +4 -4
  7. data/ext/faiss/utils.cpp +1 -1
  8. data/ext/faiss/utils.h +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  11. data/vendor/faiss/faiss/AutoTune.h +55 -56
  12. data/vendor/faiss/faiss/Clustering.cpp +365 -194
  13. data/vendor/faiss/faiss/Clustering.h +102 -35
  14. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  15. data/vendor/faiss/faiss/IVFlib.h +48 -51
  16. data/vendor/faiss/faiss/Index.cpp +85 -103
  17. data/vendor/faiss/faiss/Index.h +54 -48
  18. data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
  19. data/vendor/faiss/faiss/Index2Layer.h +22 -36
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  21. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
  22. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  23. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  24. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  25. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  26. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  27. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  28. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  29. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  30. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  31. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  32. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  33. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  34. data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
  35. data/vendor/faiss/faiss/IndexFlat.h +42 -59
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  39. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  40. data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
  41. data/vendor/faiss/faiss/IndexIVF.h +169 -118
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
  54. data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
  55. data/vendor/faiss/faiss/IndexLSH.h +20 -38
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -82
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
  69. data/vendor/faiss/faiss/IndexRefine.h +32 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
  73. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
  74. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  75. data/vendor/faiss/faiss/IndexShards.h +85 -73
  76. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  77. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  78. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  79. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  80. data/vendor/faiss/faiss/MetricType.h +7 -7
  81. data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
  82. data/vendor/faiss/faiss/VectorTransform.h +64 -89
  83. data/vendor/faiss/faiss/clone_index.cpp +78 -73
  84. data/vendor/faiss/faiss/clone_index.h +4 -9
  85. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  86. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  87. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
  88. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  89. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  90. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  91. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  92. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  93. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  94. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  95. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  96. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  97. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  101. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  108. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  110. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  112. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  113. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  114. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  115. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  116. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  121. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  122. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  124. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  125. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  126. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  128. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  129. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  130. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  135. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  136. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  137. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  138. data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
  139. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
  142. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  144. data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
  145. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  146. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  148. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  149. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  151. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
  153. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  154. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  156. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  157. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  158. data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
  159. data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
  160. data/vendor/faiss/faiss/impl/io.cpp +76 -95
  161. data/vendor/faiss/faiss/impl/io.h +31 -41
  162. data/vendor/faiss/faiss/impl/io_macros.h +60 -29
  163. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  164. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  165. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  166. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  167. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  171. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  172. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  173. data/vendor/faiss/faiss/index_factory.cpp +619 -397
  174. data/vendor/faiss/faiss/index_factory.h +8 -6
  175. data/vendor/faiss/faiss/index_io.h +23 -26
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  177. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  178. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  179. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  180. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  181. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  183. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  185. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  186. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  187. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  188. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  189. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  190. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  191. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  192. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  193. data/vendor/faiss/faiss/utils/distances.cpp +305 -312
  194. data/vendor/faiss/faiss/utils/distances.h +170 -122
  195. data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
  196. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  197. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  198. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  199. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  200. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  201. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  202. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  203. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  204. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  205. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  206. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  207. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  208. data/vendor/faiss/faiss/utils/random.h +13 -16
  209. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  210. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  211. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  212. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  214. data/vendor/faiss/faiss/utils/utils.h +54 -49
  215. metadata +29 -4
@@ -5,138 +5,138 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #pragma once
10
9
 
11
10
  #include <faiss/gpu/GpuResources.h>
12
- #include <faiss/gpu/utils/StackDeviceMemory.h>
13
11
  #include <faiss/gpu/utils/DeviceUtils.h>
12
+ #include <faiss/gpu/utils/StackDeviceMemory.h>
14
13
  #include <functional>
15
14
  #include <map>
16
15
  #include <unordered_map>
17
16
  #include <vector>
18
17
 
19
- namespace faiss { namespace gpu {
18
+ namespace faiss {
19
+ namespace gpu {
20
20
 
21
21
  /// Standard implementation of the GpuResources object that provides for a
22
22
  /// temporary memory manager
23
23
  class StandardGpuResourcesImpl : public GpuResources {
24
- public:
25
- StandardGpuResourcesImpl();
24
+ public:
25
+ StandardGpuResourcesImpl();
26
26
 
27
- ~StandardGpuResourcesImpl() override;
27
+ ~StandardGpuResourcesImpl() override;
28
28
 
29
- /// Disable allocation of temporary memory; all temporary memory
30
- /// requests will call cudaMalloc / cudaFree at the point of use
31
- void noTempMemory();
29
+ /// Disable allocation of temporary memory; all temporary memory
30
+ /// requests will call cudaMalloc / cudaFree at the point of use
31
+ void noTempMemory();
32
32
 
33
- /// Specify that we wish to use a certain fixed size of memory on
34
- /// all devices as temporary memory. This is the upper bound for the GPU
35
- /// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
36
- /// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
37
- /// To avoid any temporary memory allocation, pass 0.
38
- void setTempMemory(size_t size);
33
+ /// Specify that we wish to use a certain fixed size of memory on
34
+ /// all devices as temporary memory. This is the upper bound for the GPU
35
+ /// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
36
+ /// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
37
+ /// To avoid any temporary memory allocation, pass 0.
38
+ void setTempMemory(size_t size);
39
39
 
40
- /// Set amount of pinned memory to allocate, for async GPU <-> CPU
41
- /// transfers
42
- void setPinnedMemory(size_t size);
40
+ /// Set amount of pinned memory to allocate, for async GPU <-> CPU
41
+ /// transfers
42
+ void setPinnedMemory(size_t size);
43
43
 
44
- /// Called to change the stream for work ordering. We do not own `stream`;
45
- /// i.e., it will not be destroyed when the GpuResources object gets cleaned
46
- /// up.
47
- /// We are guaranteed that all Faiss GPU work is ordered with respect to
48
- /// this stream upon exit from an index or other Faiss GPU call.
49
- void setDefaultStream(int device, cudaStream_t stream) override;
44
+ /// Called to change the stream for work ordering. We do not own `stream`;
45
+ /// i.e., it will not be destroyed when the GpuResources object gets cleaned
46
+ /// up.
47
+ /// We are guaranteed that all Faiss GPU work is ordered with respect to
48
+ /// this stream upon exit from an index or other Faiss GPU call.
49
+ void setDefaultStream(int device, cudaStream_t stream) override;
50
50
 
51
- /// Revert the default stream to the original stream managed by this resources
52
- /// object, in case someone called `setDefaultStream`.
53
- void revertDefaultStream(int device);
51
+ /// Revert the default stream to the original stream managed by this
52
+ /// resources object, in case someone called `setDefaultStream`.
53
+ void revertDefaultStream(int device);
54
54
 
55
- /// Returns the stream for the given device on which all Faiss GPU work is
56
- /// ordered.
57
- /// We are guaranteed that all Faiss GPU work is ordered with respect to
58
- /// this stream upon exit from an index or other Faiss GPU call.
59
- cudaStream_t getDefaultStream(int device) override;
55
+ /// Returns the stream for the given device on which all Faiss GPU work is
56
+ /// ordered.
57
+ /// We are guaranteed that all Faiss GPU work is ordered with respect to
58
+ /// this stream upon exit from an index or other Faiss GPU call.
59
+ cudaStream_t getDefaultStream(int device) override;
60
60
 
61
- /// Called to change the work ordering streams to the null stream
62
- /// for all devices
63
- void setDefaultNullStreamAllDevices();
61
+ /// Called to change the work ordering streams to the null stream
62
+ /// for all devices
63
+ void setDefaultNullStreamAllDevices();
64
64
 
65
- /// If enabled, will print every GPU memory allocation and deallocation to
66
- /// standard output
67
- void setLogMemoryAllocations(bool enable);
65
+ /// If enabled, will print every GPU memory allocation and deallocation to
66
+ /// standard output
67
+ void setLogMemoryAllocations(bool enable);
68
68
 
69
- public:
70
- /// Internal system calls
69
+ public:
70
+ /// Internal system calls
71
71
 
72
- /// Initialize resources for this device
73
- void initializeForDevice(int device) override;
72
+ /// Initialize resources for this device
73
+ void initializeForDevice(int device) override;
74
74
 
75
- cublasHandle_t getBlasHandle(int device) override;
75
+ cublasHandle_t getBlasHandle(int device) override;
76
76
 
77
- std::vector<cudaStream_t> getAlternateStreams(int device) override;
77
+ std::vector<cudaStream_t> getAlternateStreams(int device) override;
78
78
 
79
- /// Allocate non-temporary GPU memory
80
- void* allocMemory(const AllocRequest& req) override;
79
+ /// Allocate non-temporary GPU memory
80
+ void* allocMemory(const AllocRequest& req) override;
81
81
 
82
- /// Returns a previous allocation
83
- void deallocMemory(int device, void* in) override;
82
+ /// Returns a previous allocation
83
+ void deallocMemory(int device, void* in) override;
84
84
 
85
- size_t getTempMemoryAvailable(int device) const override;
85
+ size_t getTempMemoryAvailable(int device) const override;
86
86
 
87
- /// Export a description of memory used for Python
88
- std::map<int, std::map<std::string, std::pair<int, size_t>>>
89
- getMemoryInfo() const;
87
+ /// Export a description of memory used for Python
88
+ std::map<int, std::map<std::string, std::pair<int, size_t>>> getMemoryInfo()
89
+ const;
90
90
 
91
- std::pair<void*, size_t> getPinnedMemory() override;
91
+ std::pair<void*, size_t> getPinnedMemory() override;
92
92
 
93
- cudaStream_t getAsyncCopyStream(int device) override;
93
+ cudaStream_t getAsyncCopyStream(int device) override;
94
94
 
95
- private:
96
- /// Have GPU resources been initialized for this device yet?
97
- bool isInitialized(int device) const;
95
+ private:
96
+ /// Have GPU resources been initialized for this device yet?
97
+ bool isInitialized(int device) const;
98
98
 
99
- /// Adjust the default temporary memory allocation based on the total GPU
100
- /// memory size
101
- static size_t getDefaultTempMemForGPU(int device, size_t requested);
99
+ /// Adjust the default temporary memory allocation based on the total GPU
100
+ /// memory size
101
+ static size_t getDefaultTempMemForGPU(int device, size_t requested);
102
102
 
103
- private:
104
- /// Set of currently outstanding memory allocations per device
105
- /// device -> (alloc request, allocated ptr)
106
- std::unordered_map<int, std::unordered_map<void*, AllocRequest>> allocs_;
103
+ private:
104
+ /// Set of currently outstanding memory allocations per device
105
+ /// device -> (alloc request, allocated ptr)
106
+ std::unordered_map<int, std::unordered_map<void*, AllocRequest>> allocs_;
107
107
 
108
- /// Temporary memory provider, per each device
109
- std::unordered_map<int, std::unique_ptr<StackDeviceMemory>> tempMemory_;
108
+ /// Temporary memory provider, per each device
109
+ std::unordered_map<int, std::unique_ptr<StackDeviceMemory>> tempMemory_;
110
110
 
111
- /// Our default stream that work is ordered on, one per each device
112
- std::unordered_map<int, cudaStream_t> defaultStreams_;
111
+ /// Our default stream that work is ordered on, one per each device
112
+ std::unordered_map<int, cudaStream_t> defaultStreams_;
113
113
 
114
- /// This contains particular streams as set by the user for
115
- /// ordering, if any
116
- std::unordered_map<int, cudaStream_t> userDefaultStreams_;
114
+ /// This contains particular streams as set by the user for
115
+ /// ordering, if any
116
+ std::unordered_map<int, cudaStream_t> userDefaultStreams_;
117
117
 
118
- /// Other streams we can use, per each device
119
- std::unordered_map<int, std::vector<cudaStream_t>> alternateStreams_;
118
+ /// Other streams we can use, per each device
119
+ std::unordered_map<int, std::vector<cudaStream_t>> alternateStreams_;
120
120
 
121
- /// Async copy stream to use for GPU <-> CPU pinned memory copies
122
- std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
121
+ /// Async copy stream to use for GPU <-> CPU pinned memory copies
122
+ std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
123
123
 
124
- /// cuBLAS handle for each device
125
- std::unordered_map<int, cublasHandle_t> blasHandles_;
124
+ /// cuBLAS handle for each device
125
+ std::unordered_map<int, cublasHandle_t> blasHandles_;
126
126
 
127
- /// Pinned memory allocation for use with this GPU
128
- void* pinnedMemAlloc_;
129
- size_t pinnedMemAllocSize_;
127
+ /// Pinned memory allocation for use with this GPU
128
+ void* pinnedMemAlloc_;
129
+ size_t pinnedMemAllocSize_;
130
130
 
131
- /// Another option is to use a specified amount of memory on all
132
- /// devices
133
- size_t tempMemSize_;
131
+ /// Another option is to use a specified amount of memory on all
132
+ /// devices
133
+ size_t tempMemSize_;
134
134
 
135
- /// Amount of pinned memory we should allocate
136
- size_t pinnedMemSize_;
135
+ /// Amount of pinned memory we should allocate
136
+ size_t pinnedMemSize_;
137
137
 
138
- /// Whether or not we log every GPU memory allocation and deallocation
139
- bool allocLogging_;
138
+ /// Whether or not we log every GPU memory allocation and deallocation
139
+ bool allocLogging_;
140
140
  };
141
141
 
142
142
  /// Default implementation of GpuResources that allocates a cuBLAS
@@ -144,61 +144,62 @@ class StandardGpuResourcesImpl : public GpuResources {
144
144
  /// Internally, the Faiss GPU code uses the instance managed by getResources,
145
145
  /// but this is the user-facing object that is internally reference counted.
146
146
  class StandardGpuResources : public GpuResourcesProvider {
147
- public:
148
- StandardGpuResources();
149
- ~StandardGpuResources() override;
147
+ public:
148
+ StandardGpuResources();
149
+ ~StandardGpuResources() override;
150
150
 
151
- std::shared_ptr<GpuResources> getResources() override;
151
+ std::shared_ptr<GpuResources> getResources() override;
152
152
 
153
- /// Disable allocation of temporary memory; all temporary memory
154
- /// requests will call cudaMalloc / cudaFree at the point of use
155
- void noTempMemory();
153
+ /// Disable allocation of temporary memory; all temporary memory
154
+ /// requests will call cudaMalloc / cudaFree at the point of use
155
+ void noTempMemory();
156
156
 
157
- /// Specify that we wish to use a certain fixed size of memory on
158
- /// all devices as temporary memory. This is the upper bound for the GPU
159
- /// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
160
- /// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
161
- /// To avoid any temporary memory allocation, pass 0.
162
- void setTempMemory(size_t size);
157
+ /// Specify that we wish to use a certain fixed size of memory on
158
+ /// all devices as temporary memory. This is the upper bound for the GPU
159
+ /// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
160
+ /// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
161
+ /// To avoid any temporary memory allocation, pass 0.
162
+ void setTempMemory(size_t size);
163
163
 
164
- /// Set amount of pinned memory to allocate, for async GPU <-> CPU
165
- /// transfers
166
- void setPinnedMemory(size_t size);
164
+ /// Set amount of pinned memory to allocate, for async GPU <-> CPU
165
+ /// transfers
166
+ void setPinnedMemory(size_t size);
167
167
 
168
- /// Called to change the stream for work ordering. We do not own `stream`;
169
- /// i.e., it will not be destroyed when the GpuResources object gets cleaned
170
- /// up.
171
- /// We are guaranteed that all Faiss GPU work is ordered with respect to
172
- /// this stream upon exit from an index or other Faiss GPU call.
173
- void setDefaultStream(int device, cudaStream_t stream);
168
+ /// Called to change the stream for work ordering. We do not own `stream`;
169
+ /// i.e., it will not be destroyed when the GpuResources object gets cleaned
170
+ /// up.
171
+ /// We are guaranteed that all Faiss GPU work is ordered with respect to
172
+ /// this stream upon exit from an index or other Faiss GPU call.
173
+ void setDefaultStream(int device, cudaStream_t stream);
174
174
 
175
- /// Revert the default stream to the original stream managed by this resources
176
- /// object, in case someone called `setDefaultStream`.
177
- void revertDefaultStream(int device);
175
+ /// Revert the default stream to the original stream managed by this
176
+ /// resources object, in case someone called `setDefaultStream`.
177
+ void revertDefaultStream(int device);
178
178
 
179
- /// Called to change the work ordering streams to the null stream
180
- /// for all devices
181
- void setDefaultNullStreamAllDevices();
179
+ /// Called to change the work ordering streams to the null stream
180
+ /// for all devices
181
+ void setDefaultNullStreamAllDevices();
182
182
 
183
- /// Export a description of memory used for Python
184
- std::map<int, std::map<std::string, std::pair<int, size_t>>>
185
- getMemoryInfo() const;
183
+ /// Export a description of memory used for Python
184
+ std::map<int, std::map<std::string, std::pair<int, size_t>>> getMemoryInfo()
185
+ const;
186
186
 
187
- /// Returns the current default stream
188
- cudaStream_t getDefaultStream(int device);
187
+ /// Returns the current default stream
188
+ cudaStream_t getDefaultStream(int device);
189
189
 
190
- /// Returns the current amount of temp memory available
191
- size_t getTempMemoryAvailable(int device) const;
190
+ /// Returns the current amount of temp memory available
191
+ size_t getTempMemoryAvailable(int device) const;
192
192
 
193
- /// Synchronize our default stream with the CPU
194
- void syncDefaultStreamCurrentDevice();
193
+ /// Synchronize our default stream with the CPU
194
+ void syncDefaultStreamCurrentDevice();
195
195
 
196
- /// If enabled, will print every GPU memory allocation and deallocation to
197
- /// standard output
198
- void setLogMemoryAllocations(bool enable);
196
+ /// If enabled, will print every GPU memory allocation and deallocation to
197
+ /// standard output
198
+ void setLogMemoryAllocations(bool enable);
199
199
 
200
- private:
201
- std::shared_ptr<StandardGpuResourcesImpl> res_;
200
+ private:
201
+ std::shared_ptr<StandardGpuResourcesImpl> res_;
202
202
  };
203
203
 
204
- } } // namespace
204
+ } // namespace gpu
205
+ } // namespace faiss