faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <faiss/gpu/GpuIndexIVF.h>
12
+ #include <faiss/IndexScalarQuantizer.h>
13
+
14
+ namespace faiss { namespace gpu {
15
+
16
+ class IVFFlat;
17
+ class GpuIndexFlat;
18
+
19
+ struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
20
+ };
21
+
22
+ /// Wrapper around the GPU implementation that looks like
23
+ /// faiss::IndexIVFScalarQuantizer
24
+ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
25
+ public:
26
+ /// Construct from a pre-existing faiss::IndexIVFScalarQuantizer instance,
27
+ /// copying data over to the given GPU, if the input index is trained.
28
+ GpuIndexIVFScalarQuantizer(
29
+ GpuResources* resources,
30
+ const faiss::IndexIVFScalarQuantizer* index,
31
+ GpuIndexIVFScalarQuantizerConfig config =
32
+ GpuIndexIVFScalarQuantizerConfig());
33
+
34
+ /// Constructs a new instance with an empty flat quantizer; the user
35
+ /// provides the number of lists desired.
36
+ GpuIndexIVFScalarQuantizer(
37
+ GpuResources* resources,
38
+ int dims,
39
+ int nlist,
40
+ faiss::ScalarQuantizer::QuantizerType qtype,
41
+ faiss::MetricType metric = MetricType::METRIC_L2,
42
+ bool encodeResidual = true,
43
+ GpuIndexIVFScalarQuantizerConfig config =
44
+ GpuIndexIVFScalarQuantizerConfig());
45
+
46
+ ~GpuIndexIVFScalarQuantizer() override;
47
+
48
+ /// Reserve GPU memory in our inverted lists for this number of vectors
49
+ void reserveMemory(size_t numVecs);
50
+
51
+ /// Initialize ourselves from the given CPU index; will overwrite
52
+ /// all data in ourselves
53
+ void copyFrom(const faiss::IndexIVFScalarQuantizer* index);
54
+
55
+ /// Copy ourselves to the given CPU index; will overwrite all data
56
+ /// in the index instance
57
+ void copyTo(faiss::IndexIVFScalarQuantizer* index) const;
58
+
59
+ /// After adding vectors, one can call this to reclaim device memory
60
+ /// to exactly the amount needed. Returns space reclaimed in bytes
61
+ size_t reclaimMemory();
62
+
63
+ void reset() override;
64
+
65
+ void train(Index::idx_t n, const float* x) override;
66
+
67
+ protected:
68
+ /// Called from GpuIndex for add/add_with_ids
69
+ void addImpl_(int n,
70
+ const float* x,
71
+ const Index::idx_t* ids) override;
72
+
73
+ /// Called from GpuIndex for search
74
+ void searchImpl_(int n,
75
+ const float* x,
76
+ int k,
77
+ float* distances,
78
+ Index::idx_t* labels) const override;
79
+
80
+ /// Called from train to handle SQ residual training
81
+ void trainResiduals_(Index::idx_t n, const float* x);
82
+
83
+ public:
84
+ /// Exposed like the CPU version
85
+ faiss::ScalarQuantizer sq;
86
+
87
+ /// Exposed like the CPU version
88
+ bool by_residual;
89
+
90
+ private:
91
+ GpuIndexIVFScalarQuantizerConfig ivfSQConfig_;
92
+
93
+ /// Desired inverted list memory reservation
94
+ size_t reserveMemoryVecs_;
95
+
96
+ /// Instance that we own; contains the inverted list
97
+ IVFFlat* index_;
98
+ };
99
+
100
+ } } // namespace
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ namespace faiss { namespace gpu {
12
+
13
+ /// How user vector index data is stored on the GPU
14
+ enum IndicesOptions {
15
+ /// The user indices are only stored on the CPU; the GPU returns
16
+ /// (inverted list, offset) to the CPU which is then translated to
17
+ /// the real user index.
18
+ INDICES_CPU = 0,
19
+ /// The indices are not stored at all, on either the CPU or
20
+ /// GPU. Only (inverted list, offset) is returned to the user as the
21
+ /// index.
22
+ INDICES_IVF = 1,
23
+ /// Indices are stored as 32 bit integers on the GPU, but returned
24
+ /// as 64 bit integers
25
+ INDICES_32_BIT = 2,
26
+ /// Indices are stored as 64 bit integers on the GPU
27
+ INDICES_64_BIT = 3,
28
+ };
29
+
30
+ } } // namespace
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/GpuResources.h>
10
+ #include <faiss/gpu/utils/DeviceUtils.h>
11
+
12
+ namespace faiss { namespace gpu {
13
+
14
+ GpuResources::~GpuResources() {
15
+ }
16
+
17
+ cublasHandle_t
18
+ GpuResources::getBlasHandleCurrentDevice() {
19
+ return getBlasHandle(getCurrentDevice());
20
+ }
21
+
22
+ cudaStream_t
23
+ GpuResources::getDefaultStreamCurrentDevice() {
24
+ return getDefaultStream(getCurrentDevice());
25
+ }
26
+
27
+ std::vector<cudaStream_t>
28
+ GpuResources::getAlternateStreamsCurrentDevice() {
29
+ return getAlternateStreams(getCurrentDevice());
30
+ }
31
+
32
+ DeviceMemory&
33
+ GpuResources::getMemoryManagerCurrentDevice() {
34
+ return getMemoryManager(getCurrentDevice());
35
+ }
36
+
37
+ cudaStream_t
38
+ GpuResources::getAsyncCopyStreamCurrentDevice() {
39
+ return getAsyncCopyStream(getCurrentDevice());
40
+ }
41
+
42
+ void
43
+ GpuResources::syncDefaultStream(int device) {
44
+ CUDA_VERIFY(cudaStreamSynchronize(getDefaultStream(device)));
45
+ }
46
+
47
+ void
48
+ GpuResources::syncDefaultStreamCurrentDevice() {
49
+ syncDefaultStream(getCurrentDevice());
50
+ }
51
+
52
+ } } // namespace
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <faiss/gpu/utils/DeviceMemory.h>
12
+ #include <cuda_runtime.h>
13
+ #include <cublas_v2.h>
14
+ #include <utility>
15
+ #include <vector>
16
+
17
+ namespace faiss { namespace gpu {
18
+
19
+ /// Base class of GPU-side resource provider; hides provision of
20
+ /// cuBLAS handles, CUDA streams and a temporary memory manager
21
+ class GpuResources {
22
+ public:
23
+ virtual ~GpuResources();
24
+
25
+ /// Call to pre-allocate resources for a particular device. If this is
26
+ /// not called, then resources will be allocated at the first time
27
+ /// of demand
28
+ virtual void initializeForDevice(int device) = 0;
29
+
30
+ /// Returns the cuBLAS handle that we use for the given device
31
+ virtual cublasHandle_t getBlasHandle(int device) = 0;
32
+
33
+ /// Returns the stream that we order all computation on for the
34
+ /// given device
35
+ virtual cudaStream_t getDefaultStream(int device) = 0;
36
+
37
+ /// Returns the set of alternative streams that we use for the given device
38
+ virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
39
+
40
+ /// Returns the temporary memory manager for the given device
41
+ virtual DeviceMemory& getMemoryManager(int device) = 0;
42
+
43
+ /// Returns the available CPU pinned memory buffer
44
+ virtual std::pair<void*, size_t> getPinnedMemory() = 0;
45
+
46
+ /// Returns the stream on which we perform async CPU <-> GPU copies
47
+ virtual cudaStream_t getAsyncCopyStream(int device) = 0;
48
+
49
+ /// Calls getBlasHandle with the current device
50
+ cublasHandle_t getBlasHandleCurrentDevice();
51
+
52
+ /// Calls getDefaultStream with the current device
53
+ cudaStream_t getDefaultStreamCurrentDevice();
54
+
55
+ /// Synchronizes the CPU with respect to the default stream for the
56
+ /// given device
57
+ // equivalent to cudaStreamSynchronize(getDefaultStream(device))
58
+ void syncDefaultStream(int device);
59
+
60
+ /// Calls syncDefaultStream for the current device
61
+ void syncDefaultStreamCurrentDevice();
62
+
63
+ /// Calls getAlternateStreams for the current device
64
+ std::vector<cudaStream_t> getAlternateStreamsCurrentDevice();
65
+
66
+ /// Calls getMemoryManager for the current device
67
+ DeviceMemory& getMemoryManagerCurrentDevice();
68
+
69
+ /// Calls getAsyncCopyStream for the current device
70
+ cudaStream_t getAsyncCopyStreamCurrentDevice();
71
+ };
72
+
73
+ } } // namespace
@@ -0,0 +1,295 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/StandardGpuResources.h>
10
+ #include <faiss/gpu/utils/MemorySpace.h>
11
+ #include <faiss/impl/FaissAssert.h>
12
+ #include <limits>
13
+
14
+ namespace faiss { namespace gpu {
15
+
16
+ namespace {
17
+
18
+ // How many streams per device we allocate by default (for multi-streaming)
19
+ constexpr int kNumStreams = 2;
20
+
21
+ // Use 256 MiB of pinned memory for async CPU <-> GPU copies by default
22
+ constexpr size_t kDefaultPinnedMemoryAllocation = (size_t) 256 * 1024 * 1024;
23
+
24
+ // Default temporary memory allocation for <= 4 GiB memory GPUs
25
+ constexpr size_t k4GiBTempMem = (size_t) 512 * 1024 * 1024;
26
+
27
+ // Default temporary memory allocation for <= 8 GiB memory GPUs
28
+ constexpr size_t k8GiBTempMem = (size_t) 1024 * 1024 * 1024;
29
+
30
+ // Maximum temporary memory allocation for all GPUs
31
+ constexpr size_t kMaxTempMem = (size_t) 1536 * 1024 * 1024;
32
+
33
+ }
34
+
35
+ StandardGpuResources::StandardGpuResources() :
36
+ pinnedMemAlloc_(nullptr),
37
+ pinnedMemAllocSize_(0),
38
+ // let the adjustment function determine the memory size for us by passing
39
+ // in a huge value that will then be adjusted
40
+ tempMemSize_(getDefaultTempMemForGPU(-1,
41
+ std::numeric_limits<size_t>::max())),
42
+ pinnedMemSize_(kDefaultPinnedMemoryAllocation),
43
+ cudaMallocWarning_(true) {
44
+ }
45
+
46
+ StandardGpuResources::~StandardGpuResources() {
47
+ for (auto& entry : defaultStreams_) {
48
+ DeviceScope scope(entry.first);
49
+
50
+ auto it = userDefaultStreams_.find(entry.first);
51
+ if (it == userDefaultStreams_.end()) {
52
+ // The user did not specify this stream, thus we are the ones
53
+ // who have created it
54
+ CUDA_VERIFY(cudaStreamDestroy(entry.second));
55
+ }
56
+ }
57
+
58
+ for (auto& entry : alternateStreams_) {
59
+ DeviceScope scope(entry.first);
60
+
61
+ for (auto stream : entry.second) {
62
+ CUDA_VERIFY(cudaStreamDestroy(stream));
63
+ }
64
+ }
65
+
66
+ for (auto& entry : asyncCopyStreams_) {
67
+ DeviceScope scope(entry.first);
68
+
69
+ CUDA_VERIFY(cudaStreamDestroy(entry.second));
70
+ }
71
+
72
+ for (auto& entry : blasHandles_) {
73
+ DeviceScope scope(entry.first);
74
+
75
+ auto blasStatus = cublasDestroy(entry.second);
76
+ FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
77
+ }
78
+
79
+ if (pinnedMemAlloc_) {
80
+ freeMemorySpace(MemorySpace::HostPinned, pinnedMemAlloc_);
81
+ }
82
+ }
83
+
84
+ size_t
85
+ StandardGpuResources::getDefaultTempMemForGPU(int device,
86
+ size_t requested) {
87
+ auto totalMem = device != -1 ?
88
+ getDeviceProperties(device).totalGlobalMem :
89
+ std::numeric_limits<size_t>::max();
90
+
91
+ if (totalMem <= (size_t) 4 * 1024 * 1024 * 1024) {
92
+ // If the GPU has <= 4 GiB of memory, reserve 512 MiB
93
+
94
+ if (requested > k4GiBTempMem) {
95
+ return k4GiBTempMem;
96
+ }
97
+ } else if (totalMem <= (size_t) 8 * 1024 * 1024 * 1024) {
98
+ // If the GPU has <= 8 GiB of memory, reserve 1 GiB
99
+
100
+ if (requested > k8GiBTempMem) {
101
+ return k8GiBTempMem;
102
+ }
103
+ } else {
104
+ // Never use more than 1.5 GiB
105
+ if (requested > kMaxTempMem) {
106
+ return kMaxTempMem;
107
+ }
108
+ }
109
+
110
+ // use whatever lower limit the user requested
111
+ return requested;
112
+ }
113
+
114
+ void
115
+ StandardGpuResources::noTempMemory() {
116
+ setTempMemory(0);
117
+ setCudaMallocWarning(false);
118
+ }
119
+
120
+ /// Sets the temporary memory size and re-creates the allocators of initialized devices
121
+ void StandardGpuResources::setTempMemory(size_t size) {
122
+ if (tempMemSize_ != size) {
123
+ // adjust based on general limits
124
+ tempMemSize_ = getDefaultTempMemForGPU(-1, size);
125
+
126
+ // We need to re-initialize memory resources for all current devices that
127
+ // have been initialized.
128
+ // This should be safe to do, even if we are currently running work, because
129
+ // the cudaFree call that this implies will force-synchronize all GPUs with
130
+ // the CPU
131
+ for (auto& p : memory_) {
132
+ int device = p.first;
133
+ // Free the existing memory first, then allocate the replacement
134
+ p.second.reset();
135
+ p.second = std::unique_ptr<StackDeviceMemory>(
136
+ new StackDeviceMemory(device,
137
+ // adjust for this specific device
138
+ getDefaultTempMemForGPU(device, tempMemSize_)));
139
+ // A fresh allocator must inherit the warning setting, as in initializeForDevice
140
+ p.second->setCudaMallocWarning(cudaMallocWarning_);
141
+ }
142
+ }
143
+ }
144
+
145
+ void
146
+ StandardGpuResources::setPinnedMemory(size_t size) {
147
+ // Should not call this after devices have been initialized
148
+ FAISS_ASSERT(defaultStreams_.size() == 0);
149
+ FAISS_ASSERT(!pinnedMemAlloc_);
150
+
151
+ pinnedMemSize_ = size;
152
+ }
153
+
154
+ /// Makes `stream` the default stream for `device`, replacing any stream already in use
155
+ void StandardGpuResources::setDefaultStream(int device, cudaStream_t stream) {
156
+ auto it = defaultStreams_.find(device);
157
+ if (it != defaultStreams_.end()) {
158
+ // Destroy the old stream only if we created it; a stream the user gave us stays theirs
159
+ if (userDefaultStreams_.count(device) == 0) { CUDA_VERIFY(cudaStreamDestroy(it->second)); }
160
+ it->second = stream;
161
+ }
162
+ // Record the stream as user-provided so the destructor leaves it alone
163
+ userDefaultStreams_[device] = stream;
164
+ }
165
+
166
+ void
167
+ StandardGpuResources::setDefaultNullStreamAllDevices() {
168
+ for (int dev = 0; dev < getNumDevices(); ++dev) {
169
+ setDefaultStream(dev, nullptr);
170
+ }
171
+ }
172
+
173
+ void
174
+ StandardGpuResources::setCudaMallocWarning(bool b) {
175
+ cudaMallocWarning_ = b;
176
+
177
+ for (auto& v : memory_) {
178
+ v.second->setCudaMallocWarning(b);
179
+ }
180
+ }
181
+
182
+ bool
183
+ StandardGpuResources::isInitialized(int device) const {
184
+ // Use default streams as a marker for whether or not a certain
185
+ // device has been initialized
186
+ return defaultStreams_.count(device) != 0;
187
+ }
188
+
189
+ void
190
+ StandardGpuResources::initializeForDevice(int device) {
191
+ if (isInitialized(device)) {
192
+ return;
193
+ }
194
+
195
+ // If this is the first device that we're initializing, create our
196
+ // pinned memory allocation
197
+ if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
198
+ allocMemorySpace(MemorySpace::HostPinned, &pinnedMemAlloc_, pinnedMemSize_);
199
+ pinnedMemAllocSize_ = pinnedMemSize_;
200
+ }
201
+
202
+ FAISS_ASSERT(device < getNumDevices());
203
+ DeviceScope scope(device);
204
+
205
+ // Make sure that device properties for all devices are cached
206
+ auto& prop = getDeviceProperties(device);
207
+
208
+ // Also check to make sure we meet our minimum compute capability (3.0)
209
+ FAISS_ASSERT_FMT(prop.major >= 3,
210
+ "Device id %d with CC %d.%d not supported, "
211
+ "need 3.0+ compute capability",
212
+ device, prop.major, prop.minor);
213
+
214
+ // Create streams
215
+ cudaStream_t defaultStream = 0;
216
+ auto it = userDefaultStreams_.find(device);
217
+ if (it != userDefaultStreams_.end()) {
218
+ // We already have a stream provided by the user
219
+ defaultStream = it->second;
220
+ } else {
221
+ CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,
222
+ cudaStreamNonBlocking));
223
+ }
224
+
225
+ defaultStreams_[device] = defaultStream;
226
+
227
+ cudaStream_t asyncCopyStream = 0;
228
+ CUDA_VERIFY(cudaStreamCreateWithFlags(&asyncCopyStream,
229
+ cudaStreamNonBlocking));
230
+
231
+ asyncCopyStreams_[device] = asyncCopyStream;
232
+
233
+ std::vector<cudaStream_t> deviceStreams;
234
+ for (int j = 0; j < kNumStreams; ++j) {
235
+ cudaStream_t stream = 0;
236
+ CUDA_VERIFY(cudaStreamCreateWithFlags(&stream,
237
+ cudaStreamNonBlocking));
238
+
239
+ deviceStreams.push_back(stream);
240
+ }
241
+
242
+ alternateStreams_[device] = std::move(deviceStreams);
243
+
244
+ // Create cuBLAS handle
245
+ cublasHandle_t blasHandle = 0;
246
+ auto blasStatus = cublasCreate(&blasHandle);
247
+ FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
248
+ blasHandles_[device] = blasHandle;
249
+
250
+ FAISS_ASSERT(memory_.count(device) == 0);
251
+
252
+ auto mem = std::unique_ptr<StackDeviceMemory>(
253
+ new StackDeviceMemory(device,
254
+ // adjust for this specific device
255
+ getDefaultTempMemForGPU(device, tempMemSize_)));
256
+ mem->setCudaMallocWarning(cudaMallocWarning_);
257
+
258
+ memory_.emplace(device, std::move(mem));
259
+ }
260
+
261
+ cublasHandle_t
262
+ StandardGpuResources::getBlasHandle(int device) {
263
+ initializeForDevice(device);
264
+ return blasHandles_[device];
265
+ }
266
+
267
+ cudaStream_t
268
+ StandardGpuResources::getDefaultStream(int device) {
269
+ initializeForDevice(device);
270
+ return defaultStreams_[device];
271
+ }
272
+
273
+ std::vector<cudaStream_t>
274
+ StandardGpuResources::getAlternateStreams(int device) {
275
+ initializeForDevice(device);
276
+ return alternateStreams_[device];
277
+ }
278
+
279
+ DeviceMemory& StandardGpuResources::getMemoryManager(int device) {
280
+ initializeForDevice(device);
281
+ return *memory_[device];
282
+ }
283
+
284
+ std::pair<void*, size_t>
285
+ StandardGpuResources::getPinnedMemory() {
286
+ return std::make_pair(pinnedMemAlloc_, pinnedMemAllocSize_);
287
+ }
288
+
289
+ cudaStream_t
290
+ StandardGpuResources::getAsyncCopyStream(int device) {
291
+ initializeForDevice(device);
292
+ return asyncCopyStreams_[device];
293
+ }
294
+
295
+ } } // namespace