faiss 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
  6. data/vendor/faiss/c_api/AutoTune_c.h +2 -0
  7. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
  8. data/vendor/faiss/c_api/IndexShards_c.h +1 -4
  9. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
  10. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
  11. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
  12. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
  13. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
  14. data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
  15. data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
  16. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
  17. data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
  18. data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
  19. data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
  20. data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
  21. data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
  22. data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
  23. data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
  24. data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
  25. data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
  26. data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
  27. data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
  28. data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
  29. data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
  30. data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
  31. data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
  32. data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
  33. data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
  34. data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
  35. data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
  36. data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
  37. data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
  38. data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
  39. data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
  40. data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
  41. data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
  42. data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
  43. data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
  44. data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
  45. data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
  46. data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
  47. data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
  48. data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
  49. data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
  50. data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
  51. data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
  52. data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
  53. data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
  54. data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
  55. data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
  56. data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
  57. data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
  58. data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
  59. data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
  60. data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
  61. data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
  62. data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
  63. data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
  64. data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
  65. data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
  66. data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
  67. data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
  68. data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
  69. data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
  70. data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
  71. data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
  72. data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
  73. data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
  74. data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
  75. data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
  76. data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
  77. data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
  78. data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
  79. data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
  80. data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
  81. data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
  82. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
  83. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
  84. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
  85. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
  86. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
  87. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
  88. data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
  89. data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
  90. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
  91. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
  92. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
  93. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
  94. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
  95. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
  96. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
  97. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
  98. data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
  99. data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
  100. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
  101. data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
  102. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
  103. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
  104. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
  105. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
  106. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
  107. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
  108. data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
  109. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
  110. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
  111. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
  112. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
  113. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
  114. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
  115. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
  116. data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
  117. data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
  118. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
  119. data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
  120. data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
  121. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
  122. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
  123. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
  124. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
  125. data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
  126. data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
  127. data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
  128. data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
  129. data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
  130. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
  131. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
  132. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
  133. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
  134. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
  135. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
  136. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
  137. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
  138. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
  139. data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
  140. data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
  141. data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
  142. data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
  143. data/vendor/faiss/faiss/impl/io_macros.h +57 -0
  144. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
  145. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
  146. data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
  147. data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
  148. data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
  149. data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
  150. data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
  151. data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
  152. data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
  153. data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
  154. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
  155. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
  156. data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
  157. data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
  158. data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
  159. data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
  160. data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
  161. data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
  162. data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
  163. data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
  164. data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
  165. data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
  166. data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
  167. data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
  168. data/vendor/faiss/misc/test_blas.cpp +4 -1
  169. data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
  170. data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
  171. data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
  172. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
  173. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
  174. data/vendor/faiss/tests/test_merge.cpp +6 -3
  175. data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
  176. data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
  177. data/vendor/faiss/tests/test_params_override.cpp +7 -2
  178. data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
  179. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
  180. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
  181. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
  182. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
  183. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
  184. metadata +154 -153
  185. data/vendor/faiss/gpu/GpuResources.cpp +0 -52
  186. data/vendor/faiss/gpu/GpuResources.h +0 -73
  187. data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
  188. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
  189. data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
  190. data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
  191. data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
  192. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include <faiss/gpu/GpuIndexIVF.h>
12
+ #include <memory>
12
13
  #include <vector>
13
14
 
14
15
  namespace faiss { struct IndexIVFPQ; }
@@ -21,7 +22,9 @@ class IVFPQ;
21
22
  struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
22
23
  inline GpuIndexIVFPQConfig()
23
24
  : useFloat16LookupTables(false),
24
- usePrecomputedTables(false) {
25
+ usePrecomputedTables(false),
26
+ alternativeLayout(false),
27
+ useMMCodeDistance(false) {
25
28
  }
26
29
 
27
30
  /// Whether or not float16 residual distance tables are used in the
@@ -32,6 +35,20 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
32
35
  /// Whether or not we enable the precomputed table option for
33
36
  /// search, which can substantially increase the memory requirement.
34
37
  bool usePrecomputedTables;
38
+
39
+ /// Use the alternative memory layout for the IVF lists
40
+ /// WARNING: this is a feature under development, do not use!
41
+ bool alternativeLayout;
42
+
43
+ /// Use GEMM-backed computation of PQ code distances for the no precomputed
44
+ /// table version of IVFPQ.
45
+ /// This is for debugging purposes, it should not substantially affect the
46
+ /// results one way or another.
47
+ ///
48
+ /// Note that MM code distance is enabled automatically if one uses a number
49
+ /// of dimensions per sub-quantizer that is not natively specialized (an odd
50
+ /// number like 7 or so).
51
+ bool useMMCodeDistance;
35
52
  };
36
53
 
37
54
  /// IVFPQ index for the GPU
@@ -39,12 +56,12 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
39
56
  public:
40
57
  /// Construct from a pre-existing faiss::IndexIVFPQ instance, copying
41
58
  /// data over to the given GPU, if the input index is trained.
42
- GpuIndexIVFPQ(GpuResources* resources,
59
+ GpuIndexIVFPQ(GpuResourcesProvider* provider,
43
60
  const faiss::IndexIVFPQ* index,
44
61
  GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
45
62
 
46
63
  /// Construct an empty index
47
- GpuIndexIVFPQ(GpuResources* resources,
64
+ GpuIndexIVFPQ(GpuResourcesProvider* provider,
48
65
  int dims,
49
66
  int nlist,
50
67
  int subQuantizers,
@@ -137,7 +154,7 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
137
154
 
138
155
  /// The product quantizer instance that we own; contains the
139
156
  /// inverted lists
140
- IVFPQ* index_;
157
+ std::unique_ptr<IVFPQ> index_;
141
158
  };
142
159
 
143
160
  } } // namespace
@@ -10,6 +10,7 @@
10
10
 
11
11
  #include <faiss/gpu/GpuIndexIVF.h>
12
12
  #include <faiss/IndexScalarQuantizer.h>
13
+ #include <memory>
13
14
 
14
15
  namespace faiss { namespace gpu {
15
16
 
@@ -26,7 +27,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
26
27
  /// Construct from a pre-existing faiss::IndexIVFScalarQuantizer instance,
27
28
  /// copying data over to the given GPU, if the input index is trained.
28
29
  GpuIndexIVFScalarQuantizer(
29
- GpuResources* resources,
30
+ GpuResourcesProvider* provider,
30
31
  const faiss::IndexIVFScalarQuantizer* index,
31
32
  GpuIndexIVFScalarQuantizerConfig config =
32
33
  GpuIndexIVFScalarQuantizerConfig());
@@ -34,7 +35,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
34
35
  /// Constructs a new instance with an empty flat quantizer; the user
35
36
  /// provides the number of lists desired.
36
37
  GpuIndexIVFScalarQuantizer(
37
- GpuResources* resources,
38
+ GpuResourcesProvider* provider,
38
39
  int dims,
39
40
  int nlist,
40
41
  faiss::ScalarQuantizer::QuantizerType qtype,
@@ -94,7 +95,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
94
95
  size_t reserveMemoryVecs_;
95
96
 
96
97
  /// Instance that we own; contains the inverted list
97
- IVFFlat* index_;
98
+ std::unique_ptr<IVFFlat> index_;
98
99
  };
99
100
 
100
101
  } } // namespace
@@ -0,0 +1,200 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/GpuResources.h>
10
+ #include <faiss/gpu/utils/DeviceUtils.h>
11
+ #include <sstream>
12
+
13
+ namespace faiss { namespace gpu {
14
+
15
+ std::string allocTypeToString(AllocType t) {
16
+ switch (t) {
17
+ case AllocType::Other:
18
+ return "Other";
19
+ case AllocType::FlatData:
20
+ return "FlatData";
21
+ case AllocType::IVFLists:
22
+ return "IVFLists";
23
+ case AllocType::Quantizer:
24
+ return "Quantizer";
25
+ case AllocType::QuantizerPrecomputedCodes:
26
+ return "QuantizerPrecomputedCodes";
27
+ case AllocType::TemporaryMemoryBuffer:
28
+ return "TemporaryMemoryBuffer";
29
+ case AllocType::TemporaryMemoryOverflow:
30
+ return "TemporaryMemoryOverflow";
31
+ default:
32
+ return "Unknown";
33
+ }
34
+ }
35
+
36
+ std::string memorySpaceToString(MemorySpace s) {
37
+ switch (s) {
38
+ case MemorySpace::Temporary:
39
+ return "Temporary";
40
+ case MemorySpace::Device:
41
+ return "Device";
42
+ case MemorySpace::Unified:
43
+ return "Unified";
44
+ default:
45
+ return "Unknown";
46
+ }
47
+ }
48
+
49
+ std::string
50
+ AllocInfo::toString() const {
51
+ std::stringstream ss;
52
+ ss << "type " << allocTypeToString(type)
53
+ << " dev " << device
54
+ << " space " << memorySpaceToString(space)
55
+ << " stream " << (void*) stream;
56
+
57
+ return ss.str();
58
+ }
59
+
60
+ std::string
61
+ AllocRequest::toString() const {
62
+ std::stringstream ss;
63
+ ss << AllocInfo::toString() << " size " << size << " bytes";
64
+
65
+ return ss.str();
66
+ }
67
+
68
+ AllocInfo makeDevAlloc(AllocType at, cudaStream_t st) {
69
+ return AllocInfo(at, getCurrentDevice(), MemorySpace::Device, st);
70
+ }
71
+
72
+ AllocInfo makeTempAlloc(AllocType at, cudaStream_t st) {
73
+ return AllocInfo(at, getCurrentDevice(), MemorySpace::Temporary, st);
74
+ }
75
+
76
+ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st) {
77
+ return AllocInfo(at, getCurrentDevice(), sp, st);
78
+ }
79
+
80
+ //
81
+ // GpuMemoryReservation
82
+ //
83
+
84
+ GpuMemoryReservation::GpuMemoryReservation()
85
+ : res(nullptr),
86
+ device(0),
87
+ stream(nullptr),
88
+ data(nullptr),
89
+ size(0) {
90
+ }
91
+
92
+ GpuMemoryReservation::GpuMemoryReservation(GpuResources* r,
93
+ int dev,
94
+ cudaStream_t str,
95
+ void* p,
96
+ size_t sz)
97
+ : res(r),
98
+ device(dev),
99
+ stream(str),
100
+ data(p),
101
+ size(sz) {
102
+ }
103
+
104
+ GpuMemoryReservation::GpuMemoryReservation(GpuMemoryReservation&& m) noexcept {
105
+ res = m.res; m.res = nullptr;
106
+ device = m.device; m.device = 0;
107
+ stream = m.stream; m.stream = nullptr;
108
+ data = m.data; m.data = nullptr;
109
+ size = m.size; m.size = 0;
110
+ }
111
+
112
+ GpuMemoryReservation&
113
+ GpuMemoryReservation::operator=(GpuMemoryReservation&& m) {
114
+ // Can't be both a valid allocation and the same allocation
115
+ FAISS_ASSERT(!(res && res == m.res && device == m.device && data == m.data));
116
+
117
+ release();
118
+ res = m.res; m.res = nullptr;
119
+ device = m.device; m.device = 0;
120
+ stream = m.stream; m.stream = nullptr;
121
+ data = m.data; m.data = nullptr;
122
+ size = m.size; m.size = 0;
123
+
124
+ return *this;
125
+ }
126
+
127
+ void
128
+ GpuMemoryReservation::release() {
129
+ if (res) {
130
+ res->deallocMemory(device, data);
131
+ res = nullptr;
132
+ device = 0;
133
+ stream = nullptr;
134
+ data = nullptr;
135
+ size = 0;
136
+ }
137
+ }
138
+
139
+ GpuMemoryReservation::~GpuMemoryReservation() {
140
+ if (res) {
141
+ res->deallocMemory(device, data);
142
+ }
143
+ }
144
+
145
+ //
146
+ // GpuResources
147
+ //
148
+
149
+ GpuResources::~GpuResources() {
150
+ }
151
+
152
+ cublasHandle_t
153
+ GpuResources::getBlasHandleCurrentDevice() {
154
+ return getBlasHandle(getCurrentDevice());
155
+ }
156
+
157
+ cudaStream_t
158
+ GpuResources::getDefaultStreamCurrentDevice() {
159
+ return getDefaultStream(getCurrentDevice());
160
+ }
161
+
162
+ std::vector<cudaStream_t>
163
+ GpuResources::getAlternateStreamsCurrentDevice() {
164
+ return getAlternateStreams(getCurrentDevice());
165
+ }
166
+
167
+ cudaStream_t
168
+ GpuResources::getAsyncCopyStreamCurrentDevice() {
169
+ return getAsyncCopyStream(getCurrentDevice());
170
+ }
171
+
172
+ void
173
+ GpuResources::syncDefaultStream(int device) {
174
+ CUDA_VERIFY(cudaStreamSynchronize(getDefaultStream(device)));
175
+ }
176
+
177
+ void
178
+ GpuResources::syncDefaultStreamCurrentDevice() {
179
+ syncDefaultStream(getCurrentDevice());
180
+ }
181
+
182
+ GpuMemoryReservation
183
+ GpuResources::allocMemoryHandle(const AllocRequest& req) {
184
+ return GpuMemoryReservation(
185
+ this, req.device, req.stream, allocMemory(req), req.size);
186
+ }
187
+
188
+ size_t
189
+ GpuResources::getTempMemoryAvailableCurrentDevice() const {
190
+ return getTempMemoryAvailable(getCurrentDevice());
191
+ }
192
+
193
+ //
194
+ // GpuResourcesProvider
195
+ //
196
+
197
+ GpuResourcesProvider::~GpuResourcesProvider() {
198
+ }
199
+
200
+ } } // namespace
@@ -0,0 +1,264 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <faiss/impl/FaissAssert.h>
12
+ #include <cuda_runtime.h>
13
+ #include <cublas_v2.h>
14
+ #include <memory>
15
+ #include <utility>
16
+ #include <vector>
17
+
18
+ namespace faiss { namespace gpu {
19
+
20
+ class GpuResources;
21
+
22
+ enum AllocType {
23
+ /// Unknown allocation type or miscellaneous (not currently categorized)
24
+ Other = 0,
25
+
26
+ /// Primary data storage for GpuIndexFlat (the raw matrix of vectors and
27
+ /// vector norms if needed)
28
+ FlatData = 1,
29
+
30
+ /// Primary data storage for GpuIndexIVF* (the storage for each individual IVF
31
+ /// list)
32
+ IVFLists = 2,
33
+
34
+ /// Quantizer (PQ, SQ) dictionary information
35
+ Quantizer = 3,
36
+
37
+ /// For GpuIndexIVFPQ, "precomputed codes" for more efficient PQ lookup
38
+ /// require the use of possibly large tables. These are marked separately from
39
+ /// Quantizer as these can frequently be 100s - 1000s of MiB in size
40
+ QuantizerPrecomputedCodes = 4,
41
+
42
+ ///
43
+ /// StandardGpuResources implementation specific types
44
+ ///
45
+
46
+ /// When using StandardGpuResources, temporary memory allocations
47
+ /// (MemorySpace::Temporary) come out of a stack region of memory that is
48
+ /// allocated up front for each gpu (e.g., 1.5 GiB upon initialization). This
49
+ /// allocation by StandardGpuResources is marked with this AllocType.
50
+ TemporaryMemoryBuffer = 10,
51
+
52
+ /// When using StandardGpuResources, any MemorySpace::Temporary allocations
53
+ /// that cannot be satisfied within the TemporaryMemoryBuffer region fall back
54
+ /// to cudaMalloc calls, which are sized to just the request at hand. These
55
+ /// "overflow" temporary allocations are marked with this AllocType.
56
+ TemporaryMemoryOverflow = 11,
57
+ };
58
+
59
+ /// Convert an AllocType to string
60
+ std::string allocTypeToString(AllocType t);
61
+
62
+ /// Memory regions accessible to the GPU
63
+ enum MemorySpace {
64
+ /// Temporary device memory (guaranteed to no longer be used upon exit of a
65
+ /// top-level index call, and where the streams using it have completed GPU
66
+ /// work). Typically backed by Device memory (cudaMalloc/cudaFree).
67
+ Temporary = 0,
68
+
69
+ /// Managed using cudaMalloc/cudaFree (typical GPU device memory)
70
+ Device = 1,
71
+
72
+ /// Managed using cudaMallocManaged/cudaFree (typical Unified CPU/GPU memory)
73
+ Unified = 2,
74
+ };
75
+
76
+ /// Convert a MemorySpace to string
77
+ std::string memorySpaceToString(MemorySpace s);
78
+
79
+ /// Information on what/where an allocation is
80
+ struct AllocInfo {
81
+ inline AllocInfo()
82
+ : type(AllocType::Other),
83
+ device(0),
84
+ space(MemorySpace::Device),
85
+ stream(nullptr) {
86
+ }
87
+
88
+ inline AllocInfo(AllocType at,
89
+ int dev,
90
+ MemorySpace sp,
91
+ cudaStream_t st)
92
+ : type(at),
93
+ device(dev),
94
+ space(sp),
95
+ stream(st) {
96
+ }
97
+
98
+ /// Returns a string representation of this info
99
+ std::string toString() const;
100
+
101
+ /// The internal category of the allocation
102
+ AllocType type;
103
+
104
+ /// The device on which the allocation is happening
105
+ int device;
106
+
107
+ /// The memory space of the allocation
108
+ MemorySpace space;
109
+
110
+ /// The stream on which new work on the memory will be ordered (e.g., if a
111
+ /// piece of memory cached and to be returned for this call was last used on
112
+ /// stream 3 and a new memory request is for stream 4, the memory manager will
113
+ /// synchronize stream 4 to wait for the completion of stream 3 via events or
114
+ /// other stream synchronization).
115
+ ///
116
+ /// The memory manager guarantees that the returned memory is free to use
117
+ /// without data races on the specified stream.
118
+ cudaStream_t stream;
119
+ };
120
+
121
+ /// Create an AllocInfo for the current device with MemorySpace::Device
122
+ AllocInfo makeDevAlloc(AllocType at, cudaStream_t st);
123
+
124
+ /// Create an AllocInfo for the current device with MemorySpace::Temporary
125
+ AllocInfo makeTempAlloc(AllocType at, cudaStream_t st);
126
+
127
+ /// Create an AllocInfo for the current device
128
+ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
129
+
130
+ /// Information on what/where an allocation is, along with how big it should be
131
+ struct AllocRequest : public AllocInfo {
132
+ inline AllocRequest()
133
+ : AllocInfo(),
134
+ size(0) {
135
+ }
136
+
137
+ inline AllocRequest(const AllocInfo& info,
138
+ size_t sz)
139
+ : AllocInfo(info),
140
+ size(sz) {
141
+ }
142
+
143
+ inline AllocRequest(AllocType at,
144
+ int dev,
145
+ MemorySpace sp,
146
+ cudaStream_t st,
147
+ size_t sz)
148
+ : AllocInfo(at, dev, sp, st),
149
+ size(sz) {
150
+ }
151
+
152
+ /// Returns a string representation of this request
153
+ std::string toString() const;
154
+
155
+ /// The size in bytes of the allocation
156
+ size_t size;
157
+ };
158
+
159
+ /// A RAII object that manages a temporary memory request
160
+ struct GpuMemoryReservation {
161
+ GpuMemoryReservation();
162
+ GpuMemoryReservation(GpuResources* r,
163
+ int dev,
164
+ cudaStream_t str,
165
+ void* p,
166
+ size_t sz);
167
+ GpuMemoryReservation(GpuMemoryReservation&& m) noexcept;
168
+ ~GpuMemoryReservation();
169
+
170
+ GpuMemoryReservation& operator=(GpuMemoryReservation&& m);
171
+
172
+ inline void* get() { return data; }
173
+
174
+ void release();
175
+
176
+ GpuResources* res;
177
+ int device;
178
+ cudaStream_t stream;
179
+ void* data;
180
+ size_t size;
181
+ };
182
+
183
+ /// Base class of GPU-side resource provider; hides provision of
184
+ /// cuBLAS handles, CUDA streams and all device memory allocation performed
185
+ class GpuResources {
186
+ public:
187
+ virtual ~GpuResources();
188
+
189
+ /// Call to pre-allocate resources for a particular device. If this is
190
+ /// not called, then resources will be allocated at the first time
191
+ /// of demand
192
+ virtual void initializeForDevice(int device) = 0;
193
+
194
+ /// Returns the cuBLAS handle that we use for the given device
195
+ virtual cublasHandle_t getBlasHandle(int device) = 0;
196
+
197
+ /// Returns the stream that we order all computation on for the
198
+ /// given device
199
+ virtual cudaStream_t getDefaultStream(int device) = 0;
200
+
201
+ /// Returns the set of alternative streams that we use for the given device
202
+ virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
203
+
204
+ /// Memory management
205
+ /// Returns an allocation from the given memory space, ordered with respect to
206
+ /// the given stream (i.e., the first user will be a kernel in this stream).
207
+ /// All allocations are sized internally to be the next highest multiple of 16
208
+ /// bytes, and all allocations returned are guaranteed to be 16 byte aligned.
209
+ virtual void* allocMemory(const AllocRequest& req) = 0;
210
+
211
+ /// Returns a previous allocation
212
+ virtual void deallocMemory(int device, void* in) = 0;
213
+
214
+ /// For MemorySpace::Temporary, how much space is immediately available
215
+ /// without cudaMalloc allocation?
216
+ virtual size_t getTempMemoryAvailable(int device) const = 0;
217
+
218
+ /// Returns the available CPU pinned memory buffer
219
+ virtual std::pair<void*, size_t> getPinnedMemory() = 0;
220
+
221
+ /// Returns the stream on which we perform async CPU <-> GPU copies
222
+ virtual cudaStream_t getAsyncCopyStream(int device) = 0;
223
+
224
+ ///
225
+ /// Functions provided by default
226
+ ///
227
+
228
+ /// Calls getBlasHandle with the current device
229
+ cublasHandle_t getBlasHandleCurrentDevice();
230
+
231
+ /// Calls getDefaultStream with the current device
232
+ cudaStream_t getDefaultStreamCurrentDevice();
233
+
234
+ /// Calls getTempMemoryAvailable with the current device
235
+ size_t getTempMemoryAvailableCurrentDevice() const;
236
+
237
+ /// Returns a temporary memory allocation via a RAII object
238
+ GpuMemoryReservation allocMemoryHandle(const AllocRequest& req);
239
+
240
+ /// Synchronizes the CPU with respect to the default stream for the
241
+ /// given device
242
+ // equivalent to cudaStreamSynchronize(getDefaultStream(device))
243
+ void syncDefaultStream(int device);
244
+
245
+ /// Calls syncDefaultStream for the current device
246
+ void syncDefaultStreamCurrentDevice();
247
+
248
+ /// Calls getAlternateStreams for the current device
249
+ std::vector<cudaStream_t> getAlternateStreamsCurrentDevice();
250
+
251
+ /// Calls getAsyncCopyStream for the current device
252
+ cudaStream_t getAsyncCopyStreamCurrentDevice();
253
+ };
254
+
255
+ /// Interface for a provider of a shared resources object
256
+ class GpuResourcesProvider {
257
+ public:
258
+ virtual ~GpuResourcesProvider();
259
+
260
+ /// Returns the shared resources object
261
+ virtual std::shared_ptr<GpuResources> getResources() = 0;
262
+ };
263
+
264
+ } } // namespace