faiss 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
  6. data/vendor/faiss/c_api/AutoTune_c.h +2 -0
  7. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
  8. data/vendor/faiss/c_api/IndexShards_c.h +1 -4
  9. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
  10. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
  11. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
  12. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
  13. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
  14. data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
  15. data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
  16. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
  17. data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
  18. data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
  19. data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
  20. data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
  21. data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
  22. data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
  23. data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
  24. data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
  25. data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
  26. data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
  27. data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
  28. data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
  29. data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
  30. data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
  31. data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
  32. data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
  33. data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
  34. data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
  35. data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
  36. data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
  37. data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
  38. data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
  39. data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
  40. data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
  41. data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
  42. data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
  43. data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
  44. data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
  45. data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
  46. data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
  47. data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
  48. data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
  49. data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
  50. data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
  51. data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
  52. data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
  53. data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
  54. data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
  55. data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
  56. data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
  57. data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
  58. data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
  59. data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
  60. data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
  61. data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
  62. data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
  63. data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
  64. data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
  65. data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
  66. data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
  67. data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
  68. data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
  69. data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
  70. data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
  71. data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
  72. data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
  73. data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
  74. data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
  75. data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
  76. data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
  77. data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
  78. data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
  79. data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
  80. data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
  81. data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
  82. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
  83. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
  84. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
  85. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
  86. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
  87. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
  88. data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
  89. data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
  90. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
  91. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
  92. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
  93. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
  94. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
  95. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
  96. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
  97. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
  98. data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
  99. data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
  100. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
  101. data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
  102. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
  103. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
  104. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
  105. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
  106. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
  107. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
  108. data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
  109. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
  110. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
  111. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
  112. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
  113. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
  114. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
  115. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
  116. data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
  117. data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
  118. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
  119. data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
  120. data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
  121. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
  122. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
  123. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
  124. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
  125. data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
  126. data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
  127. data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
  128. data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
  129. data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
  130. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
  131. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
  132. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
  133. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
  134. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
  135. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
  136. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
  137. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
  138. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
  139. data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
  140. data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
  141. data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
  142. data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
  143. data/vendor/faiss/faiss/impl/io_macros.h +57 -0
  144. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
  145. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
  146. data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
  147. data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
  148. data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
  149. data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
  150. data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
  151. data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
  152. data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
  153. data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
  154. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
  155. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
  156. data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
  157. data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
  158. data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
  159. data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
  160. data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
  161. data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
  162. data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
  163. data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
  164. data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
  165. data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
  166. data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
  167. data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
  168. data/vendor/faiss/misc/test_blas.cpp +4 -1
  169. data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
  170. data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
  171. data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
  172. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
  173. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
  174. data/vendor/faiss/tests/test_merge.cpp +6 -3
  175. data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
  176. data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
  177. data/vendor/faiss/tests/test_params_override.cpp +7 -2
  178. data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
  179. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
  180. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
  181. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
  182. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
  183. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
  184. metadata +154 -153
  185. data/vendor/faiss/gpu/GpuResources.cpp +0 -52
  186. data/vendor/faiss/gpu/GpuResources.h +0 -73
  187. data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
  188. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
  189. data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
  190. data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
  191. data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
  192. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include <faiss/gpu/GpuIndexIVF.h>
12
+ #include <memory>
12
13
  #include <vector>
13
14
 
14
15
  namespace faiss { struct IndexIVFPQ; }
@@ -21,7 +22,9 @@ class IVFPQ;
21
22
  struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
22
23
  inline GpuIndexIVFPQConfig()
23
24
  : useFloat16LookupTables(false),
24
- usePrecomputedTables(false) {
25
+ usePrecomputedTables(false),
26
+ alternativeLayout(false),
27
+ useMMCodeDistance(false) {
25
28
  }
26
29
 
27
30
  /// Whether or not float16 residual distance tables are used in the
@@ -32,6 +35,20 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
32
35
  /// Whether or not we enable the precomputed table option for
33
36
  /// search, which can substantially increase the memory requirement.
34
37
  bool usePrecomputedTables;
38
+
39
+ /// Use the alternative memory layout for the IVF lists
40
+ /// WARNING: this is a feature under development, do not use!
41
+ bool alternativeLayout;
42
+
43
+ /// Use GEMM-backed computation of PQ code distances for the no precomputed
44
+ /// table version of IVFPQ.
45
+ /// This is for debugging purposes, it should not substantially affect the
46
+ /// results one way or another.
47
+ ///
48
+ /// Note that MM code distance is enabled automatically if one uses a number
49
+ /// of dimensions per sub-quantizer that is not natively specialized (an odd
50
+ /// number like 7 or so).
51
+ bool useMMCodeDistance;
35
52
  };
36
53
 
37
54
  /// IVFPQ index for the GPU
@@ -39,12 +56,12 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
39
56
  public:
40
57
  /// Construct from a pre-existing faiss::IndexIVFPQ instance, copying
41
58
  /// data over to the given GPU, if the input index is trained.
42
- GpuIndexIVFPQ(GpuResources* resources,
59
+ GpuIndexIVFPQ(GpuResourcesProvider* provider,
43
60
  const faiss::IndexIVFPQ* index,
44
61
  GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
45
62
 
46
63
  /// Construct an empty index
47
- GpuIndexIVFPQ(GpuResources* resources,
64
+ GpuIndexIVFPQ(GpuResourcesProvider* provider,
48
65
  int dims,
49
66
  int nlist,
50
67
  int subQuantizers,
@@ -137,7 +154,7 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
137
154
 
138
155
  /// The product quantizer instance that we own; contains the
139
156
  /// inverted lists
140
- IVFPQ* index_;
157
+ std::unique_ptr<IVFPQ> index_;
141
158
  };
142
159
 
143
160
  } } // namespace
@@ -10,6 +10,7 @@
10
10
 
11
11
  #include <faiss/gpu/GpuIndexIVF.h>
12
12
  #include <faiss/IndexScalarQuantizer.h>
13
+ #include <memory>
13
14
 
14
15
  namespace faiss { namespace gpu {
15
16
 
@@ -26,7 +27,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
26
27
  /// Construct from a pre-existing faiss::IndexIVFScalarQuantizer instance,
27
28
  /// copying data over to the given GPU, if the input index is trained.
28
29
  GpuIndexIVFScalarQuantizer(
29
- GpuResources* resources,
30
+ GpuResourcesProvider* provider,
30
31
  const faiss::IndexIVFScalarQuantizer* index,
31
32
  GpuIndexIVFScalarQuantizerConfig config =
32
33
  GpuIndexIVFScalarQuantizerConfig());
@@ -34,7 +35,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
34
35
  /// Constructs a new instance with an empty flat quantizer; the user
35
36
  /// provides the number of lists desired.
36
37
  GpuIndexIVFScalarQuantizer(
37
- GpuResources* resources,
38
+ GpuResourcesProvider* provider,
38
39
  int dims,
39
40
  int nlist,
40
41
  faiss::ScalarQuantizer::QuantizerType qtype,
@@ -94,7 +95,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
94
95
  size_t reserveMemoryVecs_;
95
96
 
96
97
  /// Instance that we own; contains the inverted list
97
- IVFFlat* index_;
98
+ std::unique_ptr<IVFFlat> index_;
98
99
  };
99
100
 
100
101
  } } // namespace
@@ -0,0 +1,200 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/GpuResources.h>
10
+ #include <faiss/gpu/utils/DeviceUtils.h>
11
+ #include <sstream>
12
+
13
+ namespace faiss { namespace gpu {
14
+
15
+ std::string allocTypeToString(AllocType t) {
16
+ switch (t) {
17
+ case AllocType::Other:
18
+ return "Other";
19
+ case AllocType::FlatData:
20
+ return "FlatData";
21
+ case AllocType::IVFLists:
22
+ return "IVFLists";
23
+ case AllocType::Quantizer:
24
+ return "Quantizer";
25
+ case AllocType::QuantizerPrecomputedCodes:
26
+ return "QuantizerPrecomputedCodes";
27
+ case AllocType::TemporaryMemoryBuffer:
28
+ return "TemporaryMemoryBuffer";
29
+ case AllocType::TemporaryMemoryOverflow:
30
+ return "TemporaryMemoryOverflow";
31
+ default:
32
+ return "Unknown";
33
+ }
34
+ }
35
+
36
+ std::string memorySpaceToString(MemorySpace s) {
37
+ switch (s) {
38
+ case MemorySpace::Temporary:
39
+ return "Temporary";
40
+ case MemorySpace::Device:
41
+ return "Device";
42
+ case MemorySpace::Unified:
43
+ return "Unified";
44
+ default:
45
+ return "Unknown";
46
+ }
47
+ }
48
+
49
+ std::string
50
+ AllocInfo::toString() const {
51
+ std::stringstream ss;
52
+ ss << "type " << allocTypeToString(type)
53
+ << " dev " << device
54
+ << " space " << memorySpaceToString(space)
55
+ << " stream " << (void*) stream;
56
+
57
+ return ss.str();
58
+ }
59
+
60
+ std::string
61
+ AllocRequest::toString() const {
62
+ std::stringstream ss;
63
+ ss << AllocInfo::toString() << " size " << size << " bytes";
64
+
65
+ return ss.str();
66
+ }
67
+
68
+ AllocInfo makeDevAlloc(AllocType at, cudaStream_t st) {
69
+ return AllocInfo(at, getCurrentDevice(), MemorySpace::Device, st);
70
+ }
71
+
72
+ AllocInfo makeTempAlloc(AllocType at, cudaStream_t st) {
73
+ return AllocInfo(at, getCurrentDevice(), MemorySpace::Temporary, st);
74
+ }
75
+
76
+ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st) {
77
+ return AllocInfo(at, getCurrentDevice(), sp, st);
78
+ }
79
+
80
+ //
81
+ // GpuMemoryReservation
82
+ //
83
+
84
+ GpuMemoryReservation::GpuMemoryReservation()
85
+ : res(nullptr),
86
+ device(0),
87
+ stream(nullptr),
88
+ data(nullptr),
89
+ size(0) {
90
+ }
91
+
92
+ GpuMemoryReservation::GpuMemoryReservation(GpuResources* r,
93
+ int dev,
94
+ cudaStream_t str,
95
+ void* p,
96
+ size_t sz)
97
+ : res(r),
98
+ device(dev),
99
+ stream(str),
100
+ data(p),
101
+ size(sz) {
102
+ }
103
+
104
+ GpuMemoryReservation::GpuMemoryReservation(GpuMemoryReservation&& m) noexcept {
105
+ res = m.res; m.res = nullptr;
106
+ device = m.device; m.device = 0;
107
+ stream = m.stream; m.stream = nullptr;
108
+ data = m.data; m.data = nullptr;
109
+ size = m.size; m.size = 0;
110
+ }
111
+
112
+ GpuMemoryReservation&
113
+ GpuMemoryReservation::operator=(GpuMemoryReservation&& m) {
114
+ // Can't be both a valid allocation and the same allocation
115
+ FAISS_ASSERT(!(res && res == m.res && device == m.device && data == m.data));
116
+
117
+ release();
118
+ res = m.res; m.res = nullptr;
119
+ device = m.device; m.device = 0;
120
+ stream = m.stream; m.stream = nullptr;
121
+ data = m.data; m.data = nullptr;
122
+ size = m.size; m.size = 0;
123
+
124
+ return *this;
125
+ }
126
+
127
+ void
128
+ GpuMemoryReservation::release() {
129
+ if (res) {
130
+ res->deallocMemory(device, data);
131
+ res = nullptr;
132
+ device = 0;
133
+ stream = nullptr;
134
+ data = nullptr;
135
+ size = 0;
136
+ }
137
+ }
138
+
139
+ GpuMemoryReservation::~GpuMemoryReservation() {
140
+ if (res) {
141
+ res->deallocMemory(device, data);
142
+ }
143
+ }
144
+
145
+ //
146
+ // GpuResources
147
+ //
148
+
149
+ GpuResources::~GpuResources() {
150
+ }
151
+
152
+ cublasHandle_t
153
+ GpuResources::getBlasHandleCurrentDevice() {
154
+ return getBlasHandle(getCurrentDevice());
155
+ }
156
+
157
+ cudaStream_t
158
+ GpuResources::getDefaultStreamCurrentDevice() {
159
+ return getDefaultStream(getCurrentDevice());
160
+ }
161
+
162
+ std::vector<cudaStream_t>
163
+ GpuResources::getAlternateStreamsCurrentDevice() {
164
+ return getAlternateStreams(getCurrentDevice());
165
+ }
166
+
167
+ cudaStream_t
168
+ GpuResources::getAsyncCopyStreamCurrentDevice() {
169
+ return getAsyncCopyStream(getCurrentDevice());
170
+ }
171
+
172
+ void
173
+ GpuResources::syncDefaultStream(int device) {
174
+ CUDA_VERIFY(cudaStreamSynchronize(getDefaultStream(device)));
175
+ }
176
+
177
+ void
178
+ GpuResources::syncDefaultStreamCurrentDevice() {
179
+ syncDefaultStream(getCurrentDevice());
180
+ }
181
+
182
+ GpuMemoryReservation
183
+ GpuResources::allocMemoryHandle(const AllocRequest& req) {
184
+ return GpuMemoryReservation(
185
+ this, req.device, req.stream, allocMemory(req), req.size);
186
+ }
187
+
188
+ size_t
189
+ GpuResources::getTempMemoryAvailableCurrentDevice() const {
190
+ return getTempMemoryAvailable(getCurrentDevice());
191
+ }
192
+
193
+ //
194
+ // GpuResourcesProvider
195
+ //
196
+
197
+ GpuResourcesProvider::~GpuResourcesProvider() {
198
+ }
199
+
200
+ } } // namespace
@@ -0,0 +1,264 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <faiss/impl/FaissAssert.h>
12
+ #include <cuda_runtime.h>
13
+ #include <cublas_v2.h>
14
+ #include <memory>
15
+ #include <utility>
16
+ #include <vector>
17
+
18
+ namespace faiss { namespace gpu {
19
+
20
+ class GpuResources;
21
+
22
+ enum AllocType {
23
+ /// Unknown allocation type or miscellaneous (not currently categorized)
24
+ Other = 0,
25
+
26
+ /// Primary data storage for GpuIndexFlat (the raw matrix of vectors and
27
+ /// vector norms if needed)
28
+ FlatData = 1,
29
+
30
+ /// Primary data storage for GpuIndexIVF* (the storage for each individual IVF
31
+ /// list)
32
+ IVFLists = 2,
33
+
34
+ /// Quantizer (PQ, SQ) dictionary information
35
+ Quantizer = 3,
36
+
37
+ /// For GpuIndexIVFPQ, "precomputed codes" for more efficient PQ lookup
38
+ /// require the use of possibly large tables. These are marked separately from
39
+ /// Quantizer as these can frequently be 100s - 1000s of MiB in size
40
+ QuantizerPrecomputedCodes = 4,
41
+
42
+ ///
43
+ /// StandardGpuResources implementation specific types
44
+ ///
45
+
46
+ /// When using StandardGpuResources, temporary memory allocations
47
+ /// (MemorySpace::Temporary) come out of a stack region of memory that is
48
+ /// allocated up front for each gpu (e.g., 1.5 GiB upon initialization). This
49
+ /// allocation by StandardGpuResources is marked with this AllocType.
50
+ TemporaryMemoryBuffer = 10,
51
+
52
+ /// When using StandardGpuResources, any MemorySpace::Temporary allocations
53
+ /// that cannot be satisfied within the TemporaryMemoryBuffer region fall back
54
+ /// to calling cudaMalloc which are sized to just the request at hand. These
55
+ /// "overflow" temporary allocations are marked with this AllocType.
56
+ TemporaryMemoryOverflow = 11,
57
+ };
58
+
59
+ /// Convert an AllocType to string
60
+ std::string allocTypeToString(AllocType t);
61
+
62
+ /// Memory regions accessible to the GPU
63
+ enum MemorySpace {
64
+ /// Temporary device memory (guaranteed to no longer be used upon exit of a
65
+ /// top-level index call, and where the streams using it have completed GPU
66
+ /// work). Typically backed by Device memory (cudaMalloc/cudaFree).
67
+ Temporary = 0,
68
+
69
+ /// Managed using cudaMalloc/cudaFree (typical GPU device memory)
70
+ Device = 1,
71
+
72
+ /// Managed using cudaMallocManaged/cudaFree (typical Unified CPU/GPU memory)
73
+ Unified = 2,
74
+ };
75
+
76
+ /// Convert a MemorySpace to string
77
+ std::string memorySpaceToString(MemorySpace s);
78
+
79
+ /// Information on what/where an allocation is
80
+ struct AllocInfo {
81
+ inline AllocInfo()
82
+ : type(AllocType::Other),
83
+ device(0),
84
+ space(MemorySpace::Device),
85
+ stream(nullptr) {
86
+ }
87
+
88
+ inline AllocInfo(AllocType at,
89
+ int dev,
90
+ MemorySpace sp,
91
+ cudaStream_t st)
92
+ : type(at),
93
+ device(dev),
94
+ space(sp),
95
+ stream(st) {
96
+ }
97
+
98
+ /// Returns a string representation of this info
99
+ std::string toString() const;
100
+
101
+ /// The internal category of the allocation
102
+ AllocType type;
103
+
104
+ /// The device on which the allocation is happening
105
+ int device;
106
+
107
+ /// The memory space of the allocation
108
+ MemorySpace space;
109
+
110
+ /// The stream on which new work on the memory will be ordered (e.g., if a
111
+ /// piece of memory cached and to be returned for this call was last used on
112
+ /// stream 3 and a new memory request is for stream 4, the memory manager will
113
+ /// synchronize stream 4 to wait for the completion of stream 3 via events or
114
+ /// other stream synchronization).
115
+ ///
116
+ /// The memory manager guarantees that the returned memory is free to use
117
+ /// without data races on this stream specified.
118
+ cudaStream_t stream;
119
+ };
120
+
121
+ /// Create an AllocInfo for the current device with MemorySpace::Device
122
+ AllocInfo makeDevAlloc(AllocType at, cudaStream_t st);
123
+
124
+ /// Create an AllocInfo for the current device with MemorySpace::Temporary
125
+ AllocInfo makeTempAlloc(AllocType at, cudaStream_t st);
126
+
127
+ /// Create an AllocInfo for the current device
128
+ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
129
+
130
+ /// Information on what/where an allocation is, along with how big it should be
131
+ struct AllocRequest : public AllocInfo {
132
+ inline AllocRequest()
133
+ : AllocInfo(),
134
+ size(0) {
135
+ }
136
+
137
+ inline AllocRequest(const AllocInfo& info,
138
+ size_t sz)
139
+ : AllocInfo(info),
140
+ size(sz) {
141
+ }
142
+
143
+ inline AllocRequest(AllocType at,
144
+ int dev,
145
+ MemorySpace sp,
146
+ cudaStream_t st,
147
+ size_t sz)
148
+ : AllocInfo(at, dev, sp, st),
149
+ size(sz) {
150
+ }
151
+
152
+ /// Returns a string representation of this request
153
+ std::string toString() const;
154
+
155
+ /// The size in bytes of the allocation
156
+ size_t size;
157
+ };
158
+
159
+ /// An RAII object that owns a memory allocation obtained from a GpuResources and returns it via deallocMemory when released or destroyed
160
+ struct GpuMemoryReservation {
161
+ GpuMemoryReservation();
162
+ GpuMemoryReservation(GpuResources* r,
163
+ int dev,
164
+ cudaStream_t str,
165
+ void* p,
166
+ size_t sz);
167
+ GpuMemoryReservation(GpuMemoryReservation&& m) noexcept;
168
+ ~GpuMemoryReservation();
169
+
170
+ GpuMemoryReservation& operator=(GpuMemoryReservation&& m);
171
+
172
+ inline void* get() { return data; }
173
+
174
+ void release();
175
+
176
+ GpuResources* res;
177
+ int device;
178
+ cudaStream_t stream;
179
+ void* data;
180
+ size_t size;
181
+ };
182
+
183
+ /// Base class of GPU-side resource provider; hides provision of
184
+ /// cuBLAS handles, CUDA streams, and all device memory allocation.
185
+ class GpuResources {
186
+ public:
187
+ virtual ~GpuResources();
188
+
189
+ /// Call to pre-allocate resources for a particular device. If this is
190
+ /// not called, then resources will be allocated at the first time
191
+ /// of demand
192
+ virtual void initializeForDevice(int device) = 0;
193
+
194
+ /// Returns the cuBLAS handle that we use for the given device
195
+ virtual cublasHandle_t getBlasHandle(int device) = 0;
196
+
197
+ /// Returns the stream that we order all computation on for the
198
+ /// given device
199
+ virtual cudaStream_t getDefaultStream(int device) = 0;
200
+
201
+ /// Returns the set of alternative streams that we use for the given device
202
+ virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
203
+
204
+ /// Memory management
205
+ /// Returns an allocation from the given memory space, ordered with respect to
206
+ /// the given stream (i.e., the first user will be a kernel in this stream).
207
+ /// All allocations are sized internally to be the next highest multiple of 16
208
+ /// bytes, and all allocations returned are guaranteed to be 16 byte aligned.
209
+ virtual void* allocMemory(const AllocRequest& req) = 0;
210
+
211
+ /// Returns a previous allocation
212
+ virtual void deallocMemory(int device, void* in) = 0;
213
+
214
+ /// For MemorySpace::Temporary, how much space is immediately available
215
+ /// without cudaMalloc allocation?
216
+ virtual size_t getTempMemoryAvailable(int device) const = 0;
217
+
218
+ /// Returns the available CPU pinned memory buffer
219
+ virtual std::pair<void*, size_t> getPinnedMemory() = 0;
220
+
221
+ /// Returns the stream on which we perform async CPU <-> GPU copies
222
+ virtual cudaStream_t getAsyncCopyStream(int device) = 0;
223
+
224
+ ///
225
+ /// Functions provided by default
226
+ ///
227
+
228
+ /// Calls getBlasHandle with the current device
229
+ cublasHandle_t getBlasHandleCurrentDevice();
230
+
231
+ /// Calls getDefaultStream with the current device
232
+ cudaStream_t getDefaultStreamCurrentDevice();
233
+
234
+ /// Calls getTempMemoryAvailable with the current device
235
+ size_t getTempMemoryAvailableCurrentDevice() const;
236
+
237
+ /// Returns a temporary memory allocation via a RAII object
238
+ GpuMemoryReservation allocMemoryHandle(const AllocRequest& req);
239
+
240
+ /// Synchronizes the CPU with respect to the default stream for the
241
+ /// given device
242
+ // equivalent to cudaStreamSynchronize(getDefaultStream(device))
243
+ void syncDefaultStream(int device);
244
+
245
+ /// Calls syncDefaultStream for the current device
246
+ void syncDefaultStreamCurrentDevice();
247
+
248
+ /// Calls getAlternateStreams for the current device
249
+ std::vector<cudaStream_t> getAlternateStreamsCurrentDevice();
250
+
251
+ /// Calls getAsyncCopyStream for the current device
252
+ cudaStream_t getAsyncCopyStreamCurrentDevice();
253
+ };
254
+
255
+ /// Interface for a provider of a shared resources object
256
+ class GpuResourcesProvider {
257
+ public:
258
+ virtual ~GpuResourcesProvider();
259
+
260
+ /// Returns the shared resources object
261
+ virtual std::shared_ptr<GpuResources> getResources() = 0;
262
+ };
263
+
264
+ } } // namespace