faiss 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/benchs/bench_6bit_codec.cpp +80 -0
  6. data/vendor/faiss/c_api/AutoTune_c.h +2 -0
  7. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -6
  8. data/vendor/faiss/c_api/IndexShards_c.h +1 -4
  9. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +4 -2
  10. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +1 -1
  11. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +1 -1
  12. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +1 -1
  13. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +1 -1
  14. data/vendor/faiss/demos/demo_imi_flat.cpp +5 -2
  15. data/vendor/faiss/demos/demo_imi_pq.cpp +6 -2
  16. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +7 -2
  17. data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} +9 -9
  18. data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} +0 -0
  19. data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} +13 -12
  20. data/vendor/faiss/{Clustering.h → faiss/Clustering.h} +0 -0
  21. data/vendor/faiss/{DirectMap.cpp → faiss/DirectMap.cpp} +0 -0
  22. data/vendor/faiss/{DirectMap.h → faiss/DirectMap.h} +0 -0
  23. data/vendor/faiss/{IVFlib.cpp → faiss/IVFlib.cpp} +86 -11
  24. data/vendor/faiss/{IVFlib.h → faiss/IVFlib.h} +26 -8
  25. data/vendor/faiss/{Index.cpp → faiss/Index.cpp} +0 -0
  26. data/vendor/faiss/{Index.h → faiss/Index.h} +1 -1
  27. data/vendor/faiss/{Index2Layer.cpp → faiss/Index2Layer.cpp} +12 -11
  28. data/vendor/faiss/{Index2Layer.h → faiss/Index2Layer.h} +0 -0
  29. data/vendor/faiss/{IndexBinary.cpp → faiss/IndexBinary.cpp} +2 -1
  30. data/vendor/faiss/{IndexBinary.h → faiss/IndexBinary.h} +0 -0
  31. data/vendor/faiss/{IndexBinaryFlat.cpp → faiss/IndexBinaryFlat.cpp} +0 -0
  32. data/vendor/faiss/{IndexBinaryFlat.h → faiss/IndexBinaryFlat.h} +0 -0
  33. data/vendor/faiss/{IndexBinaryFromFloat.cpp → faiss/IndexBinaryFromFloat.cpp} +1 -0
  34. data/vendor/faiss/{IndexBinaryFromFloat.h → faiss/IndexBinaryFromFloat.h} +0 -0
  35. data/vendor/faiss/{IndexBinaryHNSW.cpp → faiss/IndexBinaryHNSW.cpp} +1 -2
  36. data/vendor/faiss/{IndexBinaryHNSW.h → faiss/IndexBinaryHNSW.h} +0 -0
  37. data/vendor/faiss/{IndexBinaryHash.cpp → faiss/IndexBinaryHash.cpp} +16 -7
  38. data/vendor/faiss/{IndexBinaryHash.h → faiss/IndexBinaryHash.h} +2 -1
  39. data/vendor/faiss/{IndexBinaryIVF.cpp → faiss/IndexBinaryIVF.cpp} +10 -16
  40. data/vendor/faiss/{IndexBinaryIVF.h → faiss/IndexBinaryIVF.h} +1 -1
  41. data/vendor/faiss/{IndexFlat.cpp → faiss/IndexFlat.cpp} +0 -0
  42. data/vendor/faiss/{IndexFlat.h → faiss/IndexFlat.h} +0 -0
  43. data/vendor/faiss/{IndexHNSW.cpp → faiss/IndexHNSW.cpp} +63 -32
  44. data/vendor/faiss/{IndexHNSW.h → faiss/IndexHNSW.h} +0 -0
  45. data/vendor/faiss/{IndexIVF.cpp → faiss/IndexIVF.cpp} +129 -46
  46. data/vendor/faiss/{IndexIVF.h → faiss/IndexIVF.h} +7 -3
  47. data/vendor/faiss/{IndexIVFFlat.cpp → faiss/IndexIVFFlat.cpp} +6 -5
  48. data/vendor/faiss/{IndexIVFFlat.h → faiss/IndexIVFFlat.h} +0 -0
  49. data/vendor/faiss/{IndexIVFPQ.cpp → faiss/IndexIVFPQ.cpp} +9 -8
  50. data/vendor/faiss/{IndexIVFPQ.h → faiss/IndexIVFPQ.h} +4 -2
  51. data/vendor/faiss/{IndexIVFPQR.cpp → faiss/IndexIVFPQR.cpp} +3 -1
  52. data/vendor/faiss/{IndexIVFPQR.h → faiss/IndexIVFPQR.h} +0 -0
  53. data/vendor/faiss/{IndexIVFSpectralHash.cpp → faiss/IndexIVFSpectralHash.cpp} +1 -1
  54. data/vendor/faiss/{IndexIVFSpectralHash.h → faiss/IndexIVFSpectralHash.h} +0 -0
  55. data/vendor/faiss/{IndexLSH.cpp → faiss/IndexLSH.cpp} +0 -0
  56. data/vendor/faiss/{IndexLSH.h → faiss/IndexLSH.h} +0 -0
  57. data/vendor/faiss/{IndexLattice.cpp → faiss/IndexLattice.cpp} +0 -0
  58. data/vendor/faiss/{IndexLattice.h → faiss/IndexLattice.h} +0 -0
  59. data/vendor/faiss/{IndexPQ.cpp → faiss/IndexPQ.cpp} +6 -6
  60. data/vendor/faiss/{IndexPQ.h → faiss/IndexPQ.h} +3 -1
  61. data/vendor/faiss/{IndexPreTransform.cpp → faiss/IndexPreTransform.cpp} +0 -0
  62. data/vendor/faiss/{IndexPreTransform.h → faiss/IndexPreTransform.h} +0 -0
  63. data/vendor/faiss/{IndexReplicas.cpp → faiss/IndexReplicas.cpp} +102 -10
  64. data/vendor/faiss/{IndexReplicas.h → faiss/IndexReplicas.h} +6 -0
  65. data/vendor/faiss/{IndexScalarQuantizer.cpp → faiss/IndexScalarQuantizer.cpp} +3 -3
  66. data/vendor/faiss/{IndexScalarQuantizer.h → faiss/IndexScalarQuantizer.h} +0 -0
  67. data/vendor/faiss/{IndexShards.cpp → faiss/IndexShards.cpp} +37 -12
  68. data/vendor/faiss/{IndexShards.h → faiss/IndexShards.h} +3 -4
  69. data/vendor/faiss/{InvertedLists.cpp → faiss/InvertedLists.cpp} +2 -2
  70. data/vendor/faiss/{InvertedLists.h → faiss/InvertedLists.h} +1 -0
  71. data/vendor/faiss/{MatrixStats.cpp → faiss/MatrixStats.cpp} +0 -0
  72. data/vendor/faiss/{MatrixStats.h → faiss/MatrixStats.h} +0 -0
  73. data/vendor/faiss/{MetaIndexes.cpp → faiss/MetaIndexes.cpp} +5 -3
  74. data/vendor/faiss/{MetaIndexes.h → faiss/MetaIndexes.h} +0 -0
  75. data/vendor/faiss/{MetricType.h → faiss/MetricType.h} +0 -0
  76. data/vendor/faiss/{OnDiskInvertedLists.cpp → faiss/OnDiskInvertedLists.cpp} +141 -3
  77. data/vendor/faiss/{OnDiskInvertedLists.h → faiss/OnDiskInvertedLists.h} +27 -7
  78. data/vendor/faiss/{VectorTransform.cpp → faiss/VectorTransform.cpp} +4 -3
  79. data/vendor/faiss/{VectorTransform.h → faiss/VectorTransform.h} +0 -0
  80. data/vendor/faiss/{clone_index.cpp → faiss/clone_index.cpp} +0 -0
  81. data/vendor/faiss/{clone_index.h → faiss/clone_index.h} +0 -0
  82. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.cpp +0 -0
  83. data/vendor/faiss/{gpu → faiss/gpu}/GpuAutoTune.h +0 -0
  84. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.cpp +14 -14
  85. data/vendor/faiss/{gpu → faiss/gpu}/GpuCloner.h +6 -7
  86. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.cpp +0 -0
  87. data/vendor/faiss/{gpu → faiss/gpu}/GpuClonerOptions.h +0 -0
  88. data/vendor/faiss/{gpu → faiss/gpu}/GpuDistance.h +12 -4
  89. data/vendor/faiss/{gpu → faiss/gpu}/GpuFaissAssert.h +0 -0
  90. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndex.h +3 -9
  91. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexBinaryFlat.h +7 -7
  92. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexFlat.h +35 -10
  93. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVF.h +1 -2
  94. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFFlat.h +4 -3
  95. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFPQ.h +21 -4
  96. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndexIVFScalarQuantizer.h +4 -3
  97. data/vendor/faiss/{gpu → faiss/gpu}/GpuIndicesOptions.h +0 -0
  98. data/vendor/faiss/faiss/gpu/GpuResources.cpp +200 -0
  99. data/vendor/faiss/faiss/gpu/GpuResources.h +264 -0
  100. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +572 -0
  101. data/vendor/faiss/{gpu → faiss/gpu}/StandardGpuResources.h +83 -15
  102. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.cpp +0 -0
  103. data/vendor/faiss/{gpu → faiss/gpu}/impl/RemapIndices.h +0 -0
  104. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper-inl.h +1 -1
  105. data/vendor/faiss/{gpu → faiss/gpu}/perf/IndexWrapper.h +1 -1
  106. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfClustering.cpp +1 -1
  107. data/vendor/faiss/{gpu → faiss/gpu}/perf/PerfIVFPQAdd.cpp +0 -0
  108. data/vendor/faiss/{gpu → faiss/gpu}/perf/WriteIndex.cpp +0 -0
  109. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexBinaryFlat.cpp +0 -0
  110. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexFlat.cpp +1 -1
  111. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFFlat.cpp +0 -0
  112. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuIndexIVFPQ.cpp +141 -52
  113. data/vendor/faiss/{gpu → faiss/gpu}/test/TestGpuMemoryException.cpp +0 -0
  114. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.cpp +4 -2
  115. data/vendor/faiss/{gpu → faiss/gpu}/test/TestUtils.h +0 -0
  116. data/vendor/faiss/{gpu → faiss/gpu}/test/demo_ivfpq_indexing_gpu.cpp +7 -5
  117. data/vendor/faiss/{gpu → faiss/gpu}/utils/DeviceUtils.h +1 -1
  118. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +213 -0
  119. data/vendor/faiss/{gpu → faiss/gpu}/utils/StackDeviceMemory.h +25 -40
  120. data/vendor/faiss/{gpu → faiss/gpu}/utils/StaticUtils.h +0 -0
  121. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.cpp +0 -0
  122. data/vendor/faiss/{gpu → faiss/gpu}/utils/Timer.h +0 -0
  123. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.cpp +1 -0
  124. data/vendor/faiss/{impl → faiss/impl}/AuxIndexStructures.h +3 -1
  125. data/vendor/faiss/{impl → faiss/impl}/FaissAssert.h +1 -0
  126. data/vendor/faiss/{impl → faiss/impl}/FaissException.cpp +26 -0
  127. data/vendor/faiss/{impl → faiss/impl}/FaissException.h +4 -0
  128. data/vendor/faiss/{impl → faiss/impl}/HNSW.cpp +26 -26
  129. data/vendor/faiss/{impl → faiss/impl}/HNSW.h +19 -11
  130. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.cpp +1 -1
  131. data/vendor/faiss/{impl → faiss/impl}/PolysemousTraining.h +1 -1
  132. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer-inl.h +0 -1
  133. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.cpp +9 -9
  134. data/vendor/faiss/{impl → faiss/impl}/ProductQuantizer.h +0 -0
  135. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.cpp +63 -39
  136. data/vendor/faiss/{impl → faiss/impl}/ScalarQuantizer.h +1 -1
  137. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex-inl.h +0 -0
  138. data/vendor/faiss/{impl → faiss/impl}/ThreadedIndex.h +0 -0
  139. data/vendor/faiss/{impl → faiss/impl}/index_read.cpp +99 -116
  140. data/vendor/faiss/{impl → faiss/impl}/index_write.cpp +15 -50
  141. data/vendor/faiss/{impl → faiss/impl}/io.cpp +15 -10
  142. data/vendor/faiss/{impl → faiss/impl}/io.h +22 -8
  143. data/vendor/faiss/faiss/impl/io_macros.h +57 -0
  144. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.cpp +52 -36
  145. data/vendor/faiss/{impl → faiss/impl}/lattice_Zn.h +3 -3
  146. data/vendor/faiss/faiss/impl/platform_macros.h +24 -0
  147. data/vendor/faiss/{index_factory.cpp → faiss/index_factory.cpp} +33 -12
  148. data/vendor/faiss/{index_factory.h → faiss/index_factory.h} +0 -0
  149. data/vendor/faiss/{index_io.h → faiss/index_io.h} +55 -1
  150. data/vendor/faiss/faiss/python/python_callbacks.cpp +112 -0
  151. data/vendor/faiss/faiss/python/python_callbacks.h +45 -0
  152. data/vendor/faiss/{utils → faiss/utils}/Heap.cpp +5 -5
  153. data/vendor/faiss/{utils → faiss/utils}/Heap.h +1 -3
  154. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.cpp +0 -0
  155. data/vendor/faiss/{utils → faiss/utils}/WorkerThread.h +0 -0
  156. data/vendor/faiss/{utils → faiss/utils}/distances.cpp +28 -13
  157. data/vendor/faiss/{utils → faiss/utils}/distances.h +2 -1
  158. data/vendor/faiss/{utils → faiss/utils}/distances_simd.cpp +5 -5
  159. data/vendor/faiss/{utils → faiss/utils}/extra_distances.cpp +8 -7
  160. data/vendor/faiss/{utils → faiss/utils}/extra_distances.h +0 -0
  161. data/vendor/faiss/{utils → faiss/utils}/hamming-inl.h +1 -3
  162. data/vendor/faiss/{utils → faiss/utils}/hamming.cpp +8 -7
  163. data/vendor/faiss/{utils → faiss/utils}/hamming.h +7 -1
  164. data/vendor/faiss/{utils → faiss/utils}/random.cpp +5 -5
  165. data/vendor/faiss/{utils → faiss/utils}/random.h +0 -0
  166. data/vendor/faiss/{utils → faiss/utils}/utils.cpp +27 -28
  167. data/vendor/faiss/{utils → faiss/utils}/utils.h +4 -0
  168. data/vendor/faiss/misc/test_blas.cpp +4 -1
  169. data/vendor/faiss/tests/test_binary_flat.cpp +0 -2
  170. data/vendor/faiss/tests/test_dealloc_invlists.cpp +6 -1
  171. data/vendor/faiss/tests/test_ivfpq_codec.cpp +4 -1
  172. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +6 -4
  173. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +12 -5
  174. data/vendor/faiss/tests/test_merge.cpp +6 -3
  175. data/vendor/faiss/tests/test_ondisk_ivf.cpp +7 -2
  176. data/vendor/faiss/tests/test_pairs_decoding.cpp +5 -1
  177. data/vendor/faiss/tests/test_params_override.cpp +7 -2
  178. data/vendor/faiss/tests/test_sliding_ivf.cpp +10 -4
  179. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +14 -8
  180. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +11 -7
  181. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +12 -7
  182. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +6 -3
  183. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +7 -3
  184. metadata +154 -153
  185. data/vendor/faiss/gpu/GpuResources.cpp +0 -52
  186. data/vendor/faiss/gpu/GpuResources.h +0 -73
  187. data/vendor/faiss/gpu/StandardGpuResources.cpp +0 -303
  188. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +0 -77
  189. data/vendor/faiss/gpu/utils/DeviceMemory.h +0 -71
  190. data/vendor/faiss/gpu/utils/MemorySpace.cpp +0 -89
  191. data/vendor/faiss/gpu/utils/MemorySpace.h +0 -44
  192. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +0 -239
@@ -11,18 +11,20 @@
11
11
  #include <faiss/gpu/GpuResources.h>
12
12
  #include <faiss/gpu/utils/StackDeviceMemory.h>
13
13
  #include <faiss/gpu/utils/DeviceUtils.h>
14
+ #include <functional>
15
+ #include <map>
14
16
  #include <unordered_map>
15
17
  #include <vector>
16
18
 
17
19
  namespace faiss { namespace gpu {
18
20
 
19
- /// Default implementation of GpuResources that allocates a cuBLAS
20
- /// stream and 2 streams for use, as well as temporary memory
21
- class StandardGpuResources : public GpuResources {
21
+ /// Standard implementation of the GpuResources object that provides for a
22
+ /// temporary memory manager
23
+ class StandardGpuResourcesImpl : public GpuResources {
22
24
  public:
23
- StandardGpuResources();
25
+ StandardGpuResourcesImpl();
24
26
 
25
- ~StandardGpuResources() override;
27
+ ~StandardGpuResourcesImpl() override;
26
28
 
27
29
  /// Disable allocation of temporary memory; all temporary memory
28
30
  /// requests will call cudaMalloc / cudaFree at the point of use
@@ -46,9 +48,9 @@ class StandardGpuResources : public GpuResources {
46
48
  /// for all devices
47
49
  void setDefaultNullStreamAllDevices();
48
50
 
49
- /// Enable or disable the warning about not having enough temporary memory
50
- /// when cudaMalloc gets called
51
- void setCudaMallocWarning(bool b);
51
+ /// If enabled, will print every GPU memory allocation and deallocation to
52
+ /// standard output
53
+ void setLogMemoryAllocations(bool enable);
52
54
 
53
55
  public:
54
56
  /// Internal system calls
@@ -62,7 +64,17 @@ class StandardGpuResources : public GpuResources {
62
64
 
63
65
  std::vector<cudaStream_t> getAlternateStreams(int device) override;
64
66
 
65
- DeviceMemory& getMemoryManager(int device) override;
67
+ /// Allocate non-temporary GPU memory
68
+ void* allocMemory(const AllocRequest& req) override;
69
+
70
+ /// Returns a previous allocation
71
+ void deallocMemory(int device, void* in) override;
72
+
73
+ size_t getTempMemoryAvailable(int device) const override;
74
+
75
+ /// Export a description of memory used for Python
76
+ std::map<int, std::map<std::string, std::pair<int, size_t>>>
77
+ getMemoryInfo() const;
66
78
 
67
79
  std::pair<void*, size_t> getPinnedMemory() override;
68
80
 
@@ -77,6 +89,13 @@ class StandardGpuResources : public GpuResources {
77
89
  static size_t getDefaultTempMemForGPU(int device, size_t requested);
78
90
 
79
91
  private:
92
+ /// Set of currently outstanding memory allocations per device
93
+ /// device -> (allocated ptr -> alloc request)
94
+ std::unordered_map<int, std::unordered_map<void*, AllocRequest>> allocs_;
95
+
96
+ /// Temporary memory provider, per each device
97
+ std::unordered_map<int, std::unique_ptr<StackDeviceMemory>> tempMemory_;
98
+
80
99
  /// Our default stream that work is ordered on, one per each device
81
100
  std::unordered_map<int, cudaStream_t> defaultStreams_;
82
101
 
@@ -85,7 +104,7 @@ class StandardGpuResources : public GpuResources {
85
104
  std::unordered_map<int, cudaStream_t> userDefaultStreams_;
86
105
 
87
106
  /// Other streams we can use, per each device
88
- std::unordered_map<int, std::vector<cudaStream_t> > alternateStreams_;
107
+ std::unordered_map<int, std::vector<cudaStream_t>> alternateStreams_;
89
108
 
90
109
  /// Async copy stream to use for GPU <-> CPU pinned memory copies
91
110
  std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
@@ -93,9 +112,6 @@ class StandardGpuResources : public GpuResources {
93
112
  /// cuBLAS handle for each device
94
113
  std::unordered_map<int, cublasHandle_t> blasHandles_;
95
114
 
96
- /// Temporary memory provider, per each device
97
- std::unordered_map<int, std::unique_ptr<StackDeviceMemory> > memory_;
98
-
99
115
  /// Pinned memory allocation for use with this GPU
100
116
  void* pinnedMemAlloc_;
101
117
  size_t pinnedMemAllocSize_;
@@ -107,8 +123,60 @@ class StandardGpuResources : public GpuResources {
107
123
  /// Amount of pinned memory we should allocate
108
124
  size_t pinnedMemSize_;
109
125
 
110
- /// Whether or not a warning upon cudaMalloc is generated
111
- bool cudaMallocWarning_;
126
+ /// Whether or not we log every GPU memory allocation and deallocation
127
+ bool allocLogging_;
128
+ };
129
+
130
+ /// Default implementation of GpuResources that allocates a cuBLAS
131
+ /// stream and 2 streams for use, as well as temporary memory
132
+ class StandardGpuResources : public GpuResourcesProvider {
133
+ public:
134
+ StandardGpuResources();
135
+ ~StandardGpuResources() override;
136
+
137
+ std::shared_ptr<GpuResources> getResources() override;
138
+
139
+ /// Disable allocation of temporary memory; all temporary memory
140
+ /// requests will call cudaMalloc / cudaFree at the point of use
141
+ void noTempMemory();
142
+
143
+ /// Specify that we wish to use a certain fixed size of memory on
144
+ /// all devices as temporary memory. This is the upper bound for the GPU
145
+ /// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
146
+ /// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
147
+ /// To avoid any temporary memory allocation, pass 0.
148
+ void setTempMemory(size_t size);
149
+
150
+ /// Set amount of pinned memory to allocate, for async GPU <-> CPU
151
+ /// transfers
152
+ void setPinnedMemory(size_t size);
153
+
154
+ /// Called to change the stream for work ordering
155
+ void setDefaultStream(int device, cudaStream_t stream);
156
+
157
+ /// Called to change the work ordering streams to the null stream
158
+ /// for all devices
159
+ void setDefaultNullStreamAllDevices();
160
+
161
+ /// Export a description of memory used for Python
162
+ std::map<int, std::map<std::string, std::pair<int, size_t>>>
163
+ getMemoryInfo() const;
164
+
165
+ /// Returns the current default stream
166
+ cudaStream_t getDefaultStream(int device);
167
+
168
+ /// Returns the current amount of temp memory available
169
+ size_t getTempMemoryAvailable(int device) const;
170
+
171
+ /// Synchronize our default stream with the CPU
172
+ void syncDefaultStreamCurrentDevice();
173
+
174
+ /// If enabled, will print every GPU memory allocation and deallocation to
175
+ /// standard output
176
+ void setLogMemoryAllocations(bool enable);
177
+
178
+ private:
179
+ std::shared_ptr<StandardGpuResourcesImpl> res_;
112
180
  };
113
181
 
114
182
  } } // namespace
@@ -13,7 +13,7 @@ namespace faiss { namespace gpu {
13
13
  template <typename GpuIndex>
14
14
  IndexWrapper<GpuIndex>::IndexWrapper(
15
15
  int numGpus,
16
- std::function<std::unique_ptr<GpuIndex>(GpuResources*, int)> init) {
16
+ std::function<std::unique_ptr<GpuIndex>(GpuResourcesProvider*, int)> init) {
17
17
  FAISS_ASSERT(numGpus <= faiss::gpu::getNumDevices());
18
18
  for (int i = 0; i < numGpus; ++i) {
19
19
  auto res = std::unique_ptr<faiss::gpu::StandardGpuResources>(
@@ -27,7 +27,7 @@ struct IndexWrapper {
27
27
 
28
28
  IndexWrapper(
29
29
  int numGpus,
30
- std::function<std::unique_ptr<GpuIndex>(GpuResources*, int)> init);
30
+ std::function<std::unique_ptr<GpuIndex>(GpuResourcesProvider*, int)> init);
31
31
  faiss::Index* getIndex();
32
32
 
33
33
  void runOnIndices(std::function<void(GpuIndex*)> f);
@@ -53,7 +53,7 @@ int main(int argc, char** argv) {
53
53
  printf("transposed storage %s\n", FLAGS_transposed ? "enabled" : "disabled");
54
54
  printf("verbose %s\n", FLAGS_verbose ? "enabled" : "disabled");
55
55
 
56
- auto initFn = [](faiss::gpu::GpuResources* res, int dev) ->
56
+ auto initFn = [](faiss::gpu::GpuResourcesProvider* res, int dev) ->
57
57
  std::unique_ptr<faiss::gpu::GpuIndexFlat> {
58
58
  if (FLAGS_pinned_mem >= 0) {
59
59
  ((faiss::gpu::StandardGpuResources*) res)->setPinnedMemory(
@@ -277,7 +277,7 @@ TEST(TestGpuIndexFlat, CopyFrom) {
277
277
  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
278
278
 
279
279
  faiss::gpu::GpuIndexFlatConfig config;
280
- config.device = 0;
280
+ config.device = device;
281
281
  config.useFloat16 = false;
282
282
  config.storeTransposed = false;
283
283
 
@@ -51,7 +51,7 @@ struct Options {
51
51
  // support non-multiple of 8 subcodes for IVFPQ.
52
52
  bitsPerCode = 8;
53
53
  nprobe = std::min(faiss::gpu::randVal(40, 1000), numCentroids);
54
- numQuery = faiss::gpu::randVal(1, 8);
54
+ numQuery = faiss::gpu::randVal(4, 8);
55
55
 
56
56
  // Due to the approximate nature of the query and of floating point
57
57
  // differences between GPU and CPU, to stay within our error bounds, only
@@ -91,7 +91,7 @@ struct Options {
91
91
  }
92
92
 
93
93
  float getCompareEpsilon() const {
94
- return 0.03f;
94
+ return 0.035f;
95
95
  }
96
96
 
97
97
  float getPctMaxDiff1() const {
@@ -131,12 +131,12 @@ TEST(TestGpuIndexIVFPQ, Query_L2) {
131
131
  cpuIndex.train(opt.numTrain, trainVecs.data());
132
132
  cpuIndex.add(opt.numAdd, addVecs.data());
133
133
 
134
+ // Use the default temporary memory management to test the memory manager
134
135
  faiss::gpu::StandardGpuResources res;
135
- res.noTempMemory();
136
136
 
137
137
  faiss::gpu::GpuIndexIVFPQConfig config;
138
138
  config.device = opt.device;
139
- config.usePrecomputedTables = opt.usePrecomputed;
139
+ config.usePrecomputedTables = (tries % 2 == 0);
140
140
  config.indicesOptions = opt.indicesOpt;
141
141
  config.useFloat16LookupTables = opt.useFloat16;
142
142
 
@@ -151,6 +151,93 @@ TEST(TestGpuIndexIVFPQ, Query_L2) {
151
151
  }
152
152
  }
153
153
 
154
+ void testMMCodeDistance(faiss::MetricType mt) {
155
+ // Explicitly test the code distance via batch matrix multiplication route
156
+ // (even for dimension sizes that would otherwise be handled by the
157
+ // specialized route) via enabling `useMMCodeDistance`
158
+ for (int tries = 0; tries < 2; ++tries) {
159
+ Options opt;
160
+
161
+ std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
162
+ std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
163
+
164
+ faiss::IndexFlat coarseQuantizer(opt.dim, mt);
165
+ faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
166
+ opt.codes, opt.bitsPerCode);
167
+ cpuIndex.nprobe = opt.nprobe;
168
+ cpuIndex.train(opt.numTrain, trainVecs.data());
169
+ cpuIndex.add(opt.numAdd, addVecs.data());
170
+
171
+ // Use the default temporary memory management to test the memory manager
172
+ faiss::gpu::StandardGpuResources res;
173
+
174
+ faiss::gpu::GpuIndexIVFPQConfig config;
175
+ config.device = opt.device;
176
+ config.usePrecomputedTables = false;
177
+ config.useMMCodeDistance = true;
178
+ config.indicesOptions = opt.indicesOpt;
179
+
180
+ // Make sure that the float16 version works as well
181
+ config.useFloat16LookupTables = (tries % 2 == 0);
182
+ config.flatConfig.useFloat16 = (tries % 2 == 1);
183
+
184
+ faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
185
+ gpuIndex.setNumProbes(opt.nprobe);
186
+
187
+ faiss::gpu::compareIndices(cpuIndex, gpuIndex,
188
+ opt.numQuery, opt.dim, opt.k, opt.toString(),
189
+ opt.getCompareEpsilon(),
190
+ opt.getPctMaxDiff1(),
191
+ opt.getPctMaxDiffN());
192
+ }
193
+
194
+ // These sizes are not specialized, they will fall back to the MM version
195
+ for (int dimPerSubQ : {7, 11}) {
196
+ Options opt;
197
+
198
+ opt.codes = 12;
199
+ opt.dim = dimPerSubQ * opt.codes;
200
+
201
+ std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
202
+ std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
203
+
204
+ faiss::IndexFlat coarseQuantizer(opt.dim, mt);
205
+ faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, opt.dim, opt.numCentroids,
206
+ opt.codes, opt.bitsPerCode);
207
+ cpuIndex.nprobe = opt.nprobe;
208
+ cpuIndex.train(opt.numTrain, trainVecs.data());
209
+ cpuIndex.add(opt.numAdd, addVecs.data());
210
+
211
+ // Use the default temporary memory management to test the memory manager
212
+ faiss::gpu::StandardGpuResources res;
213
+
214
+ faiss::gpu::GpuIndexIVFPQConfig config;
215
+ config.device = opt.device;
216
+ config.usePrecomputedTables = false;
217
+ config.indicesOptions = opt.indicesOpt;
218
+
219
+ // Make sure that the float16 version works as well
220
+ config.useFloat16LookupTables = (dimPerSubQ == 7);
221
+
222
+ faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
223
+ gpuIndex.setNumProbes(opt.nprobe);
224
+
225
+ faiss::gpu::compareIndices(cpuIndex, gpuIndex,
226
+ opt.numQuery, opt.dim, opt.k, opt.toString(),
227
+ opt.getCompareEpsilon(),
228
+ opt.getPctMaxDiff1(),
229
+ opt.getPctMaxDiffN());
230
+ }
231
+ }
232
+
233
+ TEST(TestGpuIndexIVFPQ, Query_L2_MMCodeDistance) {
234
+ testMMCodeDistance(faiss::MetricType::METRIC_L2);
235
+ }
236
+
237
+ TEST(TestGpuIndexIVFPQ, Query_IP_MMCodeDistance) {
238
+ testMMCodeDistance(faiss::MetricType::METRIC_INNER_PRODUCT);
239
+ }
240
+
154
241
  TEST(TestGpuIndexIVFPQ, Query_IP) {
155
242
  for (int tries = 0; tries < 2; ++tries) {
156
243
  Options opt;
@@ -167,8 +254,8 @@ TEST(TestGpuIndexIVFPQ, Query_IP) {
167
254
  cpuIndex.train(opt.numTrain, trainVecs.data());
168
255
  cpuIndex.add(opt.numAdd, addVecs.data());
169
256
 
257
+ // Use the default temporary memory management to test the memory manager
170
258
  faiss::gpu::StandardGpuResources res;
171
- res.noTempMemory();
172
259
 
173
260
  faiss::gpu::GpuIndexIVFPQConfig config;
174
261
  config.device = opt.device;
@@ -199,8 +286,8 @@ TEST(TestGpuIndexIVFPQ, Float16Coarse) {
199
286
  cpuIndex.nprobe = opt.nprobe;
200
287
  cpuIndex.train(opt.numTrain, trainVecs.data());
201
288
 
289
+ // Use the default temporary memory management to test the memory manager
202
290
  faiss::gpu::StandardGpuResources res;
203
- res.noTempMemory();
204
291
 
205
292
  faiss::gpu::GpuIndexIVFPQConfig config;
206
293
  config.device = opt.device;
@@ -235,8 +322,8 @@ TEST(TestGpuIndexIVFPQ, Add_L2) {
235
322
  cpuIndex.nprobe = opt.nprobe;
236
323
  cpuIndex.train(opt.numTrain, trainVecs.data());
237
324
 
325
+ // Use the default temporary memory management to test the memory manager
238
326
  faiss::gpu::StandardGpuResources res;
239
- res.noTempMemory();
240
327
 
241
328
  faiss::gpu::GpuIndexIVFPQConfig config;
242
329
  config.device = opt.device;
@@ -272,8 +359,8 @@ TEST(TestGpuIndexIVFPQ, Add_IP) {
272
359
  cpuIndex.nprobe = opt.nprobe;
273
360
  cpuIndex.train(opt.numTrain, trainVecs.data());
274
361
 
362
+ // Use the default temporary memory management to test the memory manager
275
363
  faiss::gpu::StandardGpuResources res;
276
- res.noTempMemory();
277
364
 
278
365
  faiss::gpu::GpuIndexIVFPQConfig config;
279
366
  config.device = opt.device;
@@ -296,54 +383,56 @@ TEST(TestGpuIndexIVFPQ, Add_IP) {
296
383
  }
297
384
 
298
385
  TEST(TestGpuIndexIVFPQ, CopyTo) {
299
- Options opt;
300
- std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
301
- std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
386
+ for (int tries = 0; tries < 2; ++tries) {
387
+ Options opt;
388
+ std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
389
+ std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
302
390
 
303
- faiss::gpu::StandardGpuResources res;
304
- res.noTempMemory();
391
+ // Use the default temporary memory management to test the memory manager
392
+ faiss::gpu::StandardGpuResources res;
305
393
 
306
- faiss::gpu::GpuIndexIVFPQConfig config;
307
- config.device = opt.device;
308
- config.usePrecomputedTables = opt.usePrecomputed;
309
- config.indicesOptions = opt.indicesOpt;
310
- config.useFloat16LookupTables = opt.useFloat16;
394
+ faiss::gpu::GpuIndexIVFPQConfig config;
395
+ config.device = opt.device;
396
+ config.usePrecomputedTables = (tries % 2 == 0);
397
+ config.indicesOptions = opt.indicesOpt;
398
+ config.useFloat16LookupTables = opt.useFloat16;
311
399
 
312
- faiss::gpu::GpuIndexIVFPQ gpuIndex(&res,
313
- opt.dim,
314
- opt.numCentroids,
315
- opt.codes,
316
- opt.bitsPerCode,
317
- faiss::METRIC_L2,
318
- config);
319
- gpuIndex.setNumProbes(opt.nprobe);
320
- gpuIndex.train(opt.numTrain, trainVecs.data());
321
- gpuIndex.add(opt.numAdd, addVecs.data());
400
+ faiss::gpu::GpuIndexIVFPQ gpuIndex(&res,
401
+ opt.dim,
402
+ opt.numCentroids,
403
+ opt.codes,
404
+ opt.bitsPerCode,
405
+ faiss::METRIC_L2,
406
+ config);
407
+ gpuIndex.setNumProbes(opt.nprobe);
408
+ gpuIndex.train(opt.numTrain, trainVecs.data());
409
+ gpuIndex.add(opt.numAdd, addVecs.data());
322
410
 
323
- // Use garbage values to see if we overwrite them
324
- faiss::IndexFlatL2 cpuQuantizer(1);
325
- faiss::IndexIVFPQ cpuIndex(&cpuQuantizer, 1, 1, 1, 1);
411
+ // Use garbage values to see if we overwrite them
412
+ faiss::IndexFlatL2 cpuQuantizer(1);
413
+ faiss::IndexIVFPQ cpuIndex(&cpuQuantizer, 1, 1, 1, 1);
326
414
 
327
- gpuIndex.copyTo(&cpuIndex);
415
+ gpuIndex.copyTo(&cpuIndex);
328
416
 
329
- EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
330
- EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
417
+ EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
418
+ EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
331
419
 
332
- EXPECT_EQ(cpuIndex.d, gpuIndex.d);
333
- EXPECT_EQ(cpuIndex.d, opt.dim);
334
- EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
335
- EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
336
- EXPECT_EQ(cpuIndex.pq.M, gpuIndex.getNumSubQuantizers());
337
- EXPECT_EQ(gpuIndex.getNumSubQuantizers(), opt.codes);
338
- EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
339
- EXPECT_EQ(gpuIndex.getBitsPerCode(), opt.bitsPerCode);
420
+ EXPECT_EQ(cpuIndex.d, gpuIndex.d);
421
+ EXPECT_EQ(cpuIndex.d, opt.dim);
422
+ EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
423
+ EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
424
+ EXPECT_EQ(cpuIndex.pq.M, gpuIndex.getNumSubQuantizers());
425
+ EXPECT_EQ(gpuIndex.getNumSubQuantizers(), opt.codes);
426
+ EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
427
+ EXPECT_EQ(gpuIndex.getBitsPerCode(), opt.bitsPerCode);
340
428
 
341
- // Query both objects; results should be equivalent
342
- faiss::gpu::compareIndices(cpuIndex, gpuIndex,
343
- opt.numQuery, opt.dim, opt.k, opt.toString(),
344
- opt.getCompareEpsilon(),
345
- opt.getPctMaxDiff1(),
346
- opt.getPctMaxDiffN());
429
+ // Query both objects; results should be equivalent
430
+ faiss::gpu::compareIndices(cpuIndex, gpuIndex,
431
+ opt.numQuery, opt.dim, opt.k, opt.toString(),
432
+ opt.getCompareEpsilon(),
433
+ opt.getPctMaxDiff1(),
434
+ opt.getPctMaxDiffN());
435
+ }
347
436
  }
348
437
 
349
438
  TEST(TestGpuIndexIVFPQ, CopyFrom) {
@@ -358,9 +447,8 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
358
447
  cpuIndex.train(opt.numTrain, trainVecs.data());
359
448
  cpuIndex.add(opt.numAdd, addVecs.data());
360
449
 
361
- // Use garbage values to see if we overwrite them
450
+ // Use the default temporary memory management to test the memory manager
362
451
  faiss::gpu::StandardGpuResources res;
363
- res.noTempMemory();
364
452
 
365
453
  faiss::gpu::GpuIndexIVFPQConfig config;
366
454
  config.device = opt.device;
@@ -368,6 +456,7 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
368
456
  config.indicesOptions = opt.indicesOpt;
369
457
  config.useFloat16LookupTables = opt.useFloat16;
370
458
 
459
+ // Use garbage values to see if we overwrite them
371
460
  faiss::gpu::GpuIndexIVFPQ
372
461
  gpuIndex(&res, 1, 1, 1, 1, faiss::METRIC_L2, config);
373
462
  gpuIndex.setNumProbes(1);
@@ -401,8 +490,8 @@ TEST(TestGpuIndexIVFPQ, QueryNaN) {
401
490
  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
402
491
  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
403
492
 
493
+ // Use the default temporary memory management to test the memory manager
404
494
  faiss::gpu::StandardGpuResources res;
405
- res.noTempMemory();
406
495
 
407
496
  faiss::gpu::GpuIndexIVFPQConfig config;
408
497
  config.device = opt.device;
@@ -447,8 +536,8 @@ TEST(TestGpuIndexIVFPQ, QueryNaN) {
447
536
  TEST(TestGpuIndexIVFPQ, AddNaN) {
448
537
  Options opt;
449
538
 
539
+ // Use the default temporary memory management to test the memory manager
450
540
  faiss::gpu::StandardGpuResources res;
451
- res.noTempMemory();
452
541
 
453
542
  faiss::gpu::GpuIndexIVFPQConfig config;
454
543
  config.device = opt.device;