faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,114 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <faiss/gpu/GpuResources.h>
12
+ #include <faiss/gpu/utils/StackDeviceMemory.h>
13
+ #include <faiss/gpu/utils/DeviceUtils.h>
14
+ #include <unordered_map>
15
+ #include <vector>
16
+
17
+ namespace faiss { namespace gpu {
18
+
19
+ /// Default implementation of GpuResources that allocates a cuBLAS
20
+ /// stream and 2 streams for use, as well as temporary memory
21
+ class StandardGpuResources : public GpuResources {
22
+ public:
23
+ StandardGpuResources();
24
+
25
+ ~StandardGpuResources() override;
26
+
27
+ /// Disable allocation of temporary memory; all temporary memory
28
+ /// requests will call cudaMalloc / cudaFree at the point of use
29
+ void noTempMemory();
30
+
31
+ /// Specify that we wish to use a certain fixed size of memory on
32
+ /// all devices as temporary memory. This is the upper bound for the GPU
33
+ /// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
34
+ /// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
35
+ /// To avoid any temporary memory allocation, pass 0.
36
+ void setTempMemory(size_t size);
37
+
38
+ /// Set amount of pinned memory to allocate, for async GPU <-> CPU
39
+ /// transfers
40
+ void setPinnedMemory(size_t size);
41
+
42
+ /// Called to change the stream for work ordering
43
+ void setDefaultStream(int device, cudaStream_t stream);
44
+
45
+ /// Called to change the work ordering streams to the null stream
46
+ /// for all devices
47
+ void setDefaultNullStreamAllDevices();
48
+
49
+ /// Enable or disable the warning about not having enough temporary memory
50
+ /// when cudaMalloc gets called
51
+ void setCudaMallocWarning(bool b);
52
+
53
+ public:
54
+ /// Internal system calls
55
+
56
+ /// Initialize resources for this device
57
+ void initializeForDevice(int device) override;
58
+
59
+ cublasHandle_t getBlasHandle(int device) override;
60
+
61
+ cudaStream_t getDefaultStream(int device) override;
62
+
63
+ std::vector<cudaStream_t> getAlternateStreams(int device) override;
64
+
65
+ DeviceMemory& getMemoryManager(int device) override;
66
+
67
+ std::pair<void*, size_t> getPinnedMemory() override;
68
+
69
+ cudaStream_t getAsyncCopyStream(int device) override;
70
+
71
+ private:
72
+ /// Have GPU resources been initialized for this device yet?
73
+ bool isInitialized(int device) const;
74
+
75
+ /// Adjust the default temporary memory allocation based on the total GPU
76
+ /// memory size
77
+ static size_t getDefaultTempMemForGPU(int device, size_t requested);
78
+
79
+ private:
80
+ /// Our default stream that work is ordered on, one per each device
81
+ std::unordered_map<int, cudaStream_t> defaultStreams_;
82
+
83
+ /// This contains particular streams as set by the user for
84
+ /// ordering, if any
85
+ std::unordered_map<int, cudaStream_t> userDefaultStreams_;
86
+
87
+ /// Other streams we can use, per each device
88
+ std::unordered_map<int, std::vector<cudaStream_t> > alternateStreams_;
89
+
90
+ /// Async copy stream to use for GPU <-> CPU pinned memory copies
91
+ std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
92
+
93
+ /// cuBLAS handle for each device
94
+ std::unordered_map<int, cublasHandle_t> blasHandles_;
95
+
96
+ /// Temporary memory provider, per each device
97
+ std::unordered_map<int, std::unique_ptr<StackDeviceMemory> > memory_;
98
+
99
+ /// Pinned memory allocation for use with this GPU
100
+ void* pinnedMemAlloc_;
101
+ size_t pinnedMemAllocSize_;
102
+
103
+ /// Another option is to use a specified amount of memory on all
104
+ /// devices
105
+ size_t tempMemSize_;
106
+
107
+ /// Amount of pinned memory we should allocate
108
+ size_t pinnedMemSize_;
109
+
110
+ /// Whether or not a warning upon cudaMalloc is generated
111
+ bool cudaMallocWarning_;
112
+ };
113
+
114
+ } } // namespace
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/impl/RemapIndices.h>
10
+ #include <faiss/impl/FaissAssert.h>
11
+
12
+ namespace faiss { namespace gpu {
13
+
14
+ // Utility function to translate (list id, offset) to a user index on
15
+ // the CPU. In a cpp in order to use OpenMP
16
+ void ivfOffsetToUserIndex(
17
+ long* indices,
18
+ int numLists,
19
+ int queries,
20
+ int k,
21
+ const std::vector<std::vector<long>>& listOffsetToUserIndex) {
22
+ FAISS_ASSERT(numLists == listOffsetToUserIndex.size());
23
+
24
+ #pragma omp parallel for
25
+ for (int q = 0; q < queries; ++q) {
26
+ for (int r = 0; r < k; ++r) {
27
+ long offsetIndex = indices[q * k + r];
28
+
29
+ if (offsetIndex < 0) continue;
30
+
31
+ int listId = (int) (offsetIndex >> 32);
32
+ int listOffset = (int) (offsetIndex & 0xffffffff);
33
+
34
+ FAISS_ASSERT(listId < numLists);
35
+ auto& listIndices = listOffsetToUserIndex[listId];
36
+
37
+ FAISS_ASSERT(listOffset < listIndices.size());
38
+ indices[q * k + r] = listIndices[listOffset];
39
+ }
40
+ }
41
+ }
42
+
43
+ } } // namespace
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <vector>
12
+
13
+ namespace faiss { namespace gpu {
14
+
15
/// CPU-side helper that rewrites, in place, each (list id, offset) entry of
/// `indices` (queries rows of k entries) into the matching user index taken
/// from `listOffsetToUserIndex`. Defined in a .cpp file so that it can use
/// OpenMP.
void ivfOffsetToUserIndex(long* indices,
                          int numLists,
                          int queries,
                          int k,
                          const std::vector<std::vector<long>>& listOffsetToUserIndex);
23
+
24
+ } } // namespace
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/impl/FaissAssert.h>
10
+
11
+ namespace faiss { namespace gpu {
12
+
13
+ template <typename GpuIndex>
14
+ IndexWrapper<GpuIndex>::IndexWrapper(
15
+ int numGpus,
16
+ std::function<std::unique_ptr<GpuIndex>(GpuResources*, int)> init) {
17
+ FAISS_ASSERT(numGpus <= faiss::gpu::getNumDevices());
18
+ for (int i = 0; i < numGpus; ++i) {
19
+ auto res = std::unique_ptr<faiss::gpu::StandardGpuResources>(
20
+ new StandardGpuResources);
21
+
22
+ subIndex.emplace_back(init(res.get(), i));
23
+ resources.emplace_back(std::move(res));
24
+ }
25
+
26
+ if (numGpus > 1) {
27
+ // create proxy
28
+ replicaIndex =
29
+ std::unique_ptr<faiss::IndexReplicas>(new faiss::IndexReplicas);
30
+
31
+ for (auto& index : subIndex) {
32
+ replicaIndex->addIndex(index.get());
33
+ }
34
+ }
35
+ }
36
+
37
+ template <typename GpuIndex>
38
+ faiss::Index*
39
+ IndexWrapper<GpuIndex>::getIndex() {
40
+ if ((bool) replicaIndex) {
41
+ return replicaIndex.get();
42
+ } else {
43
+ FAISS_ASSERT(!subIndex.empty());
44
+ return subIndex.front().get();
45
+ }
46
+ }
47
+
48
+ template <typename GpuIndex>
49
+ void
50
+ IndexWrapper<GpuIndex>::runOnIndices(std::function<void(GpuIndex*)> f) {
51
+
52
+ if ((bool) replicaIndex) {
53
+ replicaIndex->runOnIndex(
54
+ [f](int, faiss::Index* index) {
55
+ f(dynamic_cast<GpuIndex*>(index));
56
+ });
57
+ } else {
58
+ FAISS_ASSERT(!subIndex.empty());
59
+ f(subIndex.front().get());
60
+ }
61
+ }
62
+
63
+ template <typename GpuIndex>
64
+ void
65
+ IndexWrapper<GpuIndex>::setNumProbes(int nprobe) {
66
+ runOnIndices([nprobe](GpuIndex* index) {
67
+ index->setNumProbes(nprobe);
68
+ });
69
+ }
70
+
71
+ } }
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <faiss/IndexReplicas.h>
12
+ #include <faiss/gpu/StandardGpuResources.h>
13
+ #include <functional>
14
+ #include <memory>
15
+ #include <vector>
16
+
17
+ namespace faiss { namespace gpu {
18
+
19
+ // If we want to run multi-GPU, create a proxy to wrap the indices.
20
+ // If we don't want multi-GPU, don't involve the proxy, so it doesn't
21
+ // affect the timings.
22
+ template <typename GpuIndex>
23
+ struct IndexWrapper {
24
+ std::vector<std::unique_ptr<faiss::gpu::StandardGpuResources>> resources;
25
+ std::vector<std::unique_ptr<GpuIndex>> subIndex;
26
+ std::unique_ptr<faiss::IndexReplicas> replicaIndex;
27
+
28
+ IndexWrapper(
29
+ int numGpus,
30
+ std::function<std::unique_ptr<GpuIndex>(GpuResources*, int)> init);
31
+ faiss::Index* getIndex();
32
+
33
+ void runOnIndices(std::function<void(GpuIndex*)> f);
34
+ void setNumProbes(int nprobe);
35
+ };
36
+
37
+ } }
38
+
39
+ #include <faiss/gpu/perf/IndexWrapper-inl.h>
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/utils/random.h>
10
+ #include <faiss/Clustering.h>
11
+ #include <faiss/gpu/GpuIndexFlat.h>
12
+ #include <faiss/gpu/StandardGpuResources.h>
13
+ #include <faiss/gpu/perf/IndexWrapper.h>
14
+ #include <faiss/gpu/utils/DeviceUtils.h>
15
+ #include <faiss/gpu/utils/Timer.h>
16
+ #include <gflags/gflags.h>
17
+ #include <memory>
18
+ #include <vector>
19
+
20
+ #include <cuda_profiler_api.h>
21
+
22
+ DEFINE_int32(num, 10000, "# of vecs");
23
+ DEFINE_int32(k, 100, "# of clusters");
24
+ DEFINE_int32(dim, 128, "# of dimensions");
25
+ DEFINE_int32(niter, 10, "# of iterations");
26
+ DEFINE_bool(L2_metric, true, "If true, use L2 metric. If false, use IP metric");
27
+ DEFINE_bool(use_float16, false, "use float16 vectors and math");
28
+ DEFINE_bool(transposed, false, "transposed vector storage");
29
+ DEFINE_bool(verbose, false, "turn on clustering logging");
30
+ DEFINE_int64(seed, -1, "specify random seed");
31
+ DEFINE_int32(num_gpus, 1, "number of gpus to use");
32
+ DEFINE_int64(min_paging_size, -1, "minimum size to use CPU -> GPU paged copies");
33
+ DEFINE_int64(pinned_mem, -1, "pinned memory allocation to use");
34
+ DEFINE_int32(max_points, -1, "max points per centroid");
35
+
36
+ using namespace faiss::gpu;
37
+
38
+ int main(int argc, char** argv) {
39
+ gflags::ParseCommandLineFlags(&argc, &argv, true);
40
+
41
+ cudaProfilerStop();
42
+
43
+ auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(nullptr);
44
+ printf("using seed %ld\n", seed);
45
+
46
+ std::vector<float> vecs((size_t) FLAGS_num * FLAGS_dim);
47
+ faiss::float_rand(vecs.data(), vecs.size(), seed);
48
+
49
+ printf("K-means metric %s dim %d centroids %d num train %d niter %d\n",
50
+ FLAGS_L2_metric ? "L2" : "IP",
51
+ FLAGS_dim, FLAGS_k, FLAGS_num, FLAGS_niter);
52
+ printf("float16 math %s\n", FLAGS_use_float16 ? "enabled" : "disabled");
53
+ printf("transposed storage %s\n", FLAGS_transposed ? "enabled" : "disabled");
54
+ printf("verbose %s\n", FLAGS_verbose ? "enabled" : "disabled");
55
+
56
+ auto initFn = [](faiss::gpu::GpuResources* res, int dev) ->
57
+ std::unique_ptr<faiss::gpu::GpuIndexFlat> {
58
+ if (FLAGS_pinned_mem >= 0) {
59
+ ((faiss::gpu::StandardGpuResources*) res)->setPinnedMemory(
60
+ FLAGS_pinned_mem);
61
+ }
62
+
63
+ GpuIndexFlatConfig config;
64
+ config.device = dev;
65
+ config.useFloat16 = FLAGS_use_float16;
66
+ config.storeTransposed = FLAGS_transposed;
67
+
68
+ auto p = std::unique_ptr<faiss::gpu::GpuIndexFlat>(
69
+ FLAGS_L2_metric ?
70
+ (faiss::gpu::GpuIndexFlat*)
71
+ new faiss::gpu::GpuIndexFlatL2(res, FLAGS_dim, config) :
72
+ (faiss::gpu::GpuIndexFlat*)
73
+ new faiss::gpu::GpuIndexFlatIP(res, FLAGS_dim, config));
74
+
75
+ if (FLAGS_min_paging_size >= 0) {
76
+ p->setMinPagingSize(FLAGS_min_paging_size);
77
+ }
78
+ return p;
79
+ };
80
+
81
+ IndexWrapper<faiss::gpu::GpuIndexFlat> gpuIndex(FLAGS_num_gpus, initFn);
82
+
83
+ CUDA_VERIFY(cudaProfilerStart());
84
+ faiss::gpu::synchronizeAllDevices();
85
+
86
+ float gpuTime = 0.0f;
87
+
88
+ faiss::ClusteringParameters cp;
89
+ cp.niter = FLAGS_niter;
90
+ cp.verbose = FLAGS_verbose;
91
+
92
+ if (FLAGS_max_points > 0) {
93
+ cp.max_points_per_centroid = FLAGS_max_points;
94
+ }
95
+
96
+ faiss::Clustering kmeans(FLAGS_dim, FLAGS_k, cp);
97
+
98
+ // Time k-means
99
+ {
100
+ CpuTimer timer;
101
+
102
+ kmeans.train(FLAGS_num, vecs.data(), *(gpuIndex.getIndex()));
103
+
104
+ // There is a device -> host copy above, so no need to time
105
+ // additional synchronization with the GPU
106
+ gpuTime = timer.elapsedMilliseconds();
107
+ }
108
+
109
+ CUDA_VERIFY(cudaProfilerStop());
110
+ printf("k-means time %.3f ms\n", gpuTime);
111
+
112
+ CUDA_VERIFY(cudaDeviceSynchronize());
113
+
114
+ return 0;
115
+ }
@@ -0,0 +1,139 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+
10
+ #include <cuda_profiler_api.h>
11
+ #include <faiss/IndexFlat.h>
12
+ #include <faiss/IndexIVFPQ.h>
13
+ #include <faiss/gpu/GpuIndexIVFPQ.h>
14
+ #include <faiss/gpu/StandardGpuResources.h>
15
+ #include <faiss/gpu/test/TestUtils.h>
16
+ #include <faiss/gpu/utils/DeviceUtils.h>
17
+ #include <faiss/gpu/utils/Timer.h>
18
+ #include <gflags/gflags.h>
19
+ #include <map>
20
+ #include <vector>
21
+
22
+ DEFINE_int32(batches, 10, "number of batches of vectors to add");
23
+ DEFINE_int32(batch_size, 10000, "number of vectors in each batch");
24
+ DEFINE_int32(dim, 256, "dimension of vectors");
25
+ DEFINE_int32(centroids, 4096, "num coarse centroids to use");
26
+ DEFINE_int32(bytes_per_vec, 32, "bytes per encoded vector");
27
+ DEFINE_int32(bits_per_code, 8, "bits per PQ code");
28
+ DEFINE_int32(index, 2, "0 = no indices on GPU; 1 = 32 bit, 2 = 64 bit on GPU");
29
+ DEFINE_bool(time_gpu, true, "time add to GPU");
30
+ DEFINE_bool(time_cpu, false, "time add to CPU");
31
+ DEFINE_bool(per_batch_time, false, "print per-batch times");
32
+ DEFINE_bool(reserve_memory, false, "whether or not to pre-reserve memory");
33
+
34
+ int main(int argc, char** argv) {
35
+ gflags::ParseCommandLineFlags(&argc, &argv, true);
36
+
37
+ cudaProfilerStop();
38
+
39
+ int dim = FLAGS_dim;
40
+ int numCentroids = FLAGS_centroids;
41
+ int bytesPerVec = FLAGS_bytes_per_vec;
42
+ int bitsPerCode = FLAGS_bits_per_code;
43
+
44
+ faiss::gpu::StandardGpuResources res;
45
+
46
+ // IndexIVFPQ will complain, but just give us enough to get through this
47
+ int numTrain = 4 * numCentroids;
48
+ std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
49
+
50
+ faiss::IndexFlatL2 coarseQuantizer(dim);
51
+ faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, dim, numCentroids,
52
+ bytesPerVec, bitsPerCode);
53
+ if (FLAGS_time_cpu) {
54
+ cpuIndex.train(numTrain, trainVecs.data());
55
+ }
56
+
57
+ faiss::gpu::GpuIndexIVFPQConfig config;
58
+ config.device = 0;
59
+ config.indicesOptions = (faiss::gpu::IndicesOptions) FLAGS_index;
60
+
61
+ faiss::gpu::GpuIndexIVFPQ gpuIndex(
62
+ &res, dim, numCentroids, bytesPerVec, bitsPerCode,
63
+ faiss::METRIC_L2, config);
64
+
65
+ if (FLAGS_time_gpu) {
66
+ gpuIndex.train(numTrain, trainVecs.data());
67
+ if (FLAGS_reserve_memory) {
68
+ size_t numVecs = (size_t) FLAGS_batches * (size_t) FLAGS_batch_size;
69
+ gpuIndex.reserveMemory(numVecs);
70
+ }
71
+ }
72
+
73
+ cudaDeviceSynchronize();
74
+ CUDA_VERIFY(cudaProfilerStart());
75
+
76
+ float totalGpuTime = 0.0f;
77
+ float totalCpuTime = 0.0f;
78
+
79
+ for (int i = 0; i < FLAGS_batches; ++i) {
80
+ if (!FLAGS_per_batch_time) {
81
+ if (i % 10 == 0) {
82
+ printf("Adding batch %d\n", i + 1);
83
+ }
84
+ }
85
+
86
+ auto addVecs = faiss::gpu::randVecs(FLAGS_batch_size, dim);
87
+
88
+ if (FLAGS_time_gpu) {
89
+ faiss::gpu::CpuTimer timer;
90
+ gpuIndex.add(FLAGS_batch_size, addVecs.data());
91
+ CUDA_VERIFY(cudaDeviceSynchronize());
92
+ auto time = timer.elapsedMilliseconds();
93
+
94
+ totalGpuTime += time;
95
+
96
+ if (FLAGS_per_batch_time) {
97
+ printf("Batch %d | GPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
98
+ i + 1, FLAGS_batch_size, time, time / (float) FLAGS_batch_size);
99
+ }
100
+ }
101
+
102
+ if (FLAGS_time_cpu) {
103
+ faiss::gpu::CpuTimer timer;
104
+ cpuIndex.add(FLAGS_batch_size, addVecs.data());
105
+ auto time = timer.elapsedMilliseconds();
106
+
107
+ totalCpuTime += time;
108
+
109
+ if (FLAGS_per_batch_time) {
110
+ printf("Batch %d | CPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
111
+ i + 1, FLAGS_batch_size, time, time / (float) FLAGS_batch_size);
112
+ }
113
+ }
114
+ }
115
+
116
+ CUDA_VERIFY(cudaProfilerStop());
117
+
118
+ int total = FLAGS_batch_size * FLAGS_batches;
119
+
120
+ if (FLAGS_time_gpu) {
121
+ printf("%d dim, %d centroids, %d x %d encoding\n"
122
+ "GPU time to add %d vectors (%d batches, %d per batch): "
123
+ "%.3f ms (%.3f us per)\n",
124
+ dim, numCentroids, bytesPerVec, bitsPerCode,
125
+ total, FLAGS_batches, FLAGS_batch_size,
126
+ totalGpuTime, totalGpuTime * 1000.0f / (float) total);
127
+ }
128
+
129
+ if (FLAGS_time_cpu) {
130
+ printf("%d dim, %d centroids, %d x %d encoding\n"
131
+ "CPU time to add %d vectors (%d batches, %d per batch): "
132
+ "%.3f ms (%.3f us per)\n",
133
+ dim, numCentroids, bytesPerVec, bitsPerCode,
134
+ total, FLAGS_batches, FLAGS_batch_size,
135
+ totalCpuTime, totalCpuTime * 1000.0f / (float) total);
136
+ }
137
+
138
+ return 0;
139
+ }