faiss 0.1.5 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/README.md +12 -0
  4. data/ext/faiss/ext.cpp +1 -1
  5. data/ext/faiss/extconf.rb +6 -2
  6. data/ext/faiss/index.cpp +114 -43
  7. data/ext/faiss/index_binary.cpp +24 -30
  8. data/ext/faiss/kmeans.cpp +20 -16
  9. data/ext/faiss/numo.hpp +867 -0
  10. data/ext/faiss/pca_matrix.cpp +13 -14
  11. data/ext/faiss/product_quantizer.cpp +23 -24
  12. data/ext/faiss/utils.cpp +10 -37
  13. data/ext/faiss/utils.h +2 -13
  14. data/lib/faiss.rb +0 -5
  15. data/lib/faiss/version.rb +1 -1
  16. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  17. data/vendor/faiss/faiss/AutoTune.h +55 -56
  18. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  19. data/vendor/faiss/faiss/Clustering.h +88 -35
  20. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  21. data/vendor/faiss/faiss/IVFlib.h +48 -51
  22. data/vendor/faiss/faiss/Index.cpp +85 -103
  23. data/vendor/faiss/faiss/Index.h +54 -48
  24. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  25. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  26. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  27. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  28. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  29. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  30. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  31. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  32. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  33. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  34. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  35. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  36. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  37. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  38. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  39. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  40. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  41. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  42. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  43. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  54. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  55. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  69. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  73. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  76. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  77. data/vendor/faiss/faiss/IndexShards.h +85 -73
  78. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  79. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  81. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  82. data/vendor/faiss/faiss/MetricType.h +7 -7
  83. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  84. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  85. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  86. data/vendor/faiss/faiss/clone_index.h +4 -9
  87. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  88. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  89. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  90. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  91. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  96. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  102. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  103. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  104. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  106. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  108. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  110. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  112. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  113. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  114. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  115. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  116. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  121. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  122. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  124. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  125. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  126. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  128. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  129. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  130. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  131. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  133. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  135. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  136. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  137. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  138. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  139. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  140. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  142. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  144. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  145. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  146. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  148. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  149. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  151. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  153. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  154. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  156. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  157. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  158. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  159. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  160. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  161. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  162. data/vendor/faiss/faiss/impl/io.h +31 -41
  163. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  164. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  165. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  166. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  167. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  171. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  172. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  173. data/vendor/faiss/faiss/index_factory.h +6 -7
  174. data/vendor/faiss/faiss/index_io.h +23 -26
  175. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  177. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  178. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  179. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  180. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  181. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  183. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  185. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  186. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  187. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  188. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  189. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  190. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  191. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  192. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  193. data/vendor/faiss/faiss/utils/distances.h +133 -118
  194. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  195. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  196. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  197. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  198. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  199. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  200. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  201. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  202. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  203. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  204. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  205. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  206. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  207. data/vendor/faiss/faiss/utils/random.h +13 -16
  208. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  209. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  210. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  211. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  212. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  213. data/vendor/faiss/faiss/utils/utils.h +53 -48
  214. metadata +24 -10
  215. data/lib/faiss/index.rb +0 -20
  216. data/lib/faiss/index_binary.rb +0 -20
  217. data/lib/faiss/kmeans.rb +0 -15
  218. data/lib/faiss/pca_matrix.rb +0 -15
  219. data/lib/faiss/product_quantizer.rb +0 -22
@@ -5,124 +5,122 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #pragma once
10
9
 
11
- #include <faiss/gpu/GpuIndexIVF.h>
12
10
  #include <faiss/IndexScalarQuantizer.h>
11
+ #include <faiss/gpu/GpuIndexIVF.h>
13
12
  #include <memory>
14
13
 
15
- namespace faiss { namespace gpu {
14
+ namespace faiss {
15
+ namespace gpu {
16
16
 
17
17
  class IVFFlat;
18
18
  class GpuIndexFlat;
19
19
 
20
20
  struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
21
- inline GpuIndexIVFScalarQuantizerConfig()
22
- : interleavedLayout(true) {
23
- }
21
+ inline GpuIndexIVFScalarQuantizerConfig() : interleavedLayout(true) {}
24
22
 
25
- /// Use the alternative memory layout for the IVF lists
26
- /// (currently the default)
27
- bool interleavedLayout;
23
+ /// Use the alternative memory layout for the IVF lists
24
+ /// (currently the default)
25
+ bool interleavedLayout;
28
26
  };
29
27
 
30
28
  /// Wrapper around the GPU implementation that looks like
31
29
  /// faiss::IndexIVFScalarQuantizer
32
30
  class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
33
- public:
34
- /// Construct from a pre-existing faiss::IndexIVFScalarQuantizer instance,
35
- /// copying data over to the given GPU, if the input index is trained.
36
- GpuIndexIVFScalarQuantizer(
37
- GpuResourcesProvider* provider,
38
- const faiss::IndexIVFScalarQuantizer* index,
39
- GpuIndexIVFScalarQuantizerConfig config =
40
- GpuIndexIVFScalarQuantizerConfig());
41
-
42
- /// Constructs a new instance with an empty flat quantizer; the user
43
- /// provides the number of lists desired.
44
- GpuIndexIVFScalarQuantizer(
45
- GpuResourcesProvider* provider,
46
- int dims,
47
- int nlist,
48
- faiss::ScalarQuantizer::QuantizerType qtype,
49
- faiss::MetricType metric = MetricType::METRIC_L2,
50
- bool encodeResidual = true,
51
- GpuIndexIVFScalarQuantizerConfig config =
52
- GpuIndexIVFScalarQuantizerConfig());
53
-
54
- ~GpuIndexIVFScalarQuantizer() override;
55
-
56
- /// Reserve GPU memory in our inverted lists for this number of vectors
57
- void reserveMemory(size_t numVecs);
58
-
59
- /// Initialize ourselves from the given CPU index; will overwrite
60
- /// all data in ourselves
61
- void copyFrom(const faiss::IndexIVFScalarQuantizer* index);
62
-
63
- /// Copy ourselves to the given CPU index; will overwrite all data
64
- /// in the index instance
65
- void copyTo(faiss::IndexIVFScalarQuantizer* index) const;
66
-
67
- /// After adding vectors, one can call this to reclaim device memory
68
- /// to exactly the amount needed. Returns space reclaimed in bytes
69
- size_t reclaimMemory();
70
-
71
- /// Clears out all inverted lists, but retains the coarse and scalar quantizer
72
- /// information
73
- void reset() override;
74
-
75
- /// Trains the coarse and scalar quantizer based on the given vector data
76
- void train(Index::idx_t n, const float* x) override;
77
-
78
- /// Returns the number of vectors present in a particular inverted list
79
- int getListLength(int listId) const override;
80
-
81
- /// Return the encoded vector data contained in a particular inverted list,
82
- /// for debugging purposes.
83
- /// If gpuFormat is true, the data is returned as it is encoded in the
84
- /// GPU-side representation.
85
- /// Otherwise, it is converted to the CPU-compliant format,
86
- /// while the native GPU format may differ.
87
- std::vector<uint8_t>
88
- getListVectorData(int listId, bool gpuFormat = false) const override;
89
-
90
- /// Return the vector indices contained in a particular inverted list, for
91
- /// debugging purposes.
92
- std::vector<Index::idx_t> getListIndices(int listId) const override;
93
-
94
- protected:
95
- /// Called from GpuIndex for add/add_with_ids
96
- void addImpl_(int n,
97
- const float* x,
98
- const Index::idx_t* ids) override;
99
-
100
- /// Called from GpuIndex for search
101
- void searchImpl_(int n,
102
- const float* x,
103
- int k,
104
- float* distances,
105
- Index::idx_t* labels) const override;
106
-
107
- /// Called from train to handle SQ residual training
108
- void trainResiduals_(Index::idx_t n, const float* x);
109
-
110
- public:
111
- /// Exposed like the CPU version
112
- faiss::ScalarQuantizer sq;
113
-
114
- /// Exposed like the CPU version
115
- bool by_residual;
116
-
117
- protected:
118
- /// Our configuration options
119
- const GpuIndexIVFScalarQuantizerConfig ivfSQConfig_;
120
-
121
- /// Desired inverted list memory reservation
122
- size_t reserveMemoryVecs_;
123
-
124
- /// Instance that we own; contains the inverted list
125
- std::unique_ptr<IVFFlat> index_;
31
+ public:
32
+ /// Construct from a pre-existing faiss::IndexIVFScalarQuantizer instance,
33
+ /// copying data over to the given GPU, if the input index is trained.
34
+ GpuIndexIVFScalarQuantizer(
35
+ GpuResourcesProvider* provider,
36
+ const faiss::IndexIVFScalarQuantizer* index,
37
+ GpuIndexIVFScalarQuantizerConfig config =
38
+ GpuIndexIVFScalarQuantizerConfig());
39
+
40
+ /// Constructs a new instance with an empty flat quantizer; the user
41
+ /// provides the number of lists desired.
42
+ GpuIndexIVFScalarQuantizer(
43
+ GpuResourcesProvider* provider,
44
+ int dims,
45
+ int nlist,
46
+ faiss::ScalarQuantizer::QuantizerType qtype,
47
+ faiss::MetricType metric = MetricType::METRIC_L2,
48
+ bool encodeResidual = true,
49
+ GpuIndexIVFScalarQuantizerConfig config =
50
+ GpuIndexIVFScalarQuantizerConfig());
51
+
52
+ ~GpuIndexIVFScalarQuantizer() override;
53
+
54
+ /// Reserve GPU memory in our inverted lists for this number of vectors
55
+ void reserveMemory(size_t numVecs);
56
+
57
+ /// Initialize ourselves from the given CPU index; will overwrite
58
+ /// all data in ourselves
59
+ void copyFrom(const faiss::IndexIVFScalarQuantizer* index);
60
+
61
+ /// Copy ourselves to the given CPU index; will overwrite all data
62
+ /// in the index instance
63
+ void copyTo(faiss::IndexIVFScalarQuantizer* index) const;
64
+
65
+ /// After adding vectors, one can call this to reclaim device memory
66
+ /// to exactly the amount needed. Returns space reclaimed in bytes
67
+ size_t reclaimMemory();
68
+
69
+ /// Clears out all inverted lists, but retains the coarse and scalar
70
+ /// quantizer information
71
+ void reset() override;
72
+
73
+ /// Trains the coarse and scalar quantizer based on the given vector data
74
+ void train(Index::idx_t n, const float* x) override;
75
+
76
+ /// Returns the number of vectors present in a particular inverted list
77
+ int getListLength(int listId) const override;
78
+
79
+ /// Return the encoded vector data contained in a particular inverted list,
80
+ /// for debugging purposes.
81
+ /// If gpuFormat is true, the data is returned as it is encoded in the
82
+ /// GPU-side representation.
83
+ /// Otherwise, it is converted to the CPU-compliant format,
84
+ /// while the native GPU format may differ.
85
+ std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat = false)
86
+ const override;
87
+
88
+ /// Return the vector indices contained in a particular inverted list, for
89
+ /// debugging purposes.
90
+ std::vector<Index::idx_t> getListIndices(int listId) const override;
91
+
92
+ protected:
93
+ /// Called from GpuIndex for add/add_with_ids
94
+ void addImpl_(int n, const float* x, const Index::idx_t* ids) override;
95
+
96
+ /// Called from GpuIndex for search
97
+ void searchImpl_(
98
+ int n,
99
+ const float* x,
100
+ int k,
101
+ float* distances,
102
+ Index::idx_t* labels) const override;
103
+
104
+ /// Called from train to handle SQ residual training
105
+ void trainResiduals_(Index::idx_t n, const float* x);
106
+
107
+ public:
108
+ /// Exposed like the CPU version
109
+ faiss::ScalarQuantizer sq;
110
+
111
+ /// Exposed like the CPU version
112
+ bool by_residual;
113
+
114
+ protected:
115
+ /// Our configuration options
116
+ const GpuIndexIVFScalarQuantizerConfig ivfSQConfig_;
117
+
118
+ /// Desired inverted list memory reservation
119
+ size_t reserveMemoryVecs_;
120
+
121
+ /// Instance that we own; contains the inverted list
122
+ std::unique_ptr<IVFFlat> index_;
126
123
  };
127
124
 
128
- } } // namespace
125
+ } // namespace gpu
126
+ } // namespace faiss
@@ -5,26 +5,27 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #pragma once
10
9
 
11
- namespace faiss { namespace gpu {
10
+ namespace faiss {
11
+ namespace gpu {
12
12
 
13
13
  /// How user vector index data is stored on the GPU
14
14
  enum IndicesOptions {
15
- /// The user indices are only stored on the CPU; the GPU returns
16
- /// (inverted list, offset) to the CPU which is then translated to
17
- /// the real user index.
18
- INDICES_CPU = 0,
19
- /// The indices are not stored at all, on either the CPU or
20
- /// GPU. Only (inverted list, offset) is returned to the user as the
21
- /// index.
22
- INDICES_IVF = 1,
23
- /// Indices are stored as 32 bit integers on the GPU, but returned
24
- /// as 64 bit integers
25
- INDICES_32_BIT = 2,
26
- /// Indices are stored as 64 bit integers on the GPU
27
- INDICES_64_BIT = 3,
15
+ /// The user indices are only stored on the CPU; the GPU returns
16
+ /// (inverted list, offset) to the CPU which is then translated to
17
+ /// the real user index.
18
+ INDICES_CPU = 0,
19
+ /// The indices are not stored at all, on either the CPU or
20
+ /// GPU. Only (inverted list, offset) is returned to the user as the
21
+ /// index.
22
+ INDICES_IVF = 1,
23
+ /// Indices are stored as 32 bit integers on the GPU, but returned
24
+ /// as 64 bit integers
25
+ INDICES_32_BIT = 2,
26
+ /// Indices are stored as 64 bit integers on the GPU
27
+ INDICES_64_BIT = 3,
28
28
  };
29
29
 
30
- } } // namespace
30
+ } // namespace gpu
31
+ } // namespace faiss
@@ -5,76 +5,72 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #include <faiss/gpu/GpuResources.h>
10
9
  #include <faiss/gpu/utils/DeviceUtils.h>
11
10
  #include <sstream>
12
11
 
13
- namespace faiss { namespace gpu {
12
+ namespace faiss {
13
+ namespace gpu {
14
14
 
15
15
  std::string allocTypeToString(AllocType t) {
16
- switch (t) {
17
- case AllocType::Other:
18
- return "Other";
19
- case AllocType::FlatData:
20
- return "FlatData";
21
- case AllocType::IVFLists:
22
- return "IVFLists";
23
- case AllocType::Quantizer:
24
- return "Quantizer";
25
- case AllocType::QuantizerPrecomputedCodes:
26
- return "QuantizerPrecomputedCodes";
27
- case AllocType::TemporaryMemoryBuffer:
28
- return "TemporaryMemoryBuffer";
29
- case AllocType::TemporaryMemoryOverflow:
30
- return "TemporaryMemoryOverflow";
31
- default:
32
- return "Unknown";
33
- }
16
+ switch (t) {
17
+ case AllocType::Other:
18
+ return "Other";
19
+ case AllocType::FlatData:
20
+ return "FlatData";
21
+ case AllocType::IVFLists:
22
+ return "IVFLists";
23
+ case AllocType::Quantizer:
24
+ return "Quantizer";
25
+ case AllocType::QuantizerPrecomputedCodes:
26
+ return "QuantizerPrecomputedCodes";
27
+ case AllocType::TemporaryMemoryBuffer:
28
+ return "TemporaryMemoryBuffer";
29
+ case AllocType::TemporaryMemoryOverflow:
30
+ return "TemporaryMemoryOverflow";
31
+ default:
32
+ return "Unknown";
33
+ }
34
34
  }
35
35
 
36
36
  std::string memorySpaceToString(MemorySpace s) {
37
- switch (s) {
38
- case MemorySpace::Temporary:
39
- return "Temporary";
40
- case MemorySpace::Device:
41
- return "Device";
42
- case MemorySpace::Unified:
43
- return "Unified";
44
- default:
45
- return "Unknown";
46
- }
37
+ switch (s) {
38
+ case MemorySpace::Temporary:
39
+ return "Temporary";
40
+ case MemorySpace::Device:
41
+ return "Device";
42
+ case MemorySpace::Unified:
43
+ return "Unified";
44
+ default:
45
+ return "Unknown";
46
+ }
47
47
  }
48
48
 
49
- std::string
50
- AllocInfo::toString() const {
51
- std::stringstream ss;
52
- ss << "type " << allocTypeToString(type)
53
- << " dev " << device
54
- << " space " << memorySpaceToString(space)
55
- << " stream " << (void*) stream;
49
+ std::string AllocInfo::toString() const {
50
+ std::stringstream ss;
51
+ ss << "type " << allocTypeToString(type) << " dev " << device << " space "
52
+ << memorySpaceToString(space) << " stream " << (void*)stream;
56
53
 
57
- return ss.str();
54
+ return ss.str();
58
55
  }
59
56
 
60
- std::string
61
- AllocRequest::toString() const {
62
- std::stringstream ss;
63
- ss << AllocInfo::toString() << " size " << size << " bytes";
57
+ std::string AllocRequest::toString() const {
58
+ std::stringstream ss;
59
+ ss << AllocInfo::toString() << " size " << size << " bytes";
64
60
 
65
- return ss.str();
61
+ return ss.str();
66
62
  }
67
63
 
68
64
  AllocInfo makeDevAlloc(AllocType at, cudaStream_t st) {
69
- return AllocInfo(at, getCurrentDevice(), MemorySpace::Device, st);
65
+ return AllocInfo(at, getCurrentDevice(), MemorySpace::Device, st);
70
66
  }
71
67
 
72
68
  AllocInfo makeTempAlloc(AllocType at, cudaStream_t st) {
73
- return AllocInfo(at, getCurrentDevice(), MemorySpace::Temporary, st);
69
+ return AllocInfo(at, getCurrentDevice(), MemorySpace::Temporary, st);
74
70
  }
75
71
 
76
72
  AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st) {
77
- return AllocInfo(at, getCurrentDevice(), sp, st);
73
+ return AllocInfo(at, getCurrentDevice(), sp, st);
78
74
  }
79
75
 
80
76
  //
@@ -82,119 +78,111 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st) {
82
78
  //
83
79
 
84
80
  GpuMemoryReservation::GpuMemoryReservation()
85
- : res(nullptr),
86
- device(0),
87
- stream(nullptr),
88
- data(nullptr),
89
- size(0) {
90
- }
81
+ : res(nullptr), device(0), stream(nullptr), data(nullptr), size(0) {}
91
82
 
92
- GpuMemoryReservation::GpuMemoryReservation(GpuResources* r,
93
- int dev,
94
- cudaStream_t str,
95
- void* p,
96
- size_t sz)
97
- : res(r),
98
- device(dev),
99
- stream(str),
100
- data(p),
101
- size(sz) {
102
- }
83
+ GpuMemoryReservation::GpuMemoryReservation(
84
+ GpuResources* r,
85
+ int dev,
86
+ cudaStream_t str,
87
+ void* p,
88
+ size_t sz)
89
+ : res(r), device(dev), stream(str), data(p), size(sz) {}
103
90
 
104
91
  GpuMemoryReservation::GpuMemoryReservation(GpuMemoryReservation&& m) noexcept {
105
- res = m.res; m.res = nullptr;
106
- device = m.device; m.device = 0;
107
- stream = m.stream; m.stream = nullptr;
108
- data = m.data; m.data = nullptr;
109
- size = m.size; m.size = 0;
110
- }
111
-
112
- GpuMemoryReservation&
113
- GpuMemoryReservation::operator=(GpuMemoryReservation&& m) {
114
- // Can't be both a valid allocation and the same allocation
115
- FAISS_ASSERT(!(res && res == m.res && device == m.device && data == m.data));
116
-
117
- release();
118
- res = m.res; m.res = nullptr;
119
- device = m.device; m.device = 0;
120
- stream = m.stream; m.stream = nullptr;
121
- data = m.data; m.data = nullptr;
122
- size = m.size; m.size = 0;
123
-
124
- return *this;
125
- }
126
-
127
- void
128
- GpuMemoryReservation::release() {
129
- if (res) {
130
- res->deallocMemory(device, data);
131
- res = nullptr;
132
- device = 0;
133
- stream = nullptr;
134
- data = nullptr;
135
- size = 0;
136
- }
92
+ res = m.res;
93
+ m.res = nullptr;
94
+ device = m.device;
95
+ m.device = 0;
96
+ stream = m.stream;
97
+ m.stream = nullptr;
98
+ data = m.data;
99
+ m.data = nullptr;
100
+ size = m.size;
101
+ m.size = 0;
102
+ }
103
+
104
+ GpuMemoryReservation& GpuMemoryReservation::operator=(
105
+ GpuMemoryReservation&& m) {
106
+ // Can't be both a valid allocation and the same allocation
107
+ FAISS_ASSERT(
108
+ !(res && res == m.res && device == m.device && data == m.data));
109
+
110
+ release();
111
+ res = m.res;
112
+ m.res = nullptr;
113
+ device = m.device;
114
+ m.device = 0;
115
+ stream = m.stream;
116
+ m.stream = nullptr;
117
+ data = m.data;
118
+ m.data = nullptr;
119
+ size = m.size;
120
+ m.size = 0;
121
+
122
+ return *this;
123
+ }
124
+
125
+ void GpuMemoryReservation::release() {
126
+ if (res) {
127
+ res->deallocMemory(device, data);
128
+ res = nullptr;
129
+ device = 0;
130
+ stream = nullptr;
131
+ data = nullptr;
132
+ size = 0;
133
+ }
137
134
  }
138
135
 
139
136
  GpuMemoryReservation::~GpuMemoryReservation() {
140
- if (res) {
141
- res->deallocMemory(device, data);
142
- }
137
+ if (res) {
138
+ res->deallocMemory(device, data);
139
+ }
143
140
  }
144
141
 
145
142
  //
146
143
  // GpuResources
147
144
  //
148
145
 
149
- GpuResources::~GpuResources() {
150
- }
146
+ GpuResources::~GpuResources() {}
151
147
 
152
- cublasHandle_t
153
- GpuResources::getBlasHandleCurrentDevice() {
154
- return getBlasHandle(getCurrentDevice());
148
+ cublasHandle_t GpuResources::getBlasHandleCurrentDevice() {
149
+ return getBlasHandle(getCurrentDevice());
155
150
  }
156
151
 
157
- cudaStream_t
158
- GpuResources::getDefaultStreamCurrentDevice() {
159
- return getDefaultStream(getCurrentDevice());
152
+ cudaStream_t GpuResources::getDefaultStreamCurrentDevice() {
153
+ return getDefaultStream(getCurrentDevice());
160
154
  }
161
155
 
162
- std::vector<cudaStream_t>
163
- GpuResources::getAlternateStreamsCurrentDevice() {
164
- return getAlternateStreams(getCurrentDevice());
156
+ std::vector<cudaStream_t> GpuResources::getAlternateStreamsCurrentDevice() {
157
+ return getAlternateStreams(getCurrentDevice());
165
158
  }
166
159
 
167
- cudaStream_t
168
- GpuResources::getAsyncCopyStreamCurrentDevice() {
169
- return getAsyncCopyStream(getCurrentDevice());
160
+ cudaStream_t GpuResources::getAsyncCopyStreamCurrentDevice() {
161
+ return getAsyncCopyStream(getCurrentDevice());
170
162
  }
171
163
 
172
- void
173
- GpuResources::syncDefaultStream(int device) {
174
- CUDA_VERIFY(cudaStreamSynchronize(getDefaultStream(device)));
164
+ void GpuResources::syncDefaultStream(int device) {
165
+ CUDA_VERIFY(cudaStreamSynchronize(getDefaultStream(device)));
175
166
  }
176
167
 
177
- void
178
- GpuResources::syncDefaultStreamCurrentDevice() {
179
- syncDefaultStream(getCurrentDevice());
168
+ void GpuResources::syncDefaultStreamCurrentDevice() {
169
+ syncDefaultStream(getCurrentDevice());
180
170
  }
181
171
 
182
- GpuMemoryReservation
183
- GpuResources::allocMemoryHandle(const AllocRequest& req) {
184
- return GpuMemoryReservation(
185
- this, req.device, req.stream, allocMemory(req), req.size);
172
+ GpuMemoryReservation GpuResources::allocMemoryHandle(const AllocRequest& req) {
173
+ return GpuMemoryReservation(
174
+ this, req.device, req.stream, allocMemory(req), req.size);
186
175
  }
187
176
 
188
- size_t
189
- GpuResources::getTempMemoryAvailableCurrentDevice() const {
190
- return getTempMemoryAvailable(getCurrentDevice());
177
+ size_t GpuResources::getTempMemoryAvailableCurrentDevice() const {
178
+ return getTempMemoryAvailable(getCurrentDevice());
191
179
  }
192
180
 
193
181
  //
194
182
  // GpuResourcesProvider
195
183
  //
196
184
 
197
- GpuResourcesProvider::~GpuResourcesProvider() {
198
- }
185
+ GpuResourcesProvider::~GpuResourcesProvider() {}
199
186
 
200
- } } // namespace
187
+ } // namespace gpu
188
+ } // namespace faiss