faiss 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (199)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +16 -4
  5. data/ext/faiss/ext.cpp +12 -308
  6. data/ext/faiss/extconf.rb +6 -3
  7. data/ext/faiss/index.cpp +189 -0
  8. data/ext/faiss/index_binary.cpp +75 -0
  9. data/ext/faiss/kmeans.cpp +40 -0
  10. data/ext/faiss/numo.hpp +867 -0
  11. data/ext/faiss/pca_matrix.cpp +33 -0
  12. data/ext/faiss/product_quantizer.cpp +53 -0
  13. data/ext/faiss/utils.cpp +13 -0
  14. data/ext/faiss/utils.h +5 -0
  15. data/lib/faiss.rb +0 -5
  16. data/lib/faiss/version.rb +1 -1
  17. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  18. data/vendor/faiss/faiss/AutoTune.h +6 -3
  19. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  20. data/vendor/faiss/faiss/Index.cpp +3 -4
  21. data/vendor/faiss/faiss/Index.h +3 -3
  22. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  23. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  26. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  27. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  29. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  30. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  31. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  32. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  33. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  34. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  35. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  37. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  38. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  39. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  41. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  42. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  43. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  44. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  45. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  46. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  47. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  48. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  49. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  50. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  51. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  52. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  53. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  54. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  55. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  56. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  57. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  58. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  59. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  60. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  61. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  62. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  63. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  64. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  65. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  66. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  67. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  68. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  69. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  70. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  71. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  72. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  73. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  74. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  75. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  76. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  77. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  78. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  79. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  80. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  81. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  82. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  83. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  84. data/vendor/faiss/faiss/impl/io.h +7 -2
  85. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  86. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  87. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  88. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  89. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  90. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  91. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  92. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  93. data/vendor/faiss/faiss/index_io.h +1 -48
  94. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  95. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  96. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  97. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  98. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  99. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  100. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  101. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  102. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  103. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  104. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  105. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  106. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  107. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  108. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  109. data/vendor/faiss/faiss/utils/distances.h +28 -20
  110. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  111. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  112. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  113. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  114. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  115. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  116. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  117. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  118. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  119. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  120. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  121. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  122. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  123. metadata +54 -149
  124. data/lib/faiss/index.rb +0 -20
  125. data/lib/faiss/index_binary.rb +0 -20
  126. data/lib/faiss/kmeans.rb +0 -15
  127. data/lib/faiss/pca_matrix.rb +0 -15
  128. data/lib/faiss/product_quantizer.rb +0 -22
  129. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  130. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  131. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  132. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  133. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  134. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  135. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  136. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  137. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  138. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  139. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  140. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  141. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  142. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  143. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  144. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  145. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  146. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  147. data/vendor/faiss/c_api/Index_c.h +0 -183
  148. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  149. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  150. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  151. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  152. data/vendor/faiss/c_api/error_c.h +0 -42
  153. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  154. data/vendor/faiss/c_api/error_impl.h +0 -16
  155. data/vendor/faiss/c_api/faiss_c.h +0 -58
  156. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  157. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  158. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  159. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  160. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  161. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  162. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  163. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  164. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  165. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  166. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  167. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  168. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  169. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  170. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  171. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  172. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  173. data/vendor/faiss/c_api/index_io_c.h +0 -50
  174. data/vendor/faiss/c_api/macros_impl.h +0 -110
  175. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  176. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  177. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  178. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  179. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  180. data/vendor/faiss/misc/test_blas.cpp +0 -87
  181. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  182. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  183. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  184. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  185. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  186. data/vendor/faiss/tests/test_merge.cpp +0 -260
  187. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  188. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  189. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  190. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  191. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  192. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  193. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  194. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  195. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  196. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  197. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  198. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  199. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -192,7 +192,7 @@ void IndexIVFScalarQuantizer::encode_vectors(idx_t n, const float* x,
192
192
  size_t coarse_size = include_listnos ? coarse_code_size () : 0;
193
193
  memset(codes, 0, (code_size + coarse_size) * n);
194
194
 
195
- #pragma omp parallel if(n > 1)
195
+ #pragma omp parallel if(n > 1000)
196
196
  {
197
197
  std::vector<float> residual (d);
198
198
 
@@ -222,7 +222,7 @@ void IndexIVFScalarQuantizer::sa_decode (idx_t n, const uint8_t *codes,
222
222
  std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
223
223
  size_t coarse_size = coarse_code_size ();
224
224
 
225
- #pragma omp parallel if(n > 1)
225
+ #pragma omp parallel if(n > 1000)
226
226
  {
227
227
  std::vector<float> residual (d);
228
228
 
@@ -82,7 +82,7 @@ struct IndexScalarQuantizer: Index {
82
82
 
83
83
 
84
84
  /** An IVF implementation where the components of the residuals are
85
- * encoded with a scalar uniform quantizer. All distance computations
85
+ * encoded with a scalar quantizer. All distance computations
86
86
  * are asymmetric, so the encoded vectors are decoded and approximate
87
87
  * distances are computed.
88
88
  */
@@ -148,6 +148,6 @@ void bruteForceKnn(GpuResourcesProvider* resources,
148
148
  float* outDistances,
149
149
  // A region of memory size numQueries x k, with k
150
150
  // innermost (row major)
151
- faiss::Index::idx_t* outIndices);
151
+ Index::idx_t* outIndices);
152
152
 
153
153
  } } // namespace
@@ -36,9 +36,12 @@ class GpuIndex : public faiss::Index {
36
36
  float metricArg,
37
37
  GpuIndexConfig config);
38
38
 
39
- inline int getDevice() const {
40
- return device_;
41
- }
39
+ /// Returns the device that this index is resident on
40
+ int getDevice() const;
41
+
42
+ /// Returns a reference to our GpuResources object that manages memory, stream
43
+ /// and handle resources on the GPU
44
+ std::shared_ptr<GpuResources> getResources();
42
45
 
43
46
  /// Set the minimum data size for searches (in MiB) for which we use
44
47
  /// CPU -> GPU paging
@@ -50,7 +53,7 @@ class GpuIndex : public faiss::Index {
50
53
  /// `x` can be resident on the CPU or any GPU; copies are performed
51
54
  /// as needed
52
55
  /// Handles paged adds if the add set is too large; calls addInternal_
53
- void add(faiss::Index::idx_t, const float* x) override;
56
+ void add(Index::idx_t, const float* x) override;
54
57
 
55
58
  /// `x` and `ids` can be resident on the CPU or any GPU; copies are
56
59
  /// performed as needed
@@ -59,6 +62,13 @@ class GpuIndex : public faiss::Index {
59
62
  const float* x,
60
63
  const Index::idx_t* ids) override;
61
64
 
65
+ /// `x` and `labels` can be resident on the CPU or any GPU; copies are
66
+ /// performed as needed
67
+ void assign(Index::idx_t n,
68
+ const float* x,
69
+ Index::idx_t* labels,
70
+ Index::idx_t k = 1) const override;
71
+
62
72
  /// `x`, `distances` and `labels` can be resident on the CPU or any
63
73
  /// GPU; copies are performed as needed
64
74
  void search(Index::idx_t n,
@@ -136,11 +146,8 @@ private:
136
146
  /// Manages streams, cuBLAS handles and scratch memory for devices
137
147
  std::shared_ptr<GpuResources> resources_;
138
148
 
139
- /// The GPU device we are resident on
140
- const int device_;
141
-
142
- /// The memory space of our primary storage on the GPU
143
- const MemorySpace memorySpace_;
149
+ /// Our configuration options
150
+ const GpuIndexConfig config_;
144
151
 
145
152
  /// Size above which we page copies from the CPU to GPU
146
153
  size_t minPagedSize_;
@@ -38,6 +38,13 @@ class GpuIndexBinaryFlat : public IndexBinary {
38
38
 
39
39
  ~GpuIndexBinaryFlat() override;
40
40
 
41
+ /// Returns the device that this index is resident on
42
+ int getDevice() const;
43
+
44
+ /// Returns a reference to our GpuResources object that manages memory, stream
45
+ /// and handle resources on the GPU
46
+ std::shared_ptr<GpuResources> getResources();
47
+
41
48
  /// Initialize ourselves from the given CPU index; will overwrite
42
49
  /// all data in ourselves
43
50
  void copyFrom(const faiss::IndexBinaryFlat* index);
@@ -80,7 +87,7 @@ class GpuIndexBinaryFlat : public IndexBinary {
80
87
  std::shared_ptr<GpuResources> resources_;
81
88
 
82
89
  /// Configuration options
83
- GpuIndexBinaryFlatConfig config_;
90
+ const GpuIndexBinaryFlatConfig binaryFlatConfig_;
84
91
 
85
92
  /// Holds our GPU data containing the list of vectors
86
93
  std::unique_ptr<BinaryFlatIndex> data_;
@@ -21,7 +21,7 @@ struct IndexFlatIP;
21
21
 
22
22
  namespace faiss { namespace gpu {
23
23
 
24
- struct FlatIndex;
24
+ class FlatIndex;
25
25
 
26
26
  struct GpuIndexFlatConfig : public GpuIndexConfig {
27
27
  inline GpuIndexFlatConfig()
@@ -87,27 +87,27 @@ class GpuIndexFlat : public GpuIndex {
87
87
  void train(Index::idx_t n, const float* x) override;
88
88
 
89
89
  /// Overrides to avoid excessive copies
90
- void add(faiss::Index::idx_t, const float* x) override;
90
+ void add(Index::idx_t, const float* x) override;
91
91
 
92
92
  /// Reconstruction methods; prefer the batch reconstruct as it will
93
93
  /// be more efficient
94
- void reconstruct(faiss::Index::idx_t key, float* out) const override;
94
+ void reconstruct(Index::idx_t key, float* out) const override;
95
95
 
96
96
  /// Batch reconstruction method
97
- void reconstruct_n(faiss::Index::idx_t i0,
98
- faiss::Index::idx_t num,
97
+ void reconstruct_n(Index::idx_t i0,
98
+ Index::idx_t num,
99
99
  float* out) const override;
100
100
 
101
101
  /// Compute residual
102
102
  void compute_residual(const float* x,
103
103
  float* residual,
104
- faiss::Index::idx_t key) const override;
104
+ Index::idx_t key) const override;
105
105
 
106
106
  /// Compute residual (batch mode)
107
- void compute_residual_n(faiss::Index::idx_t n,
107
+ void compute_residual_n(Index::idx_t n,
108
108
  const float* xs,
109
109
  float* residuals,
110
- const faiss::Index::idx_t* keys) const override;
110
+ const Index::idx_t* keys) const override;
111
111
 
112
112
  /// For internal access
113
113
  inline FlatIndex* getGpuData() { return data_.get(); }
@@ -126,11 +126,11 @@ class GpuIndexFlat : public GpuIndex {
126
126
  const float* x,
127
127
  int k,
128
128
  float* distances,
129
- faiss::Index::idx_t* labels) const override;
129
+ Index::idx_t* labels) const override;
130
130
 
131
131
  protected:
132
- /// Our config object
133
- const GpuIndexFlatConfig config_;
132
+ /// Our configuration options
133
+ const GpuIndexFlatConfig flatConfig_;
134
134
 
135
135
  /// Holds our GPU data containing the list of vectors
136
136
  std::unique_ptr<FlatIndex> data_;
@@ -56,6 +56,22 @@ class GpuIndexIVF : public GpuIndex {
56
56
  /// Returns the number of inverted lists we're managing
57
57
  int getNumLists() const;
58
58
 
59
+ /// Returns the number of vectors present in a particular inverted list
60
+ virtual int getListLength(int listId) const = 0;
61
+
62
+ /// Return the encoded vector data contained in a particular inverted list,
63
+ /// for debugging purposes.
64
+ /// If gpuFormat is true, the data is returned as it is encoded in the
65
+ /// GPU-side representation.
66
+ /// Otherwise, it is converted to the CPU-compliant format; the native
67
+ /// GPU format may differ.
68
+ virtual std::vector<uint8_t>
69
+ getListVectorData(int listId, bool gpuFormat = false) const = 0;
70
+
71
+ /// Return the vector indices contained in a particular inverted list, for
72
+ /// debugging purposes.
73
+ virtual std::vector<Index::idx_t> getListIndices(int listId) const = 0;
74
+
59
75
  /// Return the quantizer we're using
60
76
  GpuIndexFlat* getQuantizer();
61
77
 
@@ -67,7 +83,7 @@ class GpuIndexIVF : public GpuIndex {
67
83
 
68
84
  protected:
69
85
  bool addImplRequiresIDs_() const override;
70
- void trainQuantizer_(faiss::Index::idx_t n, const float* x);
86
+ void trainQuantizer_(Index::idx_t n, const float* x);
71
87
 
72
88
  public:
73
89
  /// Exposing this like the CPU version for manipulation
@@ -83,7 +99,8 @@ class GpuIndexIVF : public GpuIndex {
83
99
  GpuIndexFlat* quantizer;
84
100
 
85
101
  protected:
86
- GpuIndexIVFConfig ivfConfig_;
102
+ /// Our configuration options
103
+ const GpuIndexIVFConfig ivfConfig_;
87
104
  };
88
105
 
89
106
  } } // namespace
@@ -19,6 +19,13 @@ class IVFFlat;
19
19
  class GpuIndexFlat;
20
20
 
21
21
  struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig {
22
+ inline GpuIndexIVFFlatConfig()
23
+ : interleavedLayout(true) {
24
+ }
25
+
26
+ /// Use the alternative memory layout for the IVF lists
27
+ /// (currently the default)
28
+ bool interleavedLayout;
22
29
  };
23
30
 
24
31
  /// Wrapper around the GPU implementation that looks like
@@ -56,10 +63,28 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
56
63
  /// to exactly the amount needed. Returns space reclaimed in bytes
57
64
  size_t reclaimMemory();
58
65
 
66
+ /// Clears out all inverted lists, but retains the coarse centroid information
59
67
  void reset() override;
60
68
 
69
+ /// Trains the coarse quantizer based on the given vector data
61
70
  void train(Index::idx_t n, const float* x) override;
62
71
 
72
+ /// Returns the number of vectors present in a particular inverted list
73
+ int getListLength(int listId) const override;
74
+
75
+ /// Return the encoded vector data contained in a particular inverted list,
76
+ /// for debugging purposes.
77
+ /// If gpuFormat is true, the data is returned as it is encoded in the
78
+ /// GPU-side representation.
79
+ /// Otherwise, it is converted to the CPU-compliant format; the native
80
+ /// GPU format may differ.
81
+ std::vector<uint8_t>
82
+ getListVectorData(int listId, bool gpuFormat = false) const override;
83
+
84
+ /// Return the vector indices contained in a particular inverted list, for
85
+ /// debugging purposes.
86
+ std::vector<Index::idx_t> getListIndices(int listId) const override;
87
+
63
88
  protected:
64
89
  /// Called from GpuIndex for add/add_with_ids
65
90
  void addImpl_(int n,
@@ -73,8 +98,9 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
73
98
  float* distances,
74
99
  Index::idx_t* labels) const override;
75
100
 
76
- private:
77
- GpuIndexIVFFlatConfig ivfFlatConfig_;
101
+ protected:
102
+ /// Our configuration options
103
+ const GpuIndexIVFFlatConfig ivfFlatConfig_;
78
104
 
79
105
  /// Desired inverted list memory reservation
80
106
  size_t reserveMemoryVecs_;
@@ -23,7 +23,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
23
23
  inline GpuIndexIVFPQConfig()
24
24
  : useFloat16LookupTables(false),
25
25
  usePrecomputedTables(false),
26
- alternativeLayout(false),
26
+ interleavedLayout(false),
27
27
  useMMCodeDistance(false) {
28
28
  }
29
29
 
@@ -38,7 +38,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
38
38
 
39
39
  /// Use the alternative memory layout for the IVF lists
40
40
  /// WARNING: this is a feature under development, do not use!
41
- bool alternativeLayout;
41
+ bool interleavedLayout;
42
42
 
43
43
  /// Use GEMM-backed computation of PQ code distances for the no precomputed
44
44
  /// table version of IVFPQ.
@@ -108,19 +108,24 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
108
108
  /// product centroid information
109
109
  void reset() override;
110
110
 
111
+ /// Trains the coarse and product quantizer based on the given vector data
111
112
  void train(Index::idx_t n, const float* x) override;
112
113
 
113
- /// For debugging purposes, return the list length of a particular
114
- /// list
115
- int getListLength(int listId) const;
114
+ /// Returns the number of vectors present in a particular inverted list
115
+ int getListLength(int listId) const override;
116
116
 
117
- /// For debugging purposes, return the list codes of a particular
118
- /// list
119
- std::vector<unsigned char> getListCodes(int listId) const;
117
+ /// Return the encoded vector data contained in a particular inverted list,
118
+ /// for debugging purposes.
119
+ /// If gpuFormat is true, the data is returned as it is encoded in the
120
+ /// GPU-side representation.
121
+ /// Otherwise, it is converted to the CPU-compliant format; the native
122
+ /// GPU format may differ.
123
+ std::vector<uint8_t>
124
+ getListVectorData(int listId, bool gpuFormat = false) const override;
120
125
 
121
- /// For debugging purposes, return the list indices of a particular
122
- /// list
123
- std::vector<long> getListIndices(int listId) const;
126
+ /// Return the vector indices contained in a particular inverted list, for
127
+ /// debugging purposes.
128
+ std::vector<Index::idx_t> getListIndices(int listId) const override;
124
129
 
125
130
  protected:
126
131
  /// Called from GpuIndex for add/add_with_ids
@@ -135,13 +140,18 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
135
140
  float* distances,
136
141
  Index::idx_t* labels) const override;
137
142
 
138
- private:
143
+ /// Throws errors if configuration settings are improper
139
144
  void verifySettings_() const;
140
145
 
146
+ /// Trains the PQ quantizer based on the given vector data
141
147
  void trainResidualQuantizer_(Index::idx_t n, const float* x);
142
148
 
143
- private:
144
- GpuIndexIVFPQConfig ivfpqConfig_;
149
+ protected:
150
+ /// Our configuration options that we were initialized with
151
+ const GpuIndexIVFPQConfig ivfpqConfig_;
152
+
153
+ /// Runtime override: whether or not we use precomputed tables
154
+ bool usePrecomputedTables_;
145
155
 
146
156
  /// Number of sub-quantizers per encoded vector
147
157
  int subQuantizers_;
@@ -18,6 +18,13 @@ class IVFFlat;
18
18
  class GpuIndexFlat;
19
19
 
20
20
  struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
21
+ inline GpuIndexIVFScalarQuantizerConfig()
22
+ : interleavedLayout(true) {
23
+ }
24
+
25
+ /// Use the alternative memory layout for the IVF lists
26
+ /// (currently the default)
27
+ bool interleavedLayout;
21
28
  };
22
29
 
23
30
  /// Wrapper around the GPU implementation that looks like
@@ -61,10 +68,29 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
61
68
  /// to exactly the amount needed. Returns space reclaimed in bytes
62
69
  size_t reclaimMemory();
63
70
 
71
+ /// Clears out all inverted lists, but retains the coarse and scalar quantizer
72
+ /// information
64
73
  void reset() override;
65
74
 
75
+ /// Trains the coarse and scalar quantizer based on the given vector data
66
76
  void train(Index::idx_t n, const float* x) override;
67
77
 
78
+ /// Returns the number of vectors present in a particular inverted list
79
+ int getListLength(int listId) const override;
80
+
81
+ /// Return the encoded vector data contained in a particular inverted list,
82
+ /// for debugging purposes.
83
+ /// If gpuFormat is true, the data is returned as it is encoded in the
84
+ /// GPU-side representation.
85
+ /// Otherwise, it is converted to the CPU-compliant format; the native
86
+ /// GPU format may differ.
87
+ std::vector<uint8_t>
88
+ getListVectorData(int listId, bool gpuFormat = false) const override;
89
+
90
+ /// Return the vector indices contained in a particular inverted list, for
91
+ /// debugging purposes.
92
+ std::vector<Index::idx_t> getListIndices(int listId) const override;
93
+
68
94
  protected:
69
95
  /// Called from GpuIndex for add/add_with_ids
70
96
  void addImpl_(int n,
@@ -88,8 +114,9 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
88
114
  /// Exposed like the CPU version
89
115
  bool by_residual;
90
116
 
91
- private:
92
- GpuIndexIVFScalarQuantizerConfig ivfSQConfig_;
117
+ protected:
118
+ /// Our configuration options
119
+ const GpuIndexIVFScalarQuantizerConfig ivfSQConfig_;
93
120
 
94
121
  /// Desired inverted list memory reservation
95
122
  size_t reserveMemoryVecs_;
@@ -198,6 +198,10 @@ class GpuResources {
198
198
  /// given device
199
199
  virtual cudaStream_t getDefaultStream(int device) = 0;
200
200
 
201
+ /// Overrides the default stream for a device to the user-supplied stream. The
202
+ /// resources object does not own this stream (i.e., it will not destroy it).
203
+ virtual void setDefaultStream(int device, cudaStream_t stream) = 0;
204
+
201
205
  /// Returns the set of alternative streams that we use for the given device
202
206
  virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
203
207
 
@@ -101,12 +101,8 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
101
101
  for (auto& entry : defaultStreams_) {
102
102
  DeviceScope scope(entry.first);
103
103
 
104
- auto it = userDefaultStreams_.find(entry.first);
105
- if (it == userDefaultStreams_.end()) {
106
- // The user did not specify this stream, thus we are the ones
107
- // who have created it
108
- CUDA_VERIFY(cudaStreamDestroy(entry.second));
109
- }
104
+ // We created these streams, so are responsible for destroying them
105
+ CUDA_VERIFY(cudaStreamDestroy(entry.second));
110
106
  }
111
107
 
112
108
  for (auto& entry : alternateStreams_) {
@@ -210,16 +206,47 @@ StandardGpuResourcesImpl::setPinnedMemory(size_t size) {
210
206
 
211
207
  void
212
208
  StandardGpuResourcesImpl::setDefaultStream(int device, cudaStream_t stream) {
213
- auto it = defaultStreams_.find(device);
214
- if (it != defaultStreams_.end()) {
215
- // Replace this stream with the user stream
216
- CUDA_VERIFY(cudaStreamDestroy(it->second));
217
- it->second = stream;
209
+ if (isInitialized(device)) {
210
+ // A new series of calls may not be ordered with what was the previous
211
+ // stream, so if the stream being specified is different, then we need to
212
+ // ensure ordering between the two (new stream waits on old).
213
+ auto it = userDefaultStreams_.find(device);
214
+ cudaStream_t prevStream = nullptr;
215
+
216
+ if (it != userDefaultStreams_.end()) {
217
+ prevStream = it->second;
218
+ } else {
219
+ FAISS_ASSERT(defaultStreams_.count(device));
220
+ prevStream = defaultStreams_[device];
221
+ }
222
+
223
+ if (prevStream != stream) {
224
+ streamWait({stream}, {prevStream});
225
+ }
218
226
  }
219
227
 
220
228
  userDefaultStreams_[device] = stream;
221
229
  }
222
230
 
231
+ void
232
+ StandardGpuResourcesImpl::revertDefaultStream(int device) {
233
+ if (isInitialized(device)) {
234
+ auto it = userDefaultStreams_.find(device);
235
+
236
+ if (it != userDefaultStreams_.end()) {
237
+ // There was a user stream set that we need to synchronize against
238
+ cudaStream_t prevStream = userDefaultStreams_[device];
239
+
240
+ FAISS_ASSERT(defaultStreams_.count(device));
241
+ cudaStream_t newStream = defaultStreams_[device];
242
+
243
+ streamWait({newStream}, {prevStream});
244
+ }
245
+ }
246
+
247
+ userDefaultStreams_.erase(device);
248
+ }
249
+
223
250
  void
224
251
  StandardGpuResourcesImpl::setDefaultNullStreamAllDevices() {
225
252
  for (int dev = 0; dev < getNumDevices(); ++dev) {
@@ -274,14 +301,8 @@ StandardGpuResourcesImpl::initializeForDevice(int device) {
274
301
 
275
302
  // Create streams
276
303
  cudaStream_t defaultStream = 0;
277
- auto it = userDefaultStreams_.find(device);
278
- if (it != userDefaultStreams_.end()) {
279
- // We already have a stream provided by the user
280
- defaultStream = it->second;
281
- } else {
282
- CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,
283
- cudaStreamNonBlocking));
284
- }
304
+ CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,
305
+ cudaStreamNonBlocking));
285
306
 
286
307
  defaultStreams_[device] = defaultStream;
287
308
 
@@ -308,15 +329,14 @@ StandardGpuResourcesImpl::initializeForDevice(int device) {
308
329
  FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
309
330
  blasHandles_[device] = blasHandle;
310
331
 
311
- // Enable tensor core support if available
312
- #if CUDA_VERSION >= 9000 && CUDA_VERSION < 11000
313
- // This flag was deprecated in CUDA 11
314
- if (getTensorCoreSupport(device)) {
315
- cublasSetMathMode(blasHandle, CUBLAS_TENSOR_OP_MATH);
316
- }
317
- #endif
332
+ // For CUDA 10 on V100, enabling tensor core usage would enable automatic
333
+ // rounding down of inputs to f16 (though accumulate in f32) which results in
334
+ // unacceptable loss of precision in general.
335
+ // For CUDA 11 / A100, only enable tensor core support if it doesn't result in
336
+ // a loss of precision.
318
337
  #if CUDA_VERSION >= 11000
319
- cublasSetMathMode(blasHandle, CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION);
338
+ cublasSetMathMode(blasHandle,
339
+ CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION);
320
340
  #endif
321
341
 
322
342
  FAISS_ASSERT(allocs_.count(device) == 0);
@@ -341,6 +361,14 @@ StandardGpuResourcesImpl::getBlasHandle(int device) {
341
361
  cudaStream_t
342
362
  StandardGpuResourcesImpl::getDefaultStream(int device) {
343
363
  initializeForDevice(device);
364
+
365
+ auto it = userDefaultStreams_.find(device);
366
+ if (it != userDefaultStreams_.end()) {
367
+ // There is a user override stream set
368
+ return it->second;
369
+ }
370
+
371
+ // Otherwise, our base default stream
344
372
  return defaultStreams_[device];
345
373
  }
346
374
 
@@ -539,6 +567,11 @@ StandardGpuResources::setDefaultStream(int device, cudaStream_t stream) {
539
567
  res_->setDefaultStream(device, stream);
540
568
  }
541
569
 
570
+ void
571
+ StandardGpuResources::revertDefaultStream(int device) {
572
+ res_->revertDefaultStream(device);
573
+ }
574
+
542
575
  void
543
576
  StandardGpuResources::setDefaultNullStreamAllDevices() {
544
577
  res_->setDefaultNullStreamAllDevices();