faiss 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +1 -1
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  8. data/vendor/faiss/faiss/AutoTune.h +6 -3
  9. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  10. data/vendor/faiss/faiss/Index.cpp +3 -4
  11. data/vendor/faiss/faiss/Index.h +3 -3
  12. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  13. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  16. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  17. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  19. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  20. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  21. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  22. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  23. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  24. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  25. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  26. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  27. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  28. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  29. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  30. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  31. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  32. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  33. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  34. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  35. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  36. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  37. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  38. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  39. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  40. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  41. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  42. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  43. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  44. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  45. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  46. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  47. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  48. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  49. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  50. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  51. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  52. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  53. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  54. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  55. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  56. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  57. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  58. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  59. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  60. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  61. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  62. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  63. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  64. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  65. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  66. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  67. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  68. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  69. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  70. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  71. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  72. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  73. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  74. data/vendor/faiss/faiss/impl/io.h +7 -2
  75. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  76. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  77. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  78. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  79. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  80. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  81. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  82. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  83. data/vendor/faiss/faiss/index_io.h +1 -48
  84. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  85. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  86. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  87. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  88. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  89. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  90. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  91. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  92. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  93. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  94. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  95. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  96. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  97. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  98. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  99. data/vendor/faiss/faiss/utils/distances.h +28 -20
  100. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  101. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  102. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  103. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  104. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  105. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  106. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  107. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  108. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  109. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  110. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  111. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  112. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  113. metadata +43 -141
  114. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  115. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  116. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  117. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  118. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  119. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  120. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  121. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  122. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  123. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  124. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  125. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  126. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  127. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  128. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  129. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  130. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  131. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  132. data/vendor/faiss/c_api/Index_c.h +0 -183
  133. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  134. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  135. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  136. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  137. data/vendor/faiss/c_api/error_c.h +0 -42
  138. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  139. data/vendor/faiss/c_api/error_impl.h +0 -16
  140. data/vendor/faiss/c_api/faiss_c.h +0 -58
  141. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  142. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  143. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  144. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  145. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  146. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  147. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  148. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  149. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  150. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  151. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  152. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  153. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  154. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  155. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  156. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  157. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  158. data/vendor/faiss/c_api/index_io_c.h +0 -50
  159. data/vendor/faiss/c_api/macros_impl.h +0 -110
  160. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  161. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  162. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  163. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  164. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  165. data/vendor/faiss/misc/test_blas.cpp +0 -87
  166. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  167. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  168. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  169. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  170. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  171. data/vendor/faiss/tests/test_merge.cpp +0 -260
  172. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  173. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  174. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  175. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  176. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  177. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  178. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  179. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  180. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  181. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  182. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  183. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  184. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -192,7 +192,7 @@ void IndexIVFScalarQuantizer::encode_vectors(idx_t n, const float* x,
     size_t coarse_size = include_listnos ? coarse_code_size () : 0;
     memset(codes, 0, (code_size + coarse_size) * n);

-#pragma omp parallel if(n > 1)
+#pragma omp parallel if(n > 1000)
     {
         std::vector<float> residual (d);

@@ -222,7 +222,7 @@ void IndexIVFScalarQuantizer::sa_decode (idx_t n, const uint8_t *codes,
     std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
     size_t coarse_size = coarse_code_size ();

-#pragma omp parallel if(n > 1)
+#pragma omp parallel if(n > 1000)
     {
         std::vector<float> residual (d);

@@ -82,7 +82,7 @@ struct IndexScalarQuantizer: Index {


 /** An IVF implementation where the components of the residuals are
- * encoded with a scalar uniform quantizer. All distance computations
+ * encoded with a scalar quantizer. All distance computations
  * are asymmetric, so the encoded vectors are decoded and approximate
  * distances are computed.
  */
@@ -148,6 +148,6 @@ void bruteForceKnn(GpuResourcesProvider* resources,
                    float* outDistances,
                    // A region of memory size numQueries x k, with k
                    // innermost (row major)
-                   faiss::Index::idx_t* outIndices);
+                   Index::idx_t* outIndices);

 } } // namespace
@@ -36,9 +36,12 @@ class GpuIndex : public faiss::Index {
            float metricArg,
            GpuIndexConfig config);

-  inline int getDevice() const {
-    return device_;
-  }
+  /// Returns the device that this index is resident on
+  int getDevice() const;
+
+  /// Returns a reference to our GpuResources object that manages memory, stream
+  /// and handle resources on the GPU
+  std::shared_ptr<GpuResources> getResources();

   /// Set the minimum data size for searches (in MiB) for which we use
   /// CPU -> GPU paging
@@ -50,7 +53,7 @@ class GpuIndex : public faiss::Index {
50
53
  /// `x` can be resident on the CPU or any GPU; copies are performed
51
54
  /// as needed
52
55
  /// Handles paged adds if the add set is too large; calls addInternal_
53
- void add(faiss::Index::idx_t, const float* x) override;
56
+ void add(Index::idx_t, const float* x) override;
54
57
 
55
58
  /// `x` and `ids` can be resident on the CPU or any GPU; copies are
56
59
  /// performed as needed
@@ -59,6 +62,13 @@ class GpuIndex : public faiss::Index {
59
62
  const float* x,
60
63
  const Index::idx_t* ids) override;
61
64
 
65
+ /// `x` and `labels` can be resident on the CPU or any GPU; copies are
66
+ /// performed as needed
67
+ void assign(Index::idx_t n,
68
+ const float* x,
69
+ Index::idx_t* labels,
70
+ Index::idx_t k = 1) const override;
71
+
62
72
  /// `x`, `distances` and `labels` can be resident on the CPU or any
63
73
  /// GPU; copies are performed as needed
64
74
  void search(Index::idx_t n,
@@ -136,11 +146,8 @@ private:
136
146
  /// Manages streams, cuBLAS handles and scratch memory for devices
137
147
  std::shared_ptr<GpuResources> resources_;
138
148
 
139
- /// The GPU device we are resident on
140
- const int device_;
141
-
142
- /// The memory space of our primary storage on the GPU
143
- const MemorySpace memorySpace_;
149
+ /// Our configuration options
150
+ const GpuIndexConfig config_;
144
151
 
145
152
  /// Size above which we page copies from the CPU to GPU
146
153
  size_t minPagedSize_;
@@ -38,6 +38,13 @@ class GpuIndexBinaryFlat : public IndexBinary {
38
38
 
39
39
  ~GpuIndexBinaryFlat() override;
40
40
 
41
+ /// Returns the device that this index is resident on
42
+ int getDevice() const;
43
+
44
+ /// Returns a reference to our GpuResources object that manages memory, stream
45
+ /// and handle resources on the GPU
46
+ std::shared_ptr<GpuResources> getResources();
47
+
41
48
  /// Initialize ourselves from the given CPU index; will overwrite
42
49
  /// all data in ourselves
43
50
  void copyFrom(const faiss::IndexBinaryFlat* index);
@@ -80,7 +87,7 @@ class GpuIndexBinaryFlat : public IndexBinary {
80
87
  std::shared_ptr<GpuResources> resources_;
81
88
 
82
89
  /// Configuration options
83
- GpuIndexBinaryFlatConfig config_;
90
+ const GpuIndexBinaryFlatConfig binaryFlatConfig_;
84
91
 
85
92
  /// Holds our GPU data containing the list of vectors
86
93
  std::unique_ptr<BinaryFlatIndex> data_;
@@ -21,7 +21,7 @@ struct IndexFlatIP;
21
21
 
22
22
  namespace faiss { namespace gpu {
23
23
 
24
- struct FlatIndex;
24
+ class FlatIndex;
25
25
 
26
26
  struct GpuIndexFlatConfig : public GpuIndexConfig {
27
27
  inline GpuIndexFlatConfig()
@@ -87,27 +87,27 @@ class GpuIndexFlat : public GpuIndex {
87
87
  void train(Index::idx_t n, const float* x) override;
88
88
 
89
89
  /// Overrides to avoid excessive copies
90
- void add(faiss::Index::idx_t, const float* x) override;
90
+ void add(Index::idx_t, const float* x) override;
91
91
 
92
92
  /// Reconstruction methods; prefer the batch reconstruct as it will
93
93
  /// be more efficient
94
- void reconstruct(faiss::Index::idx_t key, float* out) const override;
94
+ void reconstruct(Index::idx_t key, float* out) const override;
95
95
 
96
96
  /// Batch reconstruction method
97
- void reconstruct_n(faiss::Index::idx_t i0,
98
- faiss::Index::idx_t num,
97
+ void reconstruct_n(Index::idx_t i0,
98
+ Index::idx_t num,
99
99
  float* out) const override;
100
100
 
101
101
  /// Compute residual
102
102
  void compute_residual(const float* x,
103
103
  float* residual,
104
- faiss::Index::idx_t key) const override;
104
+ Index::idx_t key) const override;
105
105
 
106
106
  /// Compute residual (batch mode)
107
- void compute_residual_n(faiss::Index::idx_t n,
107
+ void compute_residual_n(Index::idx_t n,
108
108
  const float* xs,
109
109
  float* residuals,
110
- const faiss::Index::idx_t* keys) const override;
110
+ const Index::idx_t* keys) const override;
111
111
 
112
112
  /// For internal access
113
113
  inline FlatIndex* getGpuData() { return data_.get(); }
@@ -126,11 +126,11 @@ class GpuIndexFlat : public GpuIndex {
126
126
  const float* x,
127
127
  int k,
128
128
  float* distances,
129
- faiss::Index::idx_t* labels) const override;
129
+ Index::idx_t* labels) const override;
130
130
 
131
131
  protected:
132
- /// Our config object
133
- const GpuIndexFlatConfig config_;
132
+ /// Our configuration options
133
+ const GpuIndexFlatConfig flatConfig_;
134
134
 
135
135
  /// Holds our GPU data containing the list of vectors
136
136
  std::unique_ptr<FlatIndex> data_;
@@ -56,6 +56,22 @@ class GpuIndexIVF : public GpuIndex {
56
56
  /// Returns the number of inverted lists we're managing
57
57
  int getNumLists() const;
58
58
 
59
+ /// Returns the number of vectors present in a particular inverted list
60
+ virtual int getListLength(int listId) const = 0;
61
+
62
+ /// Return the encoded vector data contained in a particular inverted list,
63
+ /// for debugging purposes.
64
+ /// If gpuFormat is true, the data is returned as it is encoded in the
65
+ /// GPU-side representation.
66
+ /// Otherwise, it is converted to the CPU format.
67
+ /// compliant format, while the native GPU format may differ.
68
+ virtual std::vector<uint8_t>
69
+ getListVectorData(int listId, bool gpuFormat = false) const = 0;
70
+
71
+ /// Return the vector indices contained in a particular inverted list, for
72
+ /// debugging purposes.
73
+ virtual std::vector<Index::idx_t> getListIndices(int listId) const = 0;
74
+
59
75
  /// Return the quantizer we're using
60
76
  GpuIndexFlat* getQuantizer();
61
77
 
@@ -67,7 +83,7 @@ class GpuIndexIVF : public GpuIndex {
67
83
 
68
84
  protected:
69
85
  bool addImplRequiresIDs_() const override;
70
- void trainQuantizer_(faiss::Index::idx_t n, const float* x);
86
+ void trainQuantizer_(Index::idx_t n, const float* x);
71
87
 
72
88
  public:
73
89
  /// Exposing this like the CPU version for manipulation
@@ -83,7 +99,8 @@ class GpuIndexIVF : public GpuIndex {
83
99
  GpuIndexFlat* quantizer;
84
100
 
85
101
  protected:
86
- GpuIndexIVFConfig ivfConfig_;
102
+ /// Our configuration options
103
+ const GpuIndexIVFConfig ivfConfig_;
87
104
  };
88
105
 
89
106
  } } // namespace
@@ -19,6 +19,13 @@ class IVFFlat;
19
19
  class GpuIndexFlat;
20
20
 
21
21
  struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig {
22
+ inline GpuIndexIVFFlatConfig()
23
+ : interleavedLayout(true) {
24
+ }
25
+
26
+ /// Use the alternative memory layout for the IVF lists
27
+ /// (currently the default)
28
+ bool interleavedLayout;
22
29
  };
23
30
 
24
31
  /// Wrapper around the GPU implementation that looks like
@@ -56,10 +63,28 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
56
63
  /// to exactly the amount needed. Returns space reclaimed in bytes
57
64
  size_t reclaimMemory();
58
65
 
66
+ /// Clears out all inverted lists, but retains the coarse centroid information
59
67
  void reset() override;
60
68
 
69
+ /// Trains the coarse quantizer based on the given vector data
61
70
  void train(Index::idx_t n, const float* x) override;
62
71
 
72
+ /// Returns the number of vectors present in a particular inverted list
73
+ int getListLength(int listId) const override;
74
+
75
+ /// Return the encoded vector data contained in a particular inverted list,
76
+ /// for debugging purposes.
77
+ /// If gpuFormat is true, the data is returned as it is encoded in the
78
+ /// GPU-side representation.
79
+ /// Otherwise, it is converted to the CPU format.
80
+ /// compliant format, while the native GPU format may differ.
81
+ std::vector<uint8_t>
82
+ getListVectorData(int listId, bool gpuFormat = false) const override;
83
+
84
+ /// Return the vector indices contained in a particular inverted list, for
85
+ /// debugging purposes.
86
+ std::vector<Index::idx_t> getListIndices(int listId) const override;
87
+
63
88
  protected:
64
89
  /// Called from GpuIndex for add/add_with_ids
65
90
  void addImpl_(int n,
@@ -73,8 +98,9 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
73
98
  float* distances,
74
99
  Index::idx_t* labels) const override;
75
100
 
76
- private:
77
- GpuIndexIVFFlatConfig ivfFlatConfig_;
101
+ protected:
102
+ /// Our configuration options
103
+ const GpuIndexIVFFlatConfig ivfFlatConfig_;
78
104
 
79
105
  /// Desired inverted list memory reservation
80
106
  size_t reserveMemoryVecs_;
@@ -23,7 +23,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
23
23
  inline GpuIndexIVFPQConfig()
24
24
  : useFloat16LookupTables(false),
25
25
  usePrecomputedTables(false),
26
- alternativeLayout(false),
26
+ interleavedLayout(false),
27
27
  useMMCodeDistance(false) {
28
28
  }
29
29
 
@@ -38,7 +38,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
38
38
 
39
39
  /// Use the alternative memory layout for the IVF lists
40
40
  /// WARNING: this is a feature under development, do not use!
41
- bool alternativeLayout;
41
+ bool interleavedLayout;
42
42
 
43
43
  /// Use GEMM-backed computation of PQ code distances for the no precomputed
44
44
  /// table version of IVFPQ.
@@ -108,19 +108,24 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
108
108
  /// product centroid information
109
109
  void reset() override;
110
110
 
111
+ /// Trains the coarse and product quantizer based on the given vector data
111
112
  void train(Index::idx_t n, const float* x) override;
112
113
 
113
- /// For debugging purposes, return the list length of a particular
114
- /// list
115
- int getListLength(int listId) const;
114
+ /// Returns the number of vectors present in a particular inverted list
115
+ int getListLength(int listId) const override;
116
116
 
117
- /// For debugging purposes, return the list codes of a particular
118
- /// list
119
- std::vector<unsigned char> getListCodes(int listId) const;
117
+ /// Return the encoded vector data contained in a particular inverted list,
118
+ /// for debugging purposes.
119
+ /// If gpuFormat is true, the data is returned as it is encoded in the
120
+ /// GPU-side representation.
121
+ /// Otherwise, it is converted to the CPU format.
122
+ /// compliant format, while the native GPU format may differ.
123
+ std::vector<uint8_t>
124
+ getListVectorData(int listId, bool gpuFormat = false) const override;
120
125
 
121
- /// For debugging purposes, return the list indices of a particular
122
- /// list
123
- std::vector<long> getListIndices(int listId) const;
126
+ /// Return the vector indices contained in a particular inverted list, for
127
+ /// debugging purposes.
128
+ std::vector<Index::idx_t> getListIndices(int listId) const override;
124
129
 
125
130
  protected:
126
131
  /// Called from GpuIndex for add/add_with_ids
@@ -135,13 +140,18 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
135
140
  float* distances,
136
141
  Index::idx_t* labels) const override;
137
142
 
138
- private:
143
+ /// Throws errors if configuration settings are improper
139
144
  void verifySettings_() const;
140
145
 
146
+ /// Trains the PQ quantizer based on the given vector data
141
147
  void trainResidualQuantizer_(Index::idx_t n, const float* x);
142
148
 
143
- private:
144
- GpuIndexIVFPQConfig ivfpqConfig_;
149
+ protected:
150
+ /// Our configuration options that we were initialized with
151
+ const GpuIndexIVFPQConfig ivfpqConfig_;
152
+
153
+ /// Runtime override: whether or not we use precomputed tables
154
+ bool usePrecomputedTables_;
145
155
 
146
156
  /// Number of sub-quantizers per encoded vector
147
157
  int subQuantizers_;
@@ -18,6 +18,13 @@ class IVFFlat;
18
18
  class GpuIndexFlat;
19
19
 
20
20
  struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
21
+ inline GpuIndexIVFScalarQuantizerConfig()
22
+ : interleavedLayout(true) {
23
+ }
24
+
25
+ /// Use the alternative memory layout for the IVF lists
26
+ /// (currently the default)
27
+ bool interleavedLayout;
21
28
  };
22
29
 
23
30
  /// Wrapper around the GPU implementation that looks like
@@ -61,10 +68,29 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
61
68
  /// to exactly the amount needed. Returns space reclaimed in bytes
62
69
  size_t reclaimMemory();
63
70
 
71
+ /// Clears out all inverted lists, but retains the coarse and scalar quantizer
72
+ /// information
64
73
  void reset() override;
65
74
 
75
+ /// Trains the coarse and scalar quantizer based on the given vector data
66
76
  void train(Index::idx_t n, const float* x) override;
67
77
 
78
+ /// Returns the number of vectors present in a particular inverted list
79
+ int getListLength(int listId) const override;
80
+
81
+ /// Return the encoded vector data contained in a particular inverted list,
82
+ /// for debugging purposes.
83
+ /// If gpuFormat is true, the data is returned as it is encoded in the
84
+ /// GPU-side representation.
85
+ /// Otherwise, it is converted to the CPU format.
86
+ /// compliant format, while the native GPU format may differ.
87
+ std::vector<uint8_t>
88
+ getListVectorData(int listId, bool gpuFormat = false) const override;
89
+
90
+ /// Return the vector indices contained in a particular inverted list, for
91
+ /// debugging purposes.
92
+ std::vector<Index::idx_t> getListIndices(int listId) const override;
93
+
68
94
  protected:
69
95
  /// Called from GpuIndex for add/add_with_ids
70
96
  void addImpl_(int n,
@@ -88,8 +114,9 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
88
114
  /// Exposed like the CPU version
89
115
  bool by_residual;
90
116
 
91
- private:
92
- GpuIndexIVFScalarQuantizerConfig ivfSQConfig_;
117
+ protected:
118
+ /// Our configuration options
119
+ const GpuIndexIVFScalarQuantizerConfig ivfSQConfig_;
93
120
 
94
121
  /// Desired inverted list memory reservation
95
122
  size_t reserveMemoryVecs_;
@@ -198,6 +198,10 @@ class GpuResources {
198
198
  /// given device
199
199
  virtual cudaStream_t getDefaultStream(int device) = 0;
200
200
 
201
+ /// Overrides the default stream for a device to the user-supplied stream. The
202
+ /// resources object does not own this stream (i.e., it will not destroy it).
203
+ virtual void setDefaultStream(int device, cudaStream_t stream) = 0;
204
+
201
205
  /// Returns the set of alternative streams that we use for the given device
202
206
  virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
203
207
 
@@ -101,12 +101,8 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
101
101
  for (auto& entry : defaultStreams_) {
102
102
  DeviceScope scope(entry.first);
103
103
 
104
- auto it = userDefaultStreams_.find(entry.first);
105
- if (it == userDefaultStreams_.end()) {
106
- // The user did not specify this stream, thus we are the ones
107
- // who have created it
108
- CUDA_VERIFY(cudaStreamDestroy(entry.second));
109
- }
104
+ // We created these streams, so are responsible for destroying them
105
+ CUDA_VERIFY(cudaStreamDestroy(entry.second));
110
106
  }
111
107
 
112
108
  for (auto& entry : alternateStreams_) {
@@ -210,16 +206,47 @@ StandardGpuResourcesImpl::setPinnedMemory(size_t size) {
210
206
 
211
207
  void
212
208
  StandardGpuResourcesImpl::setDefaultStream(int device, cudaStream_t stream) {
213
- auto it = defaultStreams_.find(device);
214
- if (it != defaultStreams_.end()) {
215
- // Replace this stream with the user stream
216
- CUDA_VERIFY(cudaStreamDestroy(it->second));
217
- it->second = stream;
209
+ if (isInitialized(device)) {
210
+ // A new series of calls may not be ordered with what was the previous
211
+ // stream, so if the stream being specified is different, then we need to
212
+ // ensure ordering between the two (new stream waits on old).
213
+ auto it = userDefaultStreams_.find(device);
214
+ cudaStream_t prevStream = nullptr;
215
+
216
+ if (it != userDefaultStreams_.end()) {
217
+ prevStream = it->second;
218
+ } else {
219
+ FAISS_ASSERT(defaultStreams_.count(device));
220
+ prevStream = defaultStreams_[device];
221
+ }
222
+
223
+ if (prevStream != stream) {
224
+ streamWait({stream}, {prevStream});
225
+ }
218
226
  }
219
227
 
220
228
  userDefaultStreams_[device] = stream;
221
229
  }
222
230
 
231
+ void
232
+ StandardGpuResourcesImpl::revertDefaultStream(int device) {
233
+ if (isInitialized(device)) {
234
+ auto it = userDefaultStreams_.find(device);
235
+
236
+ if (it != userDefaultStreams_.end()) {
237
+ // There was a user stream set that we need to synchronize against
238
+ cudaStream_t prevStream = userDefaultStreams_[device];
239
+
240
+ FAISS_ASSERT(defaultStreams_.count(device));
241
+ cudaStream_t newStream = defaultStreams_[device];
242
+
243
+ streamWait({newStream}, {prevStream});
244
+ }
245
+ }
246
+
247
+ userDefaultStreams_.erase(device);
248
+ }
249
+
223
250
  void
224
251
  StandardGpuResourcesImpl::setDefaultNullStreamAllDevices() {
225
252
  for (int dev = 0; dev < getNumDevices(); ++dev) {
@@ -274,14 +301,8 @@ StandardGpuResourcesImpl::initializeForDevice(int device) {
274
301
 
275
302
  // Create streams
276
303
  cudaStream_t defaultStream = 0;
277
- auto it = userDefaultStreams_.find(device);
278
- if (it != userDefaultStreams_.end()) {
279
- // We already have a stream provided by the user
280
- defaultStream = it->second;
281
- } else {
282
- CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,
283
- cudaStreamNonBlocking));
284
- }
304
+ CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,
305
+ cudaStreamNonBlocking));
285
306
 
286
307
  defaultStreams_[device] = defaultStream;
287
308
 
@@ -308,15 +329,14 @@ StandardGpuResourcesImpl::initializeForDevice(int device) {
308
329
  FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
309
330
  blasHandles_[device] = blasHandle;
310
331
 
311
- // Enable tensor core support if available
312
- #if CUDA_VERSION >= 9000 && CUDA_VERSION < 11000
313
- // This flag was deprecated in CUDA 11
314
- if (getTensorCoreSupport(device)) {
315
- cublasSetMathMode(blasHandle, CUBLAS_TENSOR_OP_MATH);
316
- }
317
- #endif
332
+ // For CUDA 10 on V100, enabling tensor core usage would enable automatic
333
+ // rounding down of inputs to f16 (though accumulate in f32) which results in
334
+ // unacceptable loss of precision in general.
335
+ // For CUDA 11 / A100, only enable tensor core support if it doesn't result in
336
+ // a loss of precision.
318
337
  #if CUDA_VERSION >= 11000
319
- cublasSetMathMode(blasHandle, CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION);
338
+ cublasSetMathMode(blasHandle,
339
+ CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION);
320
340
  #endif
321
341
 
322
342
  FAISS_ASSERT(allocs_.count(device) == 0);
@@ -341,6 +361,14 @@ StandardGpuResourcesImpl::getBlasHandle(int device) {
341
361
  cudaStream_t
342
362
  StandardGpuResourcesImpl::getDefaultStream(int device) {
343
363
  initializeForDevice(device);
364
+
365
+ auto it = userDefaultStreams_.find(device);
366
+ if (it != userDefaultStreams_.end()) {
367
+ // There is a user override stream set
368
+ return it->second;
369
+ }
370
+
371
+ // Otherwise, our base default stream
344
372
  return defaultStreams_[device];
345
373
  }
346
374
 
@@ -539,6 +567,11 @@ StandardGpuResources::setDefaultStream(int device, cudaStream_t stream) {
539
567
  res_->setDefaultStream(device, stream);
540
568
  }
541
569
 
570
+ void
571
+ StandardGpuResources::revertDefaultStream(int device) {
572
+ res_->revertDefaultStream(device);
573
+ }
574
+
542
575
  void
543
576
  StandardGpuResources::setDefaultNullStreamAllDevices() {
544
577
  res_->setDefaultNullStreamAllDevices();