faiss 0.2.4 → 0.2.5

Files changed (177)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +23 -21
  4. data/ext/faiss/extconf.rb +11 -0
  5. data/ext/faiss/index.cpp +4 -4
  6. data/ext/faiss/index_binary.cpp +6 -6
  7. data/ext/faiss/product_quantizer.cpp +4 -4
  8. data/lib/faiss/version.rb +1 -1
  9. data/vendor/faiss/faiss/AutoTune.cpp +13 -0
  10. data/vendor/faiss/faiss/IVFlib.cpp +101 -2
  11. data/vendor/faiss/faiss/IVFlib.h +26 -2
  12. data/vendor/faiss/faiss/Index.cpp +36 -3
  13. data/vendor/faiss/faiss/Index.h +43 -6
  14. data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
  15. data/vendor/faiss/faiss/Index2Layer.h +6 -1
  16. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
  20. data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +18 -3
  22. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
  23. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
  24. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
  31. data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
  32. data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
  33. data/vendor/faiss/faiss/IndexFastScan.h +145 -0
  34. data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
  35. data/vendor/faiss/faiss/IndexFlat.h +7 -4
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
  39. data/vendor/faiss/faiss/IndexHNSW.h +4 -2
  40. data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
  41. data/vendor/faiss/faiss/IndexIDMap.h +107 -0
  42. data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
  43. data/vendor/faiss/faiss/IndexIVF.h +35 -16
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
  45. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
  46. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
  47. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
  48. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
  49. data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
  50. data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
  51. data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
  53. data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
  55. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
  56. data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
  57. data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
  58. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
  59. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
  60. data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
  61. data/vendor/faiss/faiss/IndexLSH.h +2 -1
  62. data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
  63. data/vendor/faiss/faiss/IndexLattice.h +3 -1
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
  66. data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
  67. data/vendor/faiss/faiss/IndexNSG.h +25 -1
  68. data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
  69. data/vendor/faiss/faiss/IndexPQ.h +19 -5
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
  73. data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
  74. data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
  75. data/vendor/faiss/faiss/IndexRefine.h +4 -2
  76. data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
  77. data/vendor/faiss/faiss/IndexReplicas.h +2 -1
  78. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
  79. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
  80. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
  81. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
  82. data/vendor/faiss/faiss/IndexShards.cpp +4 -1
  83. data/vendor/faiss/faiss/IndexShards.h +2 -1
  84. data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
  85. data/vendor/faiss/faiss/MetaIndexes.h +3 -81
  86. data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
  87. data/vendor/faiss/faiss/VectorTransform.h +22 -4
  88. data/vendor/faiss/faiss/clone_index.cpp +23 -1
  89. data/vendor/faiss/faiss/clone_index.h +3 -0
  90. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
  91. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
  92. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
  93. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
  94. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
  95. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
  96. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
  101. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
  102. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
  103. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  104. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
  105. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
  106. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
  107. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
  108. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
  109. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
  110. data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
  111. data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
  112. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
  113. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
  114. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
  115. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
  116. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
  117. data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
  118. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
  119. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
  124. data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
  125. data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
  126. data/vendor/faiss/faiss/impl/HNSW.h +19 -16
  127. data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
  128. data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
  131. data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  134. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
  138. data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
  144. data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
  145. data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
  146. data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
  147. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
  148. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
  149. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
  150. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
  151. data/vendor/faiss/faiss/index_factory.cpp +196 -7
  152. data/vendor/faiss/faiss/index_io.h +5 -0
  153. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
  154. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
  155. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
  156. data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
  157. data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
  158. data/vendor/faiss/faiss/utils/Heap.h +31 -15
  159. data/vendor/faiss/faiss/utils/distances.cpp +380 -56
  160. data/vendor/faiss/faiss/utils/distances.h +113 -15
  161. data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
  162. data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
  163. data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
  164. data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
  165. data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
  166. data/vendor/faiss/faiss/utils/fp16.h +11 -0
  167. data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
  168. data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
  169. data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
  170. data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
  171. data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
  172. data/vendor/faiss/faiss/utils/random.cpp +53 -0
  173. data/vendor/faiss/faiss/utils/random.h +5 -0
  174. data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
  175. data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
  176. data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
  177. metadata +37 -3

data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp:

@@ -16,6 +16,7 @@
 #include <faiss/gpu/GpuIndexIVFFlat.h>
 #include <faiss/gpu/GpuIndexIVFPQ.h>
 #include <faiss/gpu/GpuIndexIVFScalarQuantizer.h>
+#include <faiss/gpu/impl/IndexUtils.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/impl/FaissAssert.h>
 

data/vendor/faiss/faiss/gpu/GpuCloner.cpp:

@@ -121,7 +121,6 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         GpuIndexFlatConfig config;
         config.device = device;
         config.useFloat16 = useFloat16;
-        config.storeTransposed = storeTransposed;
         return new GpuIndexFlat(provider, ifl, config);
     } else if (
             dynamic_cast<const IndexScalarQuantizer*>(index) &&
@@ -147,7 +146,6 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.device = device;
         config.indicesOptions = indicesOptions;
         config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
-        config.flatConfig.storeTransposed = storeTransposed;
 
         GpuIndexIVFFlat* res = new GpuIndexIVFFlat(
                 provider, ifl->d, ifl->nlist, ifl->metric_type, config);
@@ -164,7 +162,6 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.device = device;
         config.indicesOptions = indicesOptions;
         config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
-        config.flatConfig.storeTransposed = storeTransposed;
 
         GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer(
                 provider,
@@ -195,7 +192,6 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.device = device;
         config.indicesOptions = indicesOptions;
         config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
-        config.flatConfig.storeTransposed = storeTransposed;
         config.useFloat16LookupTables = useFloat16;
         config.usePrecomputedTables = usePrecomputed;
 

data/vendor/faiss/faiss/gpu/GpuIndex.h:

@@ -74,7 +74,19 @@ class GpuIndex : public faiss::Index {
             const float* x,
             Index::idx_t k,
             float* distances,
-            Index::idx_t* labels) const override;
+            Index::idx_t* labels,
+            const SearchParameters* params = nullptr) const override;
+
+    /// `x`, `distances` and `labels` and `recons` can be resident on the CPU or
+    /// any GPU; copies are performed as needed
+    void search_and_reconstruct(
+            idx_t n,
+            const float* x,
+            idx_t k,
+            float* distances,
+            idx_t* labels,
+            float* recons,
+            const SearchParameters* params = nullptr) const override;
 
     /// Overridden to force GPU indices to provide their own GPU-friendly
     /// implementation
@@ -111,7 +123,8 @@ class GpuIndex : public faiss::Index {
             const float* x,
             int k,
             float* distances,
-            Index::idx_t* labels) const = 0;
+            Index::idx_t* labels,
+            const SearchParameters* params) const = 0;
 
    private:
     /// Handles paged adds if the add set is too large, passes to
@@ -127,7 +140,8 @@ class GpuIndex : public faiss::Index {
             const float* x,
             int k,
             float* outDistancesData,
-            Index::idx_t* outIndicesData) const;
+            Index::idx_t* outIndicesData,
+            const SearchParameters* params) const;
 
     /// Calls searchImpl_ for a single page of GPU-resident data,
     /// handling paging of the data and copies from the CPU
@@ -136,7 +150,8 @@ class GpuIndex : public faiss::Index {
             const float* x,
             int k,
             float* outDistancesData,
-            Index::idx_t* outIndicesData) const;
+            Index::idx_t* outIndicesData,
+            const SearchParameters* params) const;
 
    protected:
     /// Manages streams, cuBLAS handles and scratch memory for devices
@@ -149,5 +164,14 @@ class GpuIndex : public faiss::Index {
     size_t minPagedSize_;
 };
 
+/// If the given index is a GPU index, this returns the index instance
+GpuIndex* tryCastGpuIndex(faiss::Index* index);
+
+/// Is the given index instance a GPU index?
+bool isGpuIndex(faiss::Index* index);
+
+/// Does the given CPU index instance have a corresponding GPU implementation?
+bool isGpuIndexImplemented(faiss::Index* index);
+
 } // namespace gpu
 } // namespace faiss
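
For context, a minimal sketch (not part of the gem diff) of how the index-inspection helpers declared above might be used to tell CPU and GPU indexes apart. The function name describeIndex is illustrative only; the three helpers are taken from the declarations in the diff.

#include <faiss/gpu/GpuIndex.h>

// Sketch only: decide how to treat an arbitrary faiss::Index*.
void describeIndex(faiss::Index* index) {
    if (faiss::gpu::isGpuIndex(index)) {
        // Already a GPU index; the cast returns the GpuIndex instance.
        faiss::gpu::GpuIndex* gpuIndex = faiss::gpu::tryCastGpuIndex(index);
        (void)gpuIndex;
    } else if (faiss::gpu::isGpuIndexImplemented(index)) {
        // CPU index type that has a GPU counterpart (a candidate for cloning).
    } else {
        // CPU-only index type.
    }
}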

data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h:

@@ -62,7 +62,8 @@ class GpuIndexBinaryFlat : public IndexBinary {
             const uint8_t* x,
             faiss::IndexBinary::idx_t k,
             int32_t* distances,
-            faiss::IndexBinary::idx_t* labels) const override;
+            faiss::IndexBinary::idx_t* labels,
+            const faiss::SearchParameters* params = nullptr) const override;
 
     void reconstruct(faiss::IndexBinary::idx_t key, uint8_t* recons)
             const override;

data/vendor/faiss/faiss/gpu/GpuIndexFlat.h:

@@ -24,17 +24,14 @@ namespace gpu {
 class FlatIndex;
 
 struct GpuIndexFlatConfig : public GpuIndexConfig {
-    inline GpuIndexFlatConfig() : useFloat16(false), storeTransposed(false) {}
+    inline GpuIndexFlatConfig() : useFloat16(false) {}
 
     /// Whether or not data is stored as float16
     bool useFloat16;
 
-    /// Whether or not data is stored (transparently) in a transposed
-    /// layout, enabling use of the NN GEMM call, which is ~10% faster.
-    /// This will improve the speed of the flat index, but will
-    /// substantially slow down any add() calls made, as all data must
-    /// be transposed, and will increase storage requirements (we store
-    /// data in both transposed and non-transposed layouts).
+    /// Deprecated: no longer used
+    /// Previously used to indicate whether internal storage of vectors is
+    /// transposed
     bool storeTransposed;
 };
 
@@ -98,6 +95,10 @@ class GpuIndexFlat : public GpuIndex {
     void reconstruct_n(Index::idx_t i0, Index::idx_t num, float* out)
             const override;
 
+    /// Batch reconstruction method
+    void reconstruct_batch(Index::idx_t n, const Index::idx_t* keys, float* out)
+            const override;
+
     /// Compute residual
     void compute_residual(const float* x, float* residual, Index::idx_t key)
             const override;
@@ -128,7 +129,8 @@ class GpuIndexFlat : public GpuIndex {
             const float* x,
             int k,
             float* distances,
-            Index::idx_t* labels) const override;
+            Index::idx_t* labels,
+            const SearchParameters* params) const override;
 
    protected:
     /// Our configuration options
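
As a hedged illustration of the config change above: storeTransposed is kept only for source compatibility and is documented as unused, so new code can simply leave it out. A minimal sketch, assuming the usual GpuIndexFlat constructor and StandardGpuResources (neither shown in this diff):

#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>

int main() {
    faiss::gpu::StandardGpuResources res;

    faiss::gpu::GpuIndexFlatConfig config;
    config.device = 0;
    config.useFloat16 = true; // storeTransposed is deprecated, so it is not set

    // 64-dimensional L2 flat index on GPU 0.
    faiss::gpu::GpuIndexFlat index(&res, 64, faiss::METRIC_L2, config);
    return 0;
}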

data/vendor/faiss/faiss/gpu/GpuIndexIVF.h:

@@ -8,18 +8,17 @@
 #pragma once
 
 #include <faiss/Clustering.h>
+#include <faiss/IndexIVF.h> // for SearchParametersIVF
 #include <faiss/gpu/GpuIndex.h>
 #include <faiss/gpu/GpuIndexFlat.h>
 #include <faiss/gpu/GpuIndicesOptions.h>
-
-namespace faiss {
-struct IndexIVF;
-}
+#include <memory>
 
 namespace faiss {
 namespace gpu {
 
 class GpuIndexFlat;
+class IVFBase;
 
 struct GpuIndexIVFConfig : public GpuIndexConfig {
     inline GpuIndexIVFConfig() : indicesOptions(INDICES_64_BIT) {}
@@ -31,8 +30,13 @@ struct GpuIndexIVFConfig : public GpuIndexConfig {
     GpuIndexFlatConfig flatConfig;
 };
 
+/// Base class of all GPU IVF index types. This (for now) deliberately does not
+/// inherit from IndexIVF, as many of the public data members and functionality
+/// in IndexIVF is not supported in the same manner on the GPU.
 class GpuIndexIVF : public GpuIndex {
    public:
+    /// Version that auto-constructs a flat coarse quantizer based on the
+    /// desired metric
     GpuIndexIVF(
             GpuResourcesProvider* provider,
             int dims,
@@ -41,6 +45,17 @@ class GpuIndexIVF : public GpuIndex {
             int nlist,
             GpuIndexIVFConfig config = GpuIndexIVFConfig());
 
+    /// Version that takes a coarse quantizer instance. The GpuIndexIVF does not
+    /// own the coarseQuantizer instance by default (functions like IndexIVF).
+    GpuIndexIVF(
+            GpuResourcesProvider* provider,
+            Index* coarseQuantizer,
+            int dims,
+            faiss::MetricType metric,
+            float metricArg,
+            int nlist,
+            GpuIndexIVFConfig config = GpuIndexIVFConfig());
+
     ~GpuIndexIVF() override;
 
    private:
@@ -54,11 +69,16 @@ class GpuIndexIVF : public GpuIndex {
     /// Copy what we have to the CPU equivalent
     void copyTo(faiss::IndexIVF* index) const;
 
+    /// Should be called if the user ever changes the state of the IVF coarse
+    /// quantizer manually (e.g., substitutes a new instance or changes vectors
+    /// in the coarse quantizer outside the scope of training)
+    virtual void updateQuantizer() = 0;
+
     /// Returns the number of inverted lists we're managing
     int getNumLists() const;
 
     /// Returns the number of vectors present in a particular inverted list
-    virtual int getListLength(int listId) const = 0;
+    int getListLength(int listId) const;
 
     /// Return the encoded vector data contained in a particular inverted list,
     /// for debugging purposes.
@@ -66,16 +86,12 @@ class GpuIndexIVF : public GpuIndex {
     /// GPU-side representation.
     /// Otherwise, it is converted to the CPU format.
     /// compliant format, while the native GPU format may differ.
-    virtual std::vector<uint8_t> getListVectorData(
-            int listId,
-            bool gpuFormat = false) const = 0;
+    std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat = false)
+            const;
 
     /// Return the vector indices contained in a particular inverted list, for
     /// debugging purposes.
-    virtual std::vector<Index::idx_t> getListIndices(int listId) const = 0;
-
-    /// Return the quantizer we're using
-    GpuIndexFlat* getQuantizer();
+    std::vector<Index::idx_t> getListIndices(int listId) const;
 
     /// Sets the number of list probes per query
     void setNumProbes(int nprobe);
@@ -83,10 +99,49 @@ class GpuIndexIVF : public GpuIndex {
     /// Returns our current number of list probes per query
     int getNumProbes() const;
 
+    /// Same interface as faiss::IndexIVF, in order to search a set of vectors
+    /// pre-quantized by the IVF quantizer. Does not include IndexIVFStats as
+    /// that can only be obtained on the host via a GPU d2h copy.
+    /// @param n nb of vectors to query
+    /// @param x query vectors, size nx * d
+    /// @param assign coarse quantization indices, size nx * nprobe
+    /// @param centroid_dis
+    ///            distances to coarse centroids, size nx * nprobe
+    /// @param distance
+    ///            output distances, size n * k
+    /// @param labels output labels, size n * k
+    /// @param store_pairs store inv list index + inv list offset
+    ///            instead in upper/lower 32 bit of result,
+    ///            instead of ids (used for reranking).
+    /// @param params used to override the object's search parameters
+    void search_preassigned(
+            idx_t n,
+            const float* x,
+            idx_t k,
+            const idx_t* assign,
+            const float* centroid_dis,
+            float* distances,
+            idx_t* labels,
+            bool store_pairs,
+            const SearchParametersIVF* params = nullptr) const;
+
    protected:
+    void verifyIVFSettings_() const;
     bool addImplRequiresIDs_() const override;
     void trainQuantizer_(Index::idx_t n, const float* x);
 
+    /// Called from GpuIndex for add/add_with_ids
+    void addImpl_(int n, const float* x, const Index::idx_t* ids) override;
+
+    /// Called from GpuIndex for search
+    void searchImpl_(
+            int n,
+            const float* x,
+            int k,
+            float* distances,
+            Index::idx_t* labels,
+            const SearchParameters* params) const override;
+
    public:
     /// Exposing this like the CPU version for manipulation
     ClusteringParameters cp;
@@ -97,12 +152,18 @@ class GpuIndexIVF : public GpuIndex {
     /// Exposing this like the CPU version for manipulation
     int nprobe;
 
-    /// Exposeing this like the CPU version for query
-    GpuIndexFlat* quantizer;
+    /// A user-pluggable coarse quantizer
+    Index* quantizer;
+
+    /// Whether or not we own the coarse quantizer
+    bool own_fields;
 
    protected:
     /// Our configuration options
     const GpuIndexIVFConfig ivfConfig_;
+
+    /// For a trained/initialized index, this is a reference to the base class
+    std::shared_ptr<IVFBase> baseIndex_;
 };
 
 } // namespace gpu
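
To illustrate the new search_preassigned entry point declared above, a minimal sketch assuming an already trained GpuIndexIVFFlat. The quantizer and nprobe members are the public fields from the declaration above; searchPreassignedExample and the query buffer are illustrative only.

#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <vector>

void searchPreassignedExample(faiss::gpu::GpuIndexIVFFlat& index,
                              const float* queries,
                              faiss::Index::idx_t n,
                              faiss::Index::idx_t k) {
    int nprobe = index.nprobe;

    // Step 1: quantize the queries with the user-visible coarse quantizer.
    std::vector<float> coarseDis(n * nprobe);
    std::vector<faiss::Index::idx_t> assign(n * nprobe);
    index.quantizer->search(n, queries, nprobe, coarseDis.data(), assign.data());

    // Step 2: search only the preassigned inverted lists.
    std::vector<float> distances(n * k);
    std::vector<faiss::Index::idx_t> labels(n * k);
    index.search_preassigned(
            n, queries, k, assign.data(), coarseDis.data(),
            distances.data(), labels.data(), /*store_pairs=*/false);
}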

data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h:

@@ -40,12 +40,22 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
             GpuIndexIVFFlatConfig config = GpuIndexIVFFlatConfig());
 
     /// Constructs a new instance with an empty flat quantizer; the user
-    /// provides the number of lists desired.
+    /// provides the number of IVF lists desired.
     GpuIndexIVFFlat(
             GpuResourcesProvider* provider,
             int dims,
             int nlist,
-            faiss::MetricType metric,
+            faiss::MetricType metric = faiss::METRIC_L2,
+            GpuIndexIVFFlatConfig config = GpuIndexIVFFlatConfig());
+
+    /// Constructs a new instance with a provided CPU or GPU coarse quantizer;
+    /// the user provides the number of IVF lists desired.
+    GpuIndexIVFFlat(
+            GpuResourcesProvider* provider,
+            Index* coarseQuantizer,
+            int dims,
+            int nlist,
+            faiss::MetricType metric = faiss::METRIC_L2,
             GpuIndexIVFFlatConfig config = GpuIndexIVFFlatConfig());
 
     ~GpuIndexIVFFlat() override;
@@ -69,37 +79,14 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
     /// information
     void reset() override;
 
+    /// Should be called if the user ever changes the state of the IVF coarse
+    /// quantizer manually (e.g., substitutes a new instance or changes vectors
+    /// in the coarse quantizer outside the scope of training)
+    void updateQuantizer() override;
+
     /// Trains the coarse quantizer based on the given vector data
     void train(Index::idx_t n, const float* x) override;
 
-    /// Returns the number of vectors present in a particular inverted list
-    int getListLength(int listId) const override;
-
-    /// Return the encoded vector data contained in a particular inverted list,
-    /// for debugging purposes.
-    /// If gpuFormat is true, the data is returned as it is encoded in the
-    /// GPU-side representation.
-    /// Otherwise, it is converted to the CPU format.
-    /// compliant format, while the native GPU format may differ.
-    std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat = false)
-            const override;
-
-    /// Return the vector indices contained in a particular inverted list, for
-    /// debugging purposes.
-    std::vector<Index::idx_t> getListIndices(int listId) const override;
-
-   protected:
-    /// Called from GpuIndex for add/add_with_ids
-    void addImpl_(int n, const float* x, const Index::idx_t* ids) override;
-
-    /// Called from GpuIndex for search
-    void searchImpl_(
-            int n,
-            const float* x,
-            int k,
-            float* distances,
-            Index::idx_t* labels) const override;
-
    protected:
     /// Our configuration options
     const GpuIndexIVFFlatConfig ivfFlatConfig_;
@@ -107,8 +94,8 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
     /// Desired inverted list memory reservation
     size_t reserveMemoryVecs_;
 
-    /// Instance that we own; contains the inverted list
-    std::unique_ptr<IVFFlat> index_;
+    /// Instance that we own; contains the inverted lists
+    std::shared_ptr<IVFFlat> index_;
 };
 
 } // namespace gpu
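
A hedged sketch of the new constructor that takes an external coarse quantizer, based only on the declarations above (constructor, updateQuantizer, own_fields); the dimension, list count and synthetic training data are placeholder values.

#include <faiss/IndexFlat.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <random>
#include <vector>

int main() {
    int d = 64, nlist = 256;
    faiss::gpu::StandardGpuResources res;

    // Caller-owned coarse quantizer (own_fields stays false by default).
    faiss::IndexFlatL2 coarse(d);

    faiss::gpu::GpuIndexIVFFlatConfig config;
    config.device = 0;
    faiss::gpu::GpuIndexIVFFlat index(
            &res, &coarse, d, nlist, faiss::METRIC_L2, config);

    // Placeholder training data.
    std::mt19937 rng(123);
    std::uniform_real_distribution<float> dist(0.f, 1.f);
    std::vector<float> train(10000 * d);
    for (auto& v : train) v = dist(rng);
    index.train(train.size() / d, train.data());

    // If the coarse quantizer is later modified outside of train(),
    // propagate the change to the GPU-side structures:
    index.updateQuantizer();
    return 0;
}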

data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h:

@@ -63,14 +63,27 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
             const faiss::IndexIVFPQ* index,
             GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
 
-    /// Construct an empty index
+    /// Constructs a new instance with an empty flat quantizer; the user
+    /// provides the number of IVF lists desired.
     GpuIndexIVFPQ(
             GpuResourcesProvider* provider,
             int dims,
             int nlist,
             int subQuantizers,
             int bitsPerCode,
-            faiss::MetricType metric,
+            faiss::MetricType metric = faiss::METRIC_L2,
+            GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
+
+    /// Constructs a new instance with a provided CPU or GPU coarse quantizer;
+    /// the user provides the number of IVF lists desired.
+    GpuIndexIVFPQ(
+            GpuResourcesProvider* provider,
+            Index* coarseQuantizer,
+            int dims,
+            int nlist,
+            int subQuantizers,
+            int bitsPerCode,
+            faiss::MetricType metric = faiss::METRIC_L2,
             GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
 
     ~GpuIndexIVFPQ() override;
@@ -112,44 +125,22 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
     /// product centroid information
     void reset() override;
 
+    /// Should be called if the user ever changes the state of the IVF coarse
+    /// quantizer manually (e.g., substitutes a new instance or changes vectors
+    /// in the coarse quantizer outside the scope of training)
+    void updateQuantizer() override;
+
     /// Trains the coarse and product quantizer based on the given vector data
     void train(Index::idx_t n, const float* x) override;
 
-    /// Returns the number of vectors present in a particular inverted list
-    int getListLength(int listId) const override;
-
-    /// Return the encoded vector data contained in a particular inverted list,
-    /// for debugging purposes.
-    /// If gpuFormat is true, the data is returned as it is encoded in the
-    /// GPU-side representation.
-    /// Otherwise, it is converted to the CPU format.
-    /// compliant format, while the native GPU format may differ.
-    std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat = false)
-            const override;
-
-    /// Return the vector indices contained in a particular inverted list, for
-    /// debugging purposes.
-    std::vector<Index::idx_t> getListIndices(int listId) const override;
-
    public:
     /// Like the CPU version, we expose a publically-visible ProductQuantizer
     /// for manipulation
     ProductQuantizer pq;
 
    protected:
-    /// Called from GpuIndex for add/add_with_ids
-    void addImpl_(int n, const float* x, const Index::idx_t* ids) override;
-
-    /// Called from GpuIndex for search
-    void searchImpl_(
-            int n,
-            const float* x,
-            int k,
-            float* distances,
-            Index::idx_t* labels) const override;
-
     /// Throws errors if configuration settings are improper
-    void verifySettings_() const;
+    void verifyPQSettings_() const;
 
     /// Trains the PQ quantizer based on the given vector data
     void trainResidualQuantizer_(Index::idx_t n, const float* x);
@@ -172,7 +163,7 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
 
     /// The product quantizer instance that we own; contains the
     /// inverted lists
-    std::unique_ptr<IVFPQ> index_;
+    std::shared_ptr<IVFPQ> index_;
 };
 
 } // namespace gpu

data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h:

@@ -38,7 +38,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
                     GpuIndexIVFScalarQuantizerConfig());
 
     /// Constructs a new instance with an empty flat quantizer; the user
-    /// provides the number of lists desired.
+    /// provides the number of IVF lists desired.
     GpuIndexIVFScalarQuantizer(
             GpuResourcesProvider* provider,
             int dims,
@@ -49,6 +49,19 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
             GpuIndexIVFScalarQuantizerConfig config =
                     GpuIndexIVFScalarQuantizerConfig());
 
+    /// Constructs a new instance with a provided CPU or GPU coarse quantizer;
+    /// the user provides the number of IVF lists desired.
+    GpuIndexIVFScalarQuantizer(
+            GpuResourcesProvider* provider,
+            Index* coarseQuantizer,
+            int dims,
+            int nlist,
+            faiss::ScalarQuantizer::QuantizerType qtype,
+            faiss::MetricType metric = MetricType::METRIC_L2,
+            bool encodeResidual = true,
+            GpuIndexIVFScalarQuantizerConfig config =
+                    GpuIndexIVFScalarQuantizerConfig());
+
     ~GpuIndexIVFScalarQuantizer() override;
 
     /// Reserve GPU memory in our inverted lists for this number of vectors
@@ -70,36 +83,17 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
     /// quantizer information
     void reset() override;
 
+    /// Should be called if the user ever changes the state of the IVF coarse
+    /// quantizer manually (e.g., substitutes a new instance or changes vectors
+    /// in the coarse quantizer outside the scope of training)
+    void updateQuantizer() override;
+
     /// Trains the coarse and scalar quantizer based on the given vector data
     void train(Index::idx_t n, const float* x) override;
 
-    /// Returns the number of vectors present in a particular inverted list
-    int getListLength(int listId) const override;
-
-    /// Return the encoded vector data contained in a particular inverted list,
-    /// for debugging purposes.
-    /// If gpuFormat is true, the data is returned as it is encoded in the
-    /// GPU-side representation.
-    /// Otherwise, it is converted to the CPU format.
-    /// compliant format, while the native GPU format may differ.
-    std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat = false)
-            const override;
-
-    /// Return the vector indices contained in a particular inverted list, for
-    /// debugging purposes.
-    std::vector<Index::idx_t> getListIndices(int listId) const override;
-
    protected:
-    /// Called from GpuIndex for add/add_with_ids
-    void addImpl_(int n, const float* x, const Index::idx_t* ids) override;
-
-    /// Called from GpuIndex for search
-    void searchImpl_(
-            int n,
-            const float* x,
-            int k,
-            float* distances,
-            Index::idx_t* labels) const override;
+    /// Validates index SQ parameters
+    void verifySQSettings_() const;
 
     /// Called from train to handle SQ residual training
     void trainResiduals_(Index::idx_t n, const float* x);
@@ -119,7 +113,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
     size_t reserveMemoryVecs_;
 
     /// Instance that we own; contains the inverted list
-    std::unique_ptr<IVFFlat> index_;
+    std::shared_ptr<IVFFlat> index_;
 };
 
 } // namespace gpu

data/vendor/faiss/faiss/gpu/GpuResources.cpp:

@@ -184,5 +184,19 @@ size_t GpuResources::getTempMemoryAvailableCurrentDevice() const {
 
 GpuResourcesProvider::~GpuResourcesProvider() {}
 
+//
+// GpuResourcesProviderFromResourceInstance
+//
+
+GpuResourcesProviderFromInstance::GpuResourcesProviderFromInstance(
+        std::shared_ptr<GpuResources> p)
+        : res_(p) {}
+
+GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() {}
+
+std::shared_ptr<GpuResources> GpuResourcesProviderFromInstance::getResources() {
+    return res_;
+}
+
 } // namespace gpu
 } // namespace faiss

data/vendor/faiss/faiss/gpu/GpuResources.h:

@@ -28,8 +28,7 @@ enum AllocType {
     FlatData = 1,
 
     /// Primary data storage for GpuIndexIVF* (the storage for each individual
-    /// IVF
-    /// list)
+    /// IVF list)
     IVFLists = 2,
 
     /// Quantizer (PQ, SQ) dictionary information
@@ -251,7 +250,8 @@ class GpuResources {
     cudaStream_t getAsyncCopyStreamCurrentDevice();
 };
 
-/// Interface for a provider of a shared resources object
+/// Interface for a provider of a shared resources object. This is to avoid
+/// interfacing std::shared_ptr to Python
 class GpuResourcesProvider {
    public:
     virtual ~GpuResourcesProvider();
@@ -260,5 +260,18 @@ class GpuResourcesProvider {
     virtual std::shared_ptr<GpuResources> getResources() = 0;
 };
 
+/// A simple wrapper for a GpuResources object to make a GpuResourcesProvider
+/// out of it again
+class GpuResourcesProviderFromInstance : public GpuResourcesProvider {
+   public:
+    explicit GpuResourcesProviderFromInstance(std::shared_ptr<GpuResources> p);
+    ~GpuResourcesProviderFromInstance() override;
+
+    std::shared_ptr<GpuResources> getResources() override;
+
+   private:
+    std::shared_ptr<GpuResources> res_;
+};
+
 } // namespace gpu
 } // namespace faiss
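
A short sketch of how the new GpuResourcesProviderFromInstance wrapper might be used to share one GpuResources object across indexes; it relies only on the declarations above plus StandardGpuResources and GpuIndexFlat, which are assumed here, not part of this diff.

#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuResources.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <memory>

int main() {
    // Obtain a shared GpuResources instance from the standard provider.
    faiss::gpu::StandardGpuResources standard;
    std::shared_ptr<faiss::gpu::GpuResources> res = standard.getResources();

    // Re-wrap it so APIs expecting a GpuResourcesProvider* can reuse it.
    faiss::gpu::GpuResourcesProviderFromInstance provider(res);

    faiss::gpu::GpuIndexFlat index(&provider, 32, faiss::METRIC_L2);
    return 0;
}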

data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp:

@@ -268,6 +268,9 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
         return;
     }
 
+    FAISS_ASSERT(device < getNumDevices());
+    DeviceScope scope(device);
+
     // If this is the first device that we're initializing, create our
     // pinned memory allocation
     if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
@@ -285,9 +288,6 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
         pinnedMemAllocSize_ = pinnedMemSize_;
     }
 
-    FAISS_ASSERT(device < getNumDevices());
-    DeviceScope scope(device);
-
     // Make sure that device properties for all devices are cached
     auto& prop = getDeviceProperties(device);