faiss 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +23 -21
  4. data/ext/faiss/extconf.rb +11 -0
  5. data/ext/faiss/index.cpp +4 -4
  6. data/ext/faiss/index_binary.cpp +6 -6
  7. data/ext/faiss/product_quantizer.cpp +4 -4
  8. data/lib/faiss/version.rb +1 -1
  9. data/vendor/faiss/faiss/AutoTune.cpp +13 -0
  10. data/vendor/faiss/faiss/IVFlib.cpp +101 -2
  11. data/vendor/faiss/faiss/IVFlib.h +26 -2
  12. data/vendor/faiss/faiss/Index.cpp +36 -3
  13. data/vendor/faiss/faiss/Index.h +43 -6
  14. data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
  15. data/vendor/faiss/faiss/Index2Layer.h +6 -1
  16. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
  20. data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +18 -3
  22. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
  23. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
  24. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
  31. data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
  32. data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
  33. data/vendor/faiss/faiss/IndexFastScan.h +145 -0
  34. data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
  35. data/vendor/faiss/faiss/IndexFlat.h +7 -4
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
  39. data/vendor/faiss/faiss/IndexHNSW.h +4 -2
  40. data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
  41. data/vendor/faiss/faiss/IndexIDMap.h +107 -0
  42. data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
  43. data/vendor/faiss/faiss/IndexIVF.h +35 -16
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
  45. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
  46. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
  47. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
  48. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
  49. data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
  50. data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
  51. data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
  53. data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
  55. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
  56. data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
  57. data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
  58. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
  59. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
  60. data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
  61. data/vendor/faiss/faiss/IndexLSH.h +2 -1
  62. data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
  63. data/vendor/faiss/faiss/IndexLattice.h +3 -1
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
  66. data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
  67. data/vendor/faiss/faiss/IndexNSG.h +25 -1
  68. data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
  69. data/vendor/faiss/faiss/IndexPQ.h +19 -5
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
  73. data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
  74. data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
  75. data/vendor/faiss/faiss/IndexRefine.h +4 -2
  76. data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
  77. data/vendor/faiss/faiss/IndexReplicas.h +2 -1
  78. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
  79. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
  80. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
  81. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
  82. data/vendor/faiss/faiss/IndexShards.cpp +4 -1
  83. data/vendor/faiss/faiss/IndexShards.h +2 -1
  84. data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
  85. data/vendor/faiss/faiss/MetaIndexes.h +3 -81
  86. data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
  87. data/vendor/faiss/faiss/VectorTransform.h +22 -4
  88. data/vendor/faiss/faiss/clone_index.cpp +23 -1
  89. data/vendor/faiss/faiss/clone_index.h +3 -0
  90. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
  91. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
  92. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
  93. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
  94. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
  95. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
  96. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
  101. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
  102. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
  103. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  104. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
  105. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
  106. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
  107. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
  108. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
  109. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
  110. data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
  111. data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
  112. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
  113. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
  114. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
  115. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
  116. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
  117. data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
  118. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
  119. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
  124. data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
  125. data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
  126. data/vendor/faiss/faiss/impl/HNSW.h +19 -16
  127. data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
  128. data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
  131. data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  134. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
  138. data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
  144. data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
  145. data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
  146. data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
  147. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
  148. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
  149. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
  150. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
  151. data/vendor/faiss/faiss/index_factory.cpp +196 -7
  152. data/vendor/faiss/faiss/index_io.h +5 -0
  153. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
  154. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
  155. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
  156. data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
  157. data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
  158. data/vendor/faiss/faiss/utils/Heap.h +31 -15
  159. data/vendor/faiss/faiss/utils/distances.cpp +380 -56
  160. data/vendor/faiss/faiss/utils/distances.h +113 -15
  161. data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
  162. data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
  163. data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
  164. data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
  165. data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
  166. data/vendor/faiss/faiss/utils/fp16.h +11 -0
  167. data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
  168. data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
  169. data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
  170. data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
  171. data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
  172. data/vendor/faiss/faiss/utils/random.cpp +53 -0
  173. data/vendor/faiss/faiss/utils/random.h +5 -0
  174. data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
  175. data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
  176. data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
  177. metadata +37 -3
@@ -16,6 +16,7 @@
16
16
  #include <faiss/gpu/GpuIndexIVFFlat.h>
17
17
  #include <faiss/gpu/GpuIndexIVFPQ.h>
18
18
  #include <faiss/gpu/GpuIndexIVFScalarQuantizer.h>
19
+ #include <faiss/gpu/impl/IndexUtils.h>
19
20
  #include <faiss/gpu/utils/DeviceUtils.h>
20
21
  #include <faiss/impl/FaissAssert.h>
21
22
 
@@ -121,7 +121,6 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
121
121
  GpuIndexFlatConfig config;
122
122
  config.device = device;
123
123
  config.useFloat16 = useFloat16;
124
- config.storeTransposed = storeTransposed;
125
124
  return new GpuIndexFlat(provider, ifl, config);
126
125
  } else if (
127
126
  dynamic_cast<const IndexScalarQuantizer*>(index) &&
@@ -147,7 +146,6 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
147
146
  config.device = device;
148
147
  config.indicesOptions = indicesOptions;
149
148
  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
150
- config.flatConfig.storeTransposed = storeTransposed;
151
149
 
152
150
  GpuIndexIVFFlat* res = new GpuIndexIVFFlat(
153
151
  provider, ifl->d, ifl->nlist, ifl->metric_type, config);
@@ -164,7 +162,6 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
164
162
  config.device = device;
165
163
  config.indicesOptions = indicesOptions;
166
164
  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
167
- config.flatConfig.storeTransposed = storeTransposed;
168
165
 
169
166
  GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer(
170
167
  provider,
@@ -195,7 +192,6 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
195
192
  config.device = device;
196
193
  config.indicesOptions = indicesOptions;
197
194
  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
198
- config.flatConfig.storeTransposed = storeTransposed;
199
195
  config.useFloat16LookupTables = useFloat16;
200
196
  config.usePrecomputedTables = usePrecomputed;
201
197
 
@@ -74,7 +74,19 @@ class GpuIndex : public faiss::Index {
74
74
  const float* x,
75
75
  Index::idx_t k,
76
76
  float* distances,
77
- Index::idx_t* labels) const override;
77
+ Index::idx_t* labels,
78
+ const SearchParameters* params = nullptr) const override;
79
+
80
+ /// `x`, `distances` and `labels` and `recons` can be resident on the CPU or
81
+ /// any GPU; copies are performed as needed
82
+ void search_and_reconstruct(
83
+ idx_t n,
84
+ const float* x,
85
+ idx_t k,
86
+ float* distances,
87
+ idx_t* labels,
88
+ float* recons,
89
+ const SearchParameters* params = nullptr) const override;
78
90
 
79
91
  /// Overridden to force GPU indices to provide their own GPU-friendly
80
92
  /// implementation
@@ -111,7 +123,8 @@ class GpuIndex : public faiss::Index {
111
123
  const float* x,
112
124
  int k,
113
125
  float* distances,
114
- Index::idx_t* labels) const = 0;
126
+ Index::idx_t* labels,
127
+ const SearchParameters* params) const = 0;
115
128
 
116
129
  private:
117
130
  /// Handles paged adds if the add set is too large, passes to
@@ -127,7 +140,8 @@ class GpuIndex : public faiss::Index {
127
140
  const float* x,
128
141
  int k,
129
142
  float* outDistancesData,
130
- Index::idx_t* outIndicesData) const;
143
+ Index::idx_t* outIndicesData,
144
+ const SearchParameters* params) const;
131
145
 
132
146
  /// Calls searchImpl_ for a single page of GPU-resident data,
133
147
  /// handling paging of the data and copies from the CPU
@@ -136,7 +150,8 @@ class GpuIndex : public faiss::Index {
136
150
  const float* x,
137
151
  int k,
138
152
  float* outDistancesData,
139
- Index::idx_t* outIndicesData) const;
153
+ Index::idx_t* outIndicesData,
154
+ const SearchParameters* params) const;
140
155
 
141
156
  protected:
142
157
  /// Manages streams, cuBLAS handles and scratch memory for devices
@@ -149,5 +164,14 @@ class GpuIndex : public faiss::Index {
149
164
  size_t minPagedSize_;
150
165
  };
151
166
 
167
+ /// If the given index is a GPU index, this returns the index instance
168
+ GpuIndex* tryCastGpuIndex(faiss::Index* index);
169
+
170
+ /// Is the given index instance a GPU index?
171
+ bool isGpuIndex(faiss::Index* index);
172
+
173
+ /// Does the given CPU index instance have a corresponding GPU implementation?
174
+ bool isGpuIndexImplemented(faiss::Index* index);
175
+
152
176
  } // namespace gpu
153
177
  } // namespace faiss
@@ -62,7 +62,8 @@ class GpuIndexBinaryFlat : public IndexBinary {
62
62
  const uint8_t* x,
63
63
  faiss::IndexBinary::idx_t k,
64
64
  int32_t* distances,
65
- faiss::IndexBinary::idx_t* labels) const override;
65
+ faiss::IndexBinary::idx_t* labels,
66
+ const faiss::SearchParameters* params = nullptr) const override;
66
67
 
67
68
  void reconstruct(faiss::IndexBinary::idx_t key, uint8_t* recons)
68
69
  const override;
@@ -24,17 +24,14 @@ namespace gpu {
24
24
  class FlatIndex;
25
25
 
26
26
  struct GpuIndexFlatConfig : public GpuIndexConfig {
27
- inline GpuIndexFlatConfig() : useFloat16(false), storeTransposed(false) {}
27
+ inline GpuIndexFlatConfig() : useFloat16(false) {}
28
28
 
29
29
  /// Whether or not data is stored as float16
30
30
  bool useFloat16;
31
31
 
32
- /// Whether or not data is stored (transparently) in a transposed
33
- /// layout, enabling use of the NN GEMM call, which is ~10% faster.
34
- /// This will improve the speed of the flat index, but will
35
- /// substantially slow down any add() calls made, as all data must
36
- /// be transposed, and will increase storage requirements (we store
37
- /// data in both transposed and non-transposed layouts).
32
+ /// Deprecated: no longer used
33
+ /// Previously used to indicate whether internal storage of vectors is
34
+ /// transposed
38
35
  bool storeTransposed;
39
36
  };
40
37
 
@@ -98,6 +95,10 @@ class GpuIndexFlat : public GpuIndex {
98
95
  void reconstruct_n(Index::idx_t i0, Index::idx_t num, float* out)
99
96
  const override;
100
97
 
98
+ /// Batch reconstruction method
99
+ void reconstruct_batch(Index::idx_t n, const Index::idx_t* keys, float* out)
100
+ const override;
101
+
101
102
  /// Compute residual
102
103
  void compute_residual(const float* x, float* residual, Index::idx_t key)
103
104
  const override;
@@ -128,7 +129,8 @@ class GpuIndexFlat : public GpuIndex {
128
129
  const float* x,
129
130
  int k,
130
131
  float* distances,
131
- Index::idx_t* labels) const override;
132
+ Index::idx_t* labels,
133
+ const SearchParameters* params) const override;
132
134
 
133
135
  protected:
134
136
  /// Our configuration options
@@ -8,18 +8,17 @@
8
8
  #pragma once
9
9
 
10
10
  #include <faiss/Clustering.h>
11
+ #include <faiss/IndexIVF.h> // for SearchParametersIVF
11
12
  #include <faiss/gpu/GpuIndex.h>
12
13
  #include <faiss/gpu/GpuIndexFlat.h>
13
14
  #include <faiss/gpu/GpuIndicesOptions.h>
14
-
15
- namespace faiss {
16
- struct IndexIVF;
17
- }
15
+ #include <memory>
18
16
 
19
17
  namespace faiss {
20
18
  namespace gpu {
21
19
 
22
20
  class GpuIndexFlat;
21
+ class IVFBase;
23
22
 
24
23
  struct GpuIndexIVFConfig : public GpuIndexConfig {
25
24
  inline GpuIndexIVFConfig() : indicesOptions(INDICES_64_BIT) {}
@@ -31,8 +30,13 @@ struct GpuIndexIVFConfig : public GpuIndexConfig {
31
30
  GpuIndexFlatConfig flatConfig;
32
31
  };
33
32
 
33
+ /// Base class of all GPU IVF index types. This (for now) deliberately does not
34
+ /// inherit from IndexIVF, as many of the public data members and functionality
35
+ /// in IndexIVF is not supported in the same manner on the GPU.
34
36
  class GpuIndexIVF : public GpuIndex {
35
37
  public:
38
+ /// Version that auto-constructs a flat coarse quantizer based on the
39
+ /// desired metric
36
40
  GpuIndexIVF(
37
41
  GpuResourcesProvider* provider,
38
42
  int dims,
@@ -41,6 +45,17 @@ class GpuIndexIVF : public GpuIndex {
41
45
  int nlist,
42
46
  GpuIndexIVFConfig config = GpuIndexIVFConfig());
43
47
 
48
+ /// Version that takes a coarse quantizer instance. The GpuIndexIVF does not
49
+ /// own the coarseQuantizer instance by default (functions like IndexIVF).
50
+ GpuIndexIVF(
51
+ GpuResourcesProvider* provider,
52
+ Index* coarseQuantizer,
53
+ int dims,
54
+ faiss::MetricType metric,
55
+ float metricArg,
56
+ int nlist,
57
+ GpuIndexIVFConfig config = GpuIndexIVFConfig());
58
+
44
59
  ~GpuIndexIVF() override;
45
60
 
46
61
  private:
@@ -54,11 +69,16 @@ class GpuIndexIVF : public GpuIndex {
54
69
  /// Copy what we have to the CPU equivalent
55
70
  void copyTo(faiss::IndexIVF* index) const;
56
71
 
72
+ /// Should be called if the user ever changes the state of the IVF coarse
73
+ /// quantizer manually (e.g., substitutes a new instance or changes vectors
74
+ /// in the coarse quantizer outside the scope of training)
75
+ virtual void updateQuantizer() = 0;
76
+
57
77
  /// Returns the number of inverted lists we're managing
58
78
  int getNumLists() const;
59
79
 
60
80
  /// Returns the number of vectors present in a particular inverted list
61
- virtual int getListLength(int listId) const = 0;
81
+ int getListLength(int listId) const;
62
82
 
63
83
  /// Return the encoded vector data contained in a particular inverted list,
64
84
  /// for debugging purposes.
@@ -66,16 +86,12 @@ class GpuIndexIVF : public GpuIndex {
66
86
  /// GPU-side representation.
67
87
  /// Otherwise, it is converted to the CPU format.
68
88
  /// compliant format, while the native GPU format may differ.
69
- virtual std::vector<uint8_t> getListVectorData(
70
- int listId,
71
- bool gpuFormat = false) const = 0;
89
+ std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat = false)
90
+ const;
72
91
 
73
92
  /// Return the vector indices contained in a particular inverted list, for
74
93
  /// debugging purposes.
75
- virtual std::vector<Index::idx_t> getListIndices(int listId) const = 0;
76
-
77
- /// Return the quantizer we're using
78
- GpuIndexFlat* getQuantizer();
94
+ std::vector<Index::idx_t> getListIndices(int listId) const;
79
95
 
80
96
  /// Sets the number of list probes per query
81
97
  void setNumProbes(int nprobe);
@@ -83,10 +99,49 @@ class GpuIndexIVF : public GpuIndex {
83
99
  /// Returns our current number of list probes per query
84
100
  int getNumProbes() const;
85
101
 
102
+ /// Same interface as faiss::IndexIVF, in order to search a set of vectors
103
+ /// pre-quantized by the IVF quantizer. Does not include IndexIVFStats as
104
+ /// that can only be obtained on the host via a GPU d2h copy.
105
+ /// @param n nb of vectors to query
106
+ /// @param x query vectors, size nx * d
107
+ /// @param assign coarse quantization indices, size nx * nprobe
108
+ /// @param centroid_dis
109
+ /// distances to coarse centroids, size nx * nprobe
110
+ /// @param distance
111
+ /// output distances, size n * k
112
+ /// @param labels output labels, size n * k
113
+ /// @param store_pairs store inv list index + inv list offset
114
+ /// instead in upper/lower 32 bit of result,
115
+ /// instead of ids (used for reranking).
116
+ /// @param params used to override the object's search parameters
117
+ void search_preassigned(
118
+ idx_t n,
119
+ const float* x,
120
+ idx_t k,
121
+ const idx_t* assign,
122
+ const float* centroid_dis,
123
+ float* distances,
124
+ idx_t* labels,
125
+ bool store_pairs,
126
+ const SearchParametersIVF* params = nullptr) const;
127
+
86
128
  protected:
129
+ void verifyIVFSettings_() const;
87
130
  bool addImplRequiresIDs_() const override;
88
131
  void trainQuantizer_(Index::idx_t n, const float* x);
89
132
 
133
+ /// Called from GpuIndex for add/add_with_ids
134
+ void addImpl_(int n, const float* x, const Index::idx_t* ids) override;
135
+
136
+ /// Called from GpuIndex for search
137
+ void searchImpl_(
138
+ int n,
139
+ const float* x,
140
+ int k,
141
+ float* distances,
142
+ Index::idx_t* labels,
143
+ const SearchParameters* params) const override;
144
+
90
145
  public:
91
146
  /// Exposing this like the CPU version for manipulation
92
147
  ClusteringParameters cp;
@@ -97,12 +152,18 @@ class GpuIndexIVF : public GpuIndex {
97
152
  /// Exposing this like the CPU version for manipulation
98
153
  int nprobe;
99
154
 
100
- /// Exposeing this like the CPU version for query
101
- GpuIndexFlat* quantizer;
155
+ /// A user-pluggable coarse quantizer
156
+ Index* quantizer;
157
+
158
+ /// Whether or not we own the coarse quantizer
159
+ bool own_fields;
102
160
 
103
161
  protected:
104
162
  /// Our configuration options
105
163
  const GpuIndexIVFConfig ivfConfig_;
164
+
165
+ /// For a trained/initialized index, this is a reference to the base class
166
+ std::shared_ptr<IVFBase> baseIndex_;
106
167
  };
107
168
 
108
169
  } // namespace gpu
@@ -40,12 +40,22 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
40
40
  GpuIndexIVFFlatConfig config = GpuIndexIVFFlatConfig());
41
41
 
42
42
  /// Constructs a new instance with an empty flat quantizer; the user
43
- /// provides the number of lists desired.
43
+ /// provides the number of IVF lists desired.
44
44
  GpuIndexIVFFlat(
45
45
  GpuResourcesProvider* provider,
46
46
  int dims,
47
47
  int nlist,
48
- faiss::MetricType metric,
48
+ faiss::MetricType metric = faiss::METRIC_L2,
49
+ GpuIndexIVFFlatConfig config = GpuIndexIVFFlatConfig());
50
+
51
+ /// Constructs a new instance with a provided CPU or GPU coarse quantizer;
52
+ /// the user provides the number of IVF lists desired.
53
+ GpuIndexIVFFlat(
54
+ GpuResourcesProvider* provider,
55
+ Index* coarseQuantizer,
56
+ int dims,
57
+ int nlist,
58
+ faiss::MetricType metric = faiss::METRIC_L2,
49
59
  GpuIndexIVFFlatConfig config = GpuIndexIVFFlatConfig());
50
60
 
51
61
  ~GpuIndexIVFFlat() override;
@@ -69,37 +79,14 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
69
79
  /// information
70
80
  void reset() override;
71
81
 
82
+ /// Should be called if the user ever changes the state of the IVF coarse
83
+ /// quantizer manually (e.g., substitutes a new instance or changes vectors
84
+ /// in the coarse quantizer outside the scope of training)
85
+ void updateQuantizer() override;
86
+
72
87
  /// Trains the coarse quantizer based on the given vector data
73
88
  void train(Index::idx_t n, const float* x) override;
74
89
 
75
- /// Returns the number of vectors present in a particular inverted list
76
- int getListLength(int listId) const override;
77
-
78
- /// Return the encoded vector data contained in a particular inverted list,
79
- /// for debugging purposes.
80
- /// If gpuFormat is true, the data is returned as it is encoded in the
81
- /// GPU-side representation.
82
- /// Otherwise, it is converted to the CPU format.
83
- /// compliant format, while the native GPU format may differ.
84
- std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat = false)
85
- const override;
86
-
87
- /// Return the vector indices contained in a particular inverted list, for
88
- /// debugging purposes.
89
- std::vector<Index::idx_t> getListIndices(int listId) const override;
90
-
91
- protected:
92
- /// Called from GpuIndex for add/add_with_ids
93
- void addImpl_(int n, const float* x, const Index::idx_t* ids) override;
94
-
95
- /// Called from GpuIndex for search
96
- void searchImpl_(
97
- int n,
98
- const float* x,
99
- int k,
100
- float* distances,
101
- Index::idx_t* labels) const override;
102
-
103
90
  protected:
104
91
  /// Our configuration options
105
92
  const GpuIndexIVFFlatConfig ivfFlatConfig_;
@@ -107,8 +94,8 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
107
94
  /// Desired inverted list memory reservation
108
95
  size_t reserveMemoryVecs_;
109
96
 
110
- /// Instance that we own; contains the inverted list
111
- std::unique_ptr<IVFFlat> index_;
97
+ /// Instance that we own; contains the inverted lists
98
+ std::shared_ptr<IVFFlat> index_;
112
99
  };
113
100
 
114
101
  } // namespace gpu
@@ -63,14 +63,27 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
63
63
  const faiss::IndexIVFPQ* index,
64
64
  GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
65
65
 
66
- /// Construct an empty index
66
+ /// Constructs a new instance with an empty flat quantizer; the user
67
+ /// provides the number of IVF lists desired.
67
68
  GpuIndexIVFPQ(
68
69
  GpuResourcesProvider* provider,
69
70
  int dims,
70
71
  int nlist,
71
72
  int subQuantizers,
72
73
  int bitsPerCode,
73
- faiss::MetricType metric,
74
+ faiss::MetricType metric = faiss::METRIC_L2,
75
+ GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
76
+
77
+ /// Constructs a new instance with a provided CPU or GPU coarse quantizer;
78
+ /// the user provides the number of IVF lists desired.
79
+ GpuIndexIVFPQ(
80
+ GpuResourcesProvider* provider,
81
+ Index* coarseQuantizer,
82
+ int dims,
83
+ int nlist,
84
+ int subQuantizers,
85
+ int bitsPerCode,
86
+ faiss::MetricType metric = faiss::METRIC_L2,
74
87
  GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
75
88
 
76
89
  ~GpuIndexIVFPQ() override;
@@ -112,44 +125,22 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
112
125
  /// product centroid information
113
126
  void reset() override;
114
127
 
128
+ /// Should be called if the user ever changes the state of the IVF coarse
129
+ /// quantizer manually (e.g., substitutes a new instance or changes vectors
130
+ /// in the coarse quantizer outside the scope of training)
131
+ void updateQuantizer() override;
132
+
115
133
  /// Trains the coarse and product quantizer based on the given vector data
116
134
  void train(Index::idx_t n, const float* x) override;
117
135
 
118
- /// Returns the number of vectors present in a particular inverted list
119
- int getListLength(int listId) const override;
120
-
121
- /// Return the encoded vector data contained in a particular inverted list,
122
- /// for debugging purposes.
123
- /// If gpuFormat is true, the data is returned as it is encoded in the
124
- /// GPU-side representation.
125
- /// Otherwise, it is converted to the CPU format.
126
- /// compliant format, while the native GPU format may differ.
127
- std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat = false)
128
- const override;
129
-
130
- /// Return the vector indices contained in a particular inverted list, for
131
- /// debugging purposes.
132
- std::vector<Index::idx_t> getListIndices(int listId) const override;
133
-
134
136
  public:
135
137
  /// Like the CPU version, we expose a publically-visible ProductQuantizer
136
138
  /// for manipulation
137
139
  ProductQuantizer pq;
138
140
 
139
141
  protected:
140
- /// Called from GpuIndex for add/add_with_ids
141
- void addImpl_(int n, const float* x, const Index::idx_t* ids) override;
142
-
143
- /// Called from GpuIndex for search
144
- void searchImpl_(
145
- int n,
146
- const float* x,
147
- int k,
148
- float* distances,
149
- Index::idx_t* labels) const override;
150
-
151
142
  /// Throws errors if configuration settings are improper
152
- void verifySettings_() const;
143
+ void verifyPQSettings_() const;
153
144
 
154
145
  /// Trains the PQ quantizer based on the given vector data
155
146
  void trainResidualQuantizer_(Index::idx_t n, const float* x);
@@ -172,7 +163,7 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
172
163
 
173
164
  /// The product quantizer instance that we own; contains the
174
165
  /// inverted lists
175
- std::unique_ptr<IVFPQ> index_;
166
+ std::shared_ptr<IVFPQ> index_;
176
167
  };
177
168
 
178
169
  } // namespace gpu
@@ -38,7 +38,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
38
38
  GpuIndexIVFScalarQuantizerConfig());
39
39
 
40
40
  /// Constructs a new instance with an empty flat quantizer; the user
41
- /// provides the number of lists desired.
41
+ /// provides the number of IVF lists desired.
42
42
  GpuIndexIVFScalarQuantizer(
43
43
  GpuResourcesProvider* provider,
44
44
  int dims,
@@ -49,6 +49,19 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
49
49
  GpuIndexIVFScalarQuantizerConfig config =
50
50
  GpuIndexIVFScalarQuantizerConfig());
51
51
 
52
+ /// Constructs a new instance with a provided CPU or GPU coarse quantizer;
53
+ /// the user provides the number of IVF lists desired.
54
+ GpuIndexIVFScalarQuantizer(
55
+ GpuResourcesProvider* provider,
56
+ Index* coarseQuantizer,
57
+ int dims,
58
+ int nlist,
59
+ faiss::ScalarQuantizer::QuantizerType qtype,
60
+ faiss::MetricType metric = MetricType::METRIC_L2,
61
+ bool encodeResidual = true,
62
+ GpuIndexIVFScalarQuantizerConfig config =
63
+ GpuIndexIVFScalarQuantizerConfig());
64
+
52
65
  ~GpuIndexIVFScalarQuantizer() override;
53
66
 
54
67
  /// Reserve GPU memory in our inverted lists for this number of vectors
@@ -70,36 +83,17 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
70
83
  /// quantizer information
71
84
  void reset() override;
72
85
 
86
+ /// Should be called if the user ever changes the state of the IVF coarse
87
+ /// quantizer manually (e.g., substitutes a new instance or changes vectors
88
+ /// in the coarse quantizer outside the scope of training)
89
+ void updateQuantizer() override;
90
+
73
91
  /// Trains the coarse and scalar quantizer based on the given vector data
74
92
  void train(Index::idx_t n, const float* x) override;
75
93
 
76
- /// Returns the number of vectors present in a particular inverted list
77
- int getListLength(int listId) const override;
78
-
79
- /// Return the encoded vector data contained in a particular inverted list,
80
- /// for debugging purposes.
81
- /// If gpuFormat is true, the data is returned as it is encoded in the
82
- /// GPU-side representation.
83
- /// Otherwise, it is converted to the CPU format.
84
- /// compliant format, while the native GPU format may differ.
85
- std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat = false)
86
- const override;
87
-
88
- /// Return the vector indices contained in a particular inverted list, for
89
- /// debugging purposes.
90
- std::vector<Index::idx_t> getListIndices(int listId) const override;
91
-
92
94
  protected:
93
- /// Called from GpuIndex for add/add_with_ids
94
- void addImpl_(int n, const float* x, const Index::idx_t* ids) override;
95
-
96
- /// Called from GpuIndex for search
97
- void searchImpl_(
98
- int n,
99
- const float* x,
100
- int k,
101
- float* distances,
102
- Index::idx_t* labels) const override;
95
+ /// Validates index SQ parameters
96
+ void verifySQSettings_() const;
103
97
 
104
98
  /// Called from train to handle SQ residual training
105
99
  void trainResiduals_(Index::idx_t n, const float* x);
@@ -119,7 +113,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
119
113
  size_t reserveMemoryVecs_;
120
114
 
121
115
  /// Instance that we own; contains the inverted list
122
- std::unique_ptr<IVFFlat> index_;
116
+ std::shared_ptr<IVFFlat> index_;
123
117
  };
124
118
 
125
119
  } // namespace gpu
@@ -184,5 +184,19 @@ size_t GpuResources::getTempMemoryAvailableCurrentDevice() const {
184
184
 
185
185
  GpuResourcesProvider::~GpuResourcesProvider() {}
186
186
 
187
+ //
188
+ // GpuResourcesProviderFromResourceInstance
189
+ //
190
+
191
+ GpuResourcesProviderFromInstance::GpuResourcesProviderFromInstance(
192
+ std::shared_ptr<GpuResources> p)
193
+ : res_(p) {}
194
+
195
+ GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() {}
196
+
197
+ std::shared_ptr<GpuResources> GpuResourcesProviderFromInstance::getResources() {
198
+ return res_;
199
+ }
200
+
187
201
  } // namespace gpu
188
202
  } // namespace faiss
@@ -28,8 +28,7 @@ enum AllocType {
28
28
  FlatData = 1,
29
29
 
30
30
  /// Primary data storage for GpuIndexIVF* (the storage for each individual
31
- /// IVF
32
- /// list)
31
+ /// IVF list)
33
32
  IVFLists = 2,
34
33
 
35
34
  /// Quantizer (PQ, SQ) dictionary information
@@ -251,7 +250,8 @@ class GpuResources {
251
250
  cudaStream_t getAsyncCopyStreamCurrentDevice();
252
251
  };
253
252
 
254
- /// Interface for a provider of a shared resources object
253
+ /// Interface for a provider of a shared resources object. This is to avoid
254
+ /// interfacing std::shared_ptr to Python
255
255
  class GpuResourcesProvider {
256
256
  public:
257
257
  virtual ~GpuResourcesProvider();
@@ -260,5 +260,18 @@ class GpuResourcesProvider {
260
260
  virtual std::shared_ptr<GpuResources> getResources() = 0;
261
261
  };
262
262
 
263
+ /// A simple wrapper for a GpuResources object to make a GpuResourcesProvider
264
+ /// out of it again
265
+ class GpuResourcesProviderFromInstance : public GpuResourcesProvider {
266
+ public:
267
+ explicit GpuResourcesProviderFromInstance(std::shared_ptr<GpuResources> p);
268
+ ~GpuResourcesProviderFromInstance() override;
269
+
270
+ std::shared_ptr<GpuResources> getResources() override;
271
+
272
+ private:
273
+ std::shared_ptr<GpuResources> res_;
274
+ };
275
+
263
276
  } // namespace gpu
264
277
  } // namespace faiss
@@ -268,6 +268,9 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
268
268
  return;
269
269
  }
270
270
 
271
+ FAISS_ASSERT(device < getNumDevices());
272
+ DeviceScope scope(device);
273
+
271
274
  // If this is the first device that we're initializing, create our
272
275
  // pinned memory allocation
273
276
  if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
@@ -285,9 +288,6 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
285
288
  pinnedMemAllocSize_ = pinnedMemSize_;
286
289
  }
287
290
 
288
- FAISS_ASSERT(device < getNumDevices());
289
- DeviceScope scope(device);
290
-
291
291
  // Make sure that device properties for all devices are cached
292
292
  auto& prop = getDeviceProperties(device);
293
293