faiss 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/faiss/Clustering.cpp +32 -0
  6. data/vendor/faiss/faiss/Clustering.h +14 -0
  7. data/vendor/faiss/faiss/Index.h +1 -1
  8. data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
  9. data/vendor/faiss/faiss/Index2Layer.h +2 -16
  10. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  11. data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
  12. data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
  13. data/vendor/faiss/faiss/IndexFlat.h +9 -15
  14. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  15. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  16. data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
  17. data/vendor/faiss/faiss/IndexIVF.h +25 -7
  18. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  19. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  20. data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
  21. data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
  22. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  23. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
  24. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
  25. data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
  26. data/vendor/faiss/faiss/IndexLSH.h +2 -15
  27. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
  28. data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
  29. data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
  30. data/vendor/faiss/faiss/IndexPQ.h +2 -17
  31. data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
  32. data/vendor/faiss/faiss/IndexRefine.h +10 -0
  33. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
  34. data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
  35. data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
  36. data/vendor/faiss/faiss/VectorTransform.h +3 -0
  37. data/vendor/faiss/faiss/clone_index.cpp +3 -2
  38. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
  39. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  40. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
  41. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
  42. data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
  43. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
  44. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
  45. data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
  46. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  47. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
  48. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
  49. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
  50. data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
  51. data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
  52. data/vendor/faiss/faiss/impl/io.cpp +1 -1
  53. data/vendor/faiss/faiss/impl/io_macros.h +20 -0
  54. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  55. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  56. data/vendor/faiss/faiss/index_factory.cpp +585 -414
  57. data/vendor/faiss/faiss/index_factory.h +3 -0
  58. data/vendor/faiss/faiss/utils/distances.cpp +4 -2
  59. data/vendor/faiss/faiss/utils/distances.h +36 -3
  60. data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
  61. data/vendor/faiss/faiss/utils/utils.h +1 -1
  62. metadata +12 -5
  63. data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
@@ -15,10 +15,17 @@
15
15
  #include <vector>
16
16
 
17
17
  #include <faiss/impl/AdditiveQuantizer.h>
18
+ #include <faiss/impl/platform_macros.h>
18
19
  #include <faiss/utils/utils.h>
19
20
 
20
21
  namespace faiss {
21
22
 
23
+ namespace lsq {
24
+
25
+ struct IcmEncoderFactory;
26
+
27
+ } // namespace lsq
28
+
22
29
  /** Implementation of LSQ/LSQ++ described in the following two papers:
23
30
  *
24
31
  * Revisiting additive quantization
@@ -35,7 +42,6 @@ namespace faiss {
35
42
  * The trained codes are stored in `codebooks` which is called
36
43
  * `centroids` in PQ and RQ.
37
44
  */
38
-
39
45
  struct LocalSearchQuantizer : AdditiveQuantizer {
40
46
  size_t K; ///< number of codes per codebook
41
47
 
@@ -53,10 +59,21 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
53
59
  int random_seed; ///< seed for random generator
54
60
  size_t nperts; ///< number of perturbation in each code
55
61
 
62
+ ///< if non-NULL, use this encoder to encode
63
+ lsq::IcmEncoderFactory* icm_encoder_factory;
64
+
65
+ bool update_codebooks_with_double = true;
66
+
56
67
  LocalSearchQuantizer(
57
- size_t d, /* dimensionality of the input vectors */
58
- size_t M, /* number of subquantizers */
59
- size_t nbits); /* number of bit per subvector index */
68
+ size_t d, /* dimensionality of the input vectors */
69
+ size_t M, /* number of subquantizers */
70
+ size_t nbits, /* number of bit per subvector index */
71
+ Search_type_t search_type =
72
+ ST_decompress); /* determines the storage type */
73
+
74
+ LocalSearchQuantizer();
75
+
76
+ ~LocalSearchQuantizer() override;
60
77
 
61
78
  // Train the local search quantizer
62
79
  void train(size_t n, const float* x) override;
@@ -65,6 +82,7 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
65
82
  *
66
83
  * @param x vectors to encode, size n * d
67
84
  * @param codes output codes, size n * code_size
85
+ * @param n number of vectors
68
86
  */
69
87
  void compute_codes(const float* x, uint8_t* codes, size_t n) const override;
70
88
 
@@ -72,36 +90,46 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
72
90
  *
73
91
  * @param x training vectors, size n * d
74
92
  * @param codes encoded training vectors, size n * M
93
+ * @param n number of vectors
75
94
  */
76
95
  void update_codebooks(const float* x, const int32_t* codes, size_t n);
77
96
 
78
97
  /** Encode vectors given codebooks using iterative conditional mode (icm).
79
98
  *
80
- * @param x vectors to encode, size n * d
81
- * @param codes output codes, size n * M
99
+ * @param codes output codes, size n * M
100
+ * @param x vectors to encode, size n * d
101
+ * @param n number of vectors
82
102
  * @param ils_iters number of iterations of iterative local search
83
103
  */
84
104
  void icm_encode(
85
- const float* x,
86
105
  int32_t* codes,
106
+ const float* x,
87
107
  size_t n,
88
108
  size_t ils_iters,
89
109
  std::mt19937& gen) const;
90
110
 
91
- void icm_encode_partial(
92
- size_t index,
93
- const float* x,
111
+ void icm_encode_impl(
94
112
  int32_t* codes,
113
+ const float* x,
114
+ const float* unaries,
115
+ std::mt19937& gen,
95
116
  size_t n,
96
- const float* binaries,
97
117
  size_t ils_iters,
98
- std::mt19937& gen) const;
118
+ bool verbose) const;
99
119
 
100
120
  void icm_encode_step(
121
+ int32_t* codes,
101
122
  const float* unaries,
102
123
  const float* binaries,
103
- int32_t* codes,
104
- size_t n) const;
124
+ size_t n,
125
+ size_t n_iters) const;
126
+
127
+ /** Add some perturbation to codes
128
+ *
129
+ * @param codes codes to be perturbed, size n * M
130
+ * @param n number of vectors
131
+ */
132
+ void perturb_codes(int32_t* codes, size_t n, std::mt19937& gen) const;
105
133
 
106
134
  /** Add some perturbation to codebooks
107
135
  *
@@ -113,12 +141,6 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
113
141
  const std::vector<float>& stddev,
114
142
  std::mt19937& gen);
115
143
 
116
- /** Add some perturbation to codes
117
- *
118
- * @param codes codes to be perturbed, size n * M
119
- */
120
- void perturb_codes(int32_t* codes, size_t n, std::mt19937& gen) const;
121
-
122
144
  /** Compute binary terms
123
145
  *
124
146
  * @param binaries binary terms, size M * M * K * K
@@ -127,6 +149,7 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
127
149
 
128
150
  /** Compute unary terms
129
151
  *
152
+ * @param n number of vectors
130
153
  * @param x vectors to encode, size n * d
131
154
  * @param unaries unary terms, size n * M * K
132
155
  */
@@ -134,8 +157,9 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
134
157
 
135
158
  /** Helper function to compute reconstruction error
136
159
  *
137
- * @param x vectors to encode, size n * d
138
160
  * @param codes encoded codes, size n * M
161
+ * @param x vectors to encode, size n * d
162
+ * @param n number of vectors
139
163
  * @param objs if it is not null, store reconstruction
140
164
  error of each vector into it, size n
141
165
  */
@@ -146,13 +170,50 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
146
170
  float* objs = nullptr) const;
147
171
  };
148
172
 
173
+ namespace lsq {
174
+
175
+ struct IcmEncoder {
176
+ std::vector<float> binaries;
177
+
178
+ bool verbose;
179
+
180
+ const LocalSearchQuantizer* lsq;
181
+
182
+ explicit IcmEncoder(const LocalSearchQuantizer* lsq);
183
+
184
+ virtual ~IcmEncoder() {}
185
+
186
+ ///< compute binary terms
187
+ virtual void set_binary_term();
188
+
189
+ /** Encode vectors given codebooks
190
+ *
191
+ * @param codes output codes, size n * M
192
+ * @param x vectors to encode, size n * d
193
+ * @param gen random generator
194
+ * @param n number of vectors
195
+ * @param ils_iters number of iterations of iterative local search
196
+ */
197
+ virtual void encode(
198
+ int32_t* codes,
199
+ const float* x,
200
+ std::mt19937& gen,
201
+ size_t n,
202
+ size_t ils_iters) const;
203
+ };
204
+
205
+ struct IcmEncoderFactory {
206
+ virtual IcmEncoder* get(const LocalSearchQuantizer* lsq) {
207
+ return new IcmEncoder(lsq);
208
+ }
209
+ virtual ~IcmEncoderFactory() {}
210
+ };
211
+
149
212
  /** A helper struct to count consuming time during training.
150
213
  * It is NOT thread-safe.
151
214
  */
152
215
  struct LSQTimer {
153
- std::unordered_map<std::string, double> duration;
154
- std::unordered_map<std::string, double> t0;
155
- std::unordered_map<std::string, bool> started;
216
+ std::unordered_map<std::string, double> t;
156
217
 
157
218
  LSQTimer() {
158
219
  reset();
@@ -160,13 +221,24 @@ struct LSQTimer {
160
221
 
161
222
  double get(const std::string& name);
162
223
 
163
- void start(const std::string& name);
164
-
165
- void end(const std::string& name);
224
+ void add(const std::string& name, double delta);
166
225
 
167
226
  void reset();
168
227
  };
169
228
 
170
- FAISS_API extern LSQTimer lsq_timer; ///< timer to count consuming time
229
+ struct LSQTimerScope {
230
+ double t0;
231
+ LSQTimer* timer;
232
+ std::string name;
233
+ bool finished;
234
+
235
+ LSQTimerScope(LSQTimer* timer, std::string name);
236
+
237
+ void finish();
238
+
239
+ ~LSQTimerScope();
240
+ };
241
+
242
+ } // namespace lsq
171
243
 
172
244
  } // namespace faiss
@@ -161,9 +161,6 @@ void NSG::search(
161
161
  search_on_graph<false>(
162
162
  *final_graph, dis, vt, enterpoint, pool_size, retset, tmp);
163
163
 
164
- std::partial_sort(
165
- retset.begin(), retset.begin() + k, retset.begin() + pool_size);
166
-
167
164
  for (size_t i = 0; i < k; i++) {
168
165
  I[i] = retset[i].id;
169
166
  D[i] = retset[i].distance;
@@ -105,7 +105,7 @@ struct NSG {
105
105
 
106
106
  int ntotal; ///< nb of nodes
107
107
 
108
- /// construction-time parameters
108
+ // construction-time parameters
109
109
  int R; ///< nb of neighbors per node
110
110
  int L; ///< length of the search path at construction time
111
111
  int C; ///< candidate pool size at construction time