faiss 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/faiss/Clustering.cpp +32 -0
  6. data/vendor/faiss/faiss/Clustering.h +14 -0
  7. data/vendor/faiss/faiss/Index.h +1 -1
  8. data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
  9. data/vendor/faiss/faiss/Index2Layer.h +2 -16
  10. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  11. data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
  12. data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
  13. data/vendor/faiss/faiss/IndexFlat.h +9 -15
  14. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  15. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  16. data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
  17. data/vendor/faiss/faiss/IndexIVF.h +25 -7
  18. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  19. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  20. data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
  21. data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
  22. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  23. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
  24. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
  25. data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
  26. data/vendor/faiss/faiss/IndexLSH.h +2 -15
  27. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
  28. data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
  29. data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
  30. data/vendor/faiss/faiss/IndexPQ.h +2 -17
  31. data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
  32. data/vendor/faiss/faiss/IndexRefine.h +10 -0
  33. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
  34. data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
  35. data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
  36. data/vendor/faiss/faiss/VectorTransform.h +3 -0
  37. data/vendor/faiss/faiss/clone_index.cpp +3 -2
  38. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
  39. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  40. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
  41. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
  42. data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
  43. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
  44. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
  45. data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
  46. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  47. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
  48. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
  49. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
  50. data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
  51. data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
  52. data/vendor/faiss/faiss/impl/io.cpp +1 -1
  53. data/vendor/faiss/faiss/impl/io_macros.h +20 -0
  54. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  55. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  56. data/vendor/faiss/faiss/index_factory.cpp +585 -414
  57. data/vendor/faiss/faiss/index_factory.h +3 -0
  58. data/vendor/faiss/faiss/utils/distances.cpp +4 -2
  59. data/vendor/faiss/faiss/utils/distances.h +36 -3
  60. data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
  61. data/vendor/faiss/faiss/utils/utils.h +1 -1
  62. metadata +12 -5
  63. data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
@@ -15,10 +15,17 @@
15
15
  #include <vector>
16
16
 
17
17
  #include <faiss/impl/AdditiveQuantizer.h>
18
+ #include <faiss/impl/platform_macros.h>
18
19
  #include <faiss/utils/utils.h>
19
20
 
20
21
  namespace faiss {
21
22
 
23
+ namespace lsq {
24
+
25
+ struct IcmEncoderFactory;
26
+
27
+ } // namespace lsq
28
+
22
29
  /** Implementation of LSQ/LSQ++ described in the following two papers:
23
30
  *
24
31
  * Revisiting additive quantization
@@ -35,7 +42,6 @@ namespace faiss {
35
42
  * The trained codes are stored in `codebooks` which is called
36
43
  * `centroids` in PQ and RQ.
37
44
  */
38
-
39
45
  struct LocalSearchQuantizer : AdditiveQuantizer {
40
46
  size_t K; ///< number of codes per codebook
41
47
 
@@ -53,10 +59,21 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
53
59
  int random_seed; ///< seed for random generator
54
60
  size_t nperts; ///< number of perturbations in each code
55
61
 
62
+ /// if non-NULL, factory used to create the ICM encoder for encoding
63
+ lsq::IcmEncoderFactory* icm_encoder_factory;
64
+
65
+ bool update_codebooks_with_double = true;
66
+
56
67
  LocalSearchQuantizer(
57
- size_t d, /* dimensionality of the input vectors */
58
- size_t M, /* number of subquantizers */
59
- size_t nbits); /* number of bit per subvector index */
68
+ size_t d, /* dimensionality of the input vectors */
69
+ size_t M, /* number of subquantizers */
70
+ size_t nbits, /* number of bit per subvector index */
71
+ Search_type_t search_type =
72
+ ST_decompress); /* determines the storage type */
73
+
74
+ LocalSearchQuantizer();
75
+
76
+ ~LocalSearchQuantizer() override;
60
77
 
61
78
  // Train the local search quantizer
62
79
  void train(size_t n, const float* x) override;
@@ -65,6 +82,7 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
65
82
  *
66
83
  * @param x vectors to encode, size n * d
67
84
  * @param codes output codes, size n * code_size
85
+ * @param n number of vectors
68
86
  */
69
87
  void compute_codes(const float* x, uint8_t* codes, size_t n) const override;
70
88
 
@@ -72,36 +90,46 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
72
90
  *
73
91
  * @param x training vectors, size n * d
74
92
  * @param codes encoded training vectors, size n * M
93
+ * @param n number of vectors
75
94
  */
76
95
  void update_codebooks(const float* x, const int32_t* codes, size_t n);
77
96
 
78
97
  /** Encode vectors given codebooks using iterative conditional mode (icm).
79
98
  *
80
- * @param x vectors to encode, size n * d
81
- * @param codes output codes, size n * M
99
+ * @param codes output codes, size n * M
100
+ * @param x vectors to encode, size n * d
101
+ * @param n number of vectors
82
102
  * @param ils_iters number of iterations of iterative local search
83
103
  */
84
104
  void icm_encode(
85
- const float* x,
86
105
  int32_t* codes,
106
+ const float* x,
87
107
  size_t n,
88
108
  size_t ils_iters,
89
109
  std::mt19937& gen) const;
90
110
 
91
- void icm_encode_partial(
92
- size_t index,
93
- const float* x,
111
+ void icm_encode_impl(
94
112
  int32_t* codes,
113
+ const float* x,
114
+ const float* unaries,
115
+ std::mt19937& gen,
95
116
  size_t n,
96
- const float* binaries,
97
117
  size_t ils_iters,
98
- std::mt19937& gen) const;
118
+ bool verbose) const;
99
119
 
100
120
  void icm_encode_step(
121
+ int32_t* codes,
101
122
  const float* unaries,
102
123
  const float* binaries,
103
- int32_t* codes,
104
- size_t n) const;
124
+ size_t n,
125
+ size_t n_iters) const;
126
+
127
+ /** Add some perturbation to codes
128
+ *
129
+ * @param codes codes to be perturbed, size n * M
130
+ * @param n number of vectors
131
+ */
132
+ void perturb_codes(int32_t* codes, size_t n, std::mt19937& gen) const;
105
133
 
106
134
  /** Add some perturbation to codebooks
107
135
  *
@@ -113,12 +141,6 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
113
141
  const std::vector<float>& stddev,
114
142
  std::mt19937& gen);
115
143
 
116
- /** Add some perturbation to codes
117
- *
118
- * @param codes codes to be perturbed, size n * M
119
- */
120
- void perturb_codes(int32_t* codes, size_t n, std::mt19937& gen) const;
121
-
122
144
  /** Compute binary terms
123
145
  *
124
146
  * @param binaries binary terms, size M * M * K * K
@@ -127,6 +149,7 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
127
149
 
128
150
  /** Compute unary terms
129
151
  *
152
+ * @param n number of vectors
130
153
  * @param x vectors to encode, size n * d
131
154
  * @param unaries unary terms, size n * M * K
132
155
  */
@@ -134,8 +157,9 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
134
157
 
135
158
  /** Helper function to compute reconstruction error
136
159
  *
137
- * @param x vectors to encode, size n * d
138
160
  * @param codes encoded codes, size n * M
161
+ * @param x vectors to encode, size n * d
162
+ * @param n number of vectors
139
163
  * @param objs if it is not null, store reconstruction
140
164
  error of each vector into it, size n
141
165
  */
@@ -146,13 +170,50 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
146
170
  float* objs = nullptr) const;
147
171
  };
148
172
 
173
+ namespace lsq {
174
+
175
+ struct IcmEncoder {
176
+ std::vector<float> binaries;
177
+
178
+ bool verbose;
179
+
180
+ const LocalSearchQuantizer* lsq;
181
+
182
+ explicit IcmEncoder(const LocalSearchQuantizer* lsq);
183
+
184
+ virtual ~IcmEncoder() {}
185
+
186
+ /// compute binary terms
187
+ virtual void set_binary_term();
188
+
189
+ /** Encode vectors given codebooks
190
+ *
191
+ * @param codes output codes, size n * M
192
+ * @param x vectors to encode, size n * d
193
+ * @param gen random generator
194
+ * @param n number of vectors
195
+ * @param ils_iters number of iterations of iterative local search
196
+ */
197
+ virtual void encode(
198
+ int32_t* codes,
199
+ const float* x,
200
+ std::mt19937& gen,
201
+ size_t n,
202
+ size_t ils_iters) const;
203
+ };
204
+
205
+ struct IcmEncoderFactory {
206
+ virtual IcmEncoder* get(const LocalSearchQuantizer* lsq) {
207
+ return new IcmEncoder(lsq);
208
+ }
209
+ virtual ~IcmEncoderFactory() {}
210
+ };
211
+
149
212
  /** A helper struct to track the time consumed during training.
150
213
  * It is NOT thread-safe.
151
214
  */
152
215
  struct LSQTimer {
153
- std::unordered_map<std::string, double> duration;
154
- std::unordered_map<std::string, double> t0;
155
- std::unordered_map<std::string, bool> started;
216
+ std::unordered_map<std::string, double> t;
156
217
 
157
218
  LSQTimer() {
158
219
  reset();
@@ -160,13 +221,24 @@ struct LSQTimer {
160
221
 
161
222
  double get(const std::string& name);
162
223
 
163
- void start(const std::string& name);
164
-
165
- void end(const std::string& name);
224
+ void add(const std::string& name, double delta);
166
225
 
167
226
  void reset();
168
227
  };
169
228
 
170
- FAISS_API extern LSQTimer lsq_timer; ///< timer to count consuming time
229
+ struct LSQTimerScope {
230
+ double t0;
231
+ LSQTimer* timer;
232
+ std::string name;
233
+ bool finished;
234
+
235
+ LSQTimerScope(LSQTimer* timer, std::string name);
236
+
237
+ void finish();
238
+
239
+ ~LSQTimerScope();
240
+ };
241
+
242
+ } // namespace lsq
171
243
 
172
244
  } // namespace faiss
@@ -161,9 +161,6 @@ void NSG::search(
161
161
  search_on_graph<false>(
162
162
  *final_graph, dis, vt, enterpoint, pool_size, retset, tmp);
163
163
 
164
- std::partial_sort(
165
- retset.begin(), retset.begin() + k, retset.begin() + pool_size);
166
-
167
164
  for (size_t i = 0; i < k; i++) {
168
165
  I[i] = retset[i].id;
169
166
  D[i] = retset[i].distance;
@@ -105,7 +105,7 @@ struct NSG {
105
105
 
106
106
  int ntotal; ///< nb of nodes
107
107
 
108
- /// construction-time parameters
108
+ // construction-time parameters
109
109
  int R; ///< nb of neighbors per node
110
110
  int L; ///< length of the search path at construction time
111
111
  int C; ///< candidate pool size at construction time