faiss 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/Clustering.cpp +32 -0
- data/vendor/faiss/faiss/Clustering.h +14 -0
- data/vendor/faiss/faiss/Index.h +1 -1
- data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
- data/vendor/faiss/faiss/Index2Layer.h +2 -16
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
- data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
- data/vendor/faiss/faiss/IndexFlat.h +9 -15
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
- data/vendor/faiss/faiss/IndexIVF.h +25 -7
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
- data/vendor/faiss/faiss/IndexLSH.h +2 -15
- data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
- data/vendor/faiss/faiss/IndexPQ.h +2 -17
- data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
- data/vendor/faiss/faiss/IndexRefine.h +10 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
- data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
- data/vendor/faiss/faiss/VectorTransform.h +3 -0
- data/vendor/faiss/faiss/clone_index.cpp +3 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
- data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
- data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
- data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
- data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
- data/vendor/faiss/faiss/impl/io.cpp +1 -1
- data/vendor/faiss/faiss/impl/io_macros.h +20 -0
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/index_factory.cpp +585 -414
- data/vendor/faiss/faiss/index_factory.h +3 -0
- data/vendor/faiss/faiss/utils/distances.cpp +4 -2
- data/vendor/faiss/faiss/utils/distances.h +36 -3
- data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
- data/vendor/faiss/faiss/utils/utils.h +1 -1
- metadata +12 -5
- data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
@@ -15,10 +15,17 @@
|
|
15
15
|
#include <vector>
|
16
16
|
|
17
17
|
#include <faiss/impl/AdditiveQuantizer.h>
|
18
|
+
#include <faiss/impl/platform_macros.h>
|
18
19
|
#include <faiss/utils/utils.h>
|
19
20
|
|
20
21
|
namespace faiss {
|
21
22
|
|
23
|
+
namespace lsq {
|
24
|
+
|
25
|
+
struct IcmEncoderFactory;
|
26
|
+
|
27
|
+
} // namespace lsq
|
28
|
+
|
22
29
|
/** Implementation of LSQ/LSQ++ described in the following two papers:
|
23
30
|
*
|
24
31
|
* Revisiting additive quantization
|
@@ -35,7 +42,6 @@ namespace faiss {
|
|
35
42
|
* The trained codes are stored in `codebooks` which is called
|
36
43
|
* `centroids` in PQ and RQ.
|
37
44
|
*/
|
38
|
-
|
39
45
|
struct LocalSearchQuantizer : AdditiveQuantizer {
|
40
46
|
size_t K; ///< number of codes per codebook
|
41
47
|
|
@@ -53,10 +59,21 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
|
|
53
59
|
int random_seed; ///< seed for random generator
|
54
60
|
size_t nperts; ///< number of perturbation in each code
|
55
61
|
|
62
|
+
/// if non-NULL, use this encoder to encode
|
63
|
+
lsq::IcmEncoderFactory* icm_encoder_factory;
|
64
|
+
|
65
|
+
bool update_codebooks_with_double = true;
|
66
|
+
|
56
67
|
LocalSearchQuantizer(
|
57
|
-
size_t d,
|
58
|
-
size_t M,
|
59
|
-
size_t nbits
|
68
|
+
size_t d, /* dimensionality of the input vectors */
|
69
|
+
size_t M, /* number of subquantizers */
|
70
|
+
size_t nbits, /* number of bit per subvector index */
|
71
|
+
Search_type_t search_type =
|
72
|
+
ST_decompress); /* determines the storage type */
|
73
|
+
|
74
|
+
LocalSearchQuantizer();
|
75
|
+
|
76
|
+
~LocalSearchQuantizer() override;
|
60
77
|
|
61
78
|
// Train the local search quantizer
|
62
79
|
void train(size_t n, const float* x) override;
|
@@ -65,6 +82,7 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
|
|
65
82
|
*
|
66
83
|
* @param x vectors to encode, size n * d
|
67
84
|
* @param codes output codes, size n * code_size
|
85
|
+
* @param n number of vectors
|
68
86
|
*/
|
69
87
|
void compute_codes(const float* x, uint8_t* codes, size_t n) const override;
|
70
88
|
|
@@ -72,36 +90,46 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
|
|
72
90
|
*
|
73
91
|
* @param x training vectors, size n * d
|
74
92
|
* @param codes encoded training vectors, size n * M
|
93
|
+
* @param n number of vectors
|
75
94
|
*/
|
76
95
|
void update_codebooks(const float* x, const int32_t* codes, size_t n);
|
77
96
|
|
78
97
|
/** Encode vectors given codebooks using iterative conditional mode (icm).
|
79
98
|
*
|
80
|
-
* @param
|
81
|
-
* @param
|
99
|
+
* @param codes output codes, size n * M
|
100
|
+
* @param x vectors to encode, size n * d
|
101
|
+
* @param n number of vectors
|
82
102
|
* @param ils_iters number of iterations of iterative local search
|
83
103
|
*/
|
84
104
|
void icm_encode(
|
85
|
-
const float* x,
|
86
105
|
int32_t* codes,
|
106
|
+
const float* x,
|
87
107
|
size_t n,
|
88
108
|
size_t ils_iters,
|
89
109
|
std::mt19937& gen) const;
|
90
110
|
|
91
|
-
void
|
92
|
-
size_t index,
|
93
|
-
const float* x,
|
111
|
+
void icm_encode_impl(
|
94
112
|
int32_t* codes,
|
113
|
+
const float* x,
|
114
|
+
const float* unaries,
|
115
|
+
std::mt19937& gen,
|
95
116
|
size_t n,
|
96
|
-
const float* binaries,
|
97
117
|
size_t ils_iters,
|
98
|
-
|
118
|
+
bool verbose) const;
|
99
119
|
|
100
120
|
void icm_encode_step(
|
121
|
+
int32_t* codes,
|
101
122
|
const float* unaries,
|
102
123
|
const float* binaries,
|
103
|
-
|
104
|
-
size_t
|
124
|
+
size_t n,
|
125
|
+
size_t n_iters) const;
|
126
|
+
|
127
|
+
/** Add some perturbation to codes
|
128
|
+
*
|
129
|
+
* @param codes codes to be perturbed, size n * M
|
130
|
+
* @param n number of vectors
|
131
|
+
*/
|
132
|
+
void perturb_codes(int32_t* codes, size_t n, std::mt19937& gen) const;
|
105
133
|
|
106
134
|
/** Add some perturbation to codebooks
|
107
135
|
*
|
@@ -113,12 +141,6 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
|
|
113
141
|
const std::vector<float>& stddev,
|
114
142
|
std::mt19937& gen);
|
115
143
|
|
116
|
-
/** Add some perturbation to codes
|
117
|
-
*
|
118
|
-
* @param codes codes to be perturbed, size n * M
|
119
|
-
*/
|
120
|
-
void perturb_codes(int32_t* codes, size_t n, std::mt19937& gen) const;
|
121
|
-
|
122
144
|
/** Compute binary terms
|
123
145
|
*
|
124
146
|
* @param binaries binary terms, size M * M * K * K
|
@@ -127,6 +149,7 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
|
|
127
149
|
|
128
150
|
/** Compute unary terms
|
129
151
|
*
|
152
|
+
* @param n number of vectors
|
130
153
|
* @param x vectors to encode, size n * d
|
131
154
|
* @param unaries unary terms, size n * M * K
|
132
155
|
*/
|
@@ -134,8 +157,9 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
|
|
134
157
|
|
135
158
|
/** Helper function to compute reconstruction error
|
136
159
|
*
|
137
|
-
* @param x vectors to encode, size n * d
|
138
160
|
* @param codes encoded codes, size n * M
|
161
|
+
* @param x vectors to encode, size n * d
|
162
|
+
* @param n number of vectors
|
139
163
|
* @param objs if it is not null, store reconstruction
|
140
164
|
error of each vector into it, size n
|
141
165
|
*/
|
@@ -146,13 +170,50 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
|
|
146
170
|
float* objs = nullptr) const;
|
147
171
|
};
|
148
172
|
|
173
|
+
namespace lsq {
|
174
|
+
|
175
|
+
struct IcmEncoder {
|
176
|
+
std::vector<float> binaries;
|
177
|
+
|
178
|
+
bool verbose;
|
179
|
+
|
180
|
+
const LocalSearchQuantizer* lsq;
|
181
|
+
|
182
|
+
explicit IcmEncoder(const LocalSearchQuantizer* lsq);
|
183
|
+
|
184
|
+
virtual ~IcmEncoder() {}
|
185
|
+
|
186
|
+
/// compute binary terms
|
187
|
+
virtual void set_binary_term();
|
188
|
+
|
189
|
+
/** Encode vectors given codebooks
|
190
|
+
*
|
191
|
+
* @param codes output codes, size n * M
|
192
|
+
* @param x vectors to encode, size n * d
|
193
|
+
* @param gen random generator
|
194
|
+
* @param n number of vectors
|
195
|
+
* @param ils_iters number of iterations of iterative local search
|
196
|
+
*/
|
197
|
+
virtual void encode(
|
198
|
+
int32_t* codes,
|
199
|
+
const float* x,
|
200
|
+
std::mt19937& gen,
|
201
|
+
size_t n,
|
202
|
+
size_t ils_iters) const;
|
203
|
+
};
|
204
|
+
|
205
|
+
struct IcmEncoderFactory {
|
206
|
+
virtual IcmEncoder* get(const LocalSearchQuantizer* lsq) {
|
207
|
+
return new IcmEncoder(lsq);
|
208
|
+
}
|
209
|
+
virtual ~IcmEncoderFactory() {}
|
210
|
+
};
|
211
|
+
|
149
212
|
/** A helper struct to measure the time consumed during training.
|
150
213
|
* It is NOT thread-safe.
|
151
214
|
*/
|
152
215
|
struct LSQTimer {
|
153
|
-
std::unordered_map<std::string, double>
|
154
|
-
std::unordered_map<std::string, double> t0;
|
155
|
-
std::unordered_map<std::string, bool> started;
|
216
|
+
std::unordered_map<std::string, double> t;
|
156
217
|
|
157
218
|
LSQTimer() {
|
158
219
|
reset();
|
@@ -160,13 +221,24 @@ struct LSQTimer {
|
|
160
221
|
|
161
222
|
double get(const std::string& name);
|
162
223
|
|
163
|
-
void
|
164
|
-
|
165
|
-
void end(const std::string& name);
|
224
|
+
void add(const std::string& name, double delta);
|
166
225
|
|
167
226
|
void reset();
|
168
227
|
};
|
169
228
|
|
170
|
-
|
229
|
+
struct LSQTimerScope {
|
230
|
+
double t0;
|
231
|
+
LSQTimer* timer;
|
232
|
+
std::string name;
|
233
|
+
bool finished;
|
234
|
+
|
235
|
+
LSQTimerScope(LSQTimer* timer, std::string name);
|
236
|
+
|
237
|
+
void finish();
|
238
|
+
|
239
|
+
~LSQTimerScope();
|
240
|
+
};
|
241
|
+
|
242
|
+
} // namespace lsq
|
171
243
|
|
172
244
|
} // namespace faiss
|
@@ -161,9 +161,6 @@ void NSG::search(
|
|
161
161
|
search_on_graph<false>(
|
162
162
|
*final_graph, dis, vt, enterpoint, pool_size, retset, tmp);
|
163
163
|
|
164
|
-
std::partial_sort(
|
165
|
-
retset.begin(), retset.begin() + k, retset.begin() + pool_size);
|
166
|
-
|
167
164
|
for (size_t i = 0; i < k; i++) {
|
168
165
|
I[i] = retset[i].id;
|
169
166
|
D[i] = retset[i].distance;
|
@@ -105,7 +105,7 @@ struct NSG {
|
|
105
105
|
|
106
106
|
int ntotal; ///< nb of nodes
|
107
107
|
|
108
|
-
|
108
|
+
// construction-time parameters
|
109
109
|
int R; ///< nb of neighbors per node
|
110
110
|
int L; ///< length of the search path at construction time
|
111
111
|
int C; ///< candidate pool size at construction time
|