faiss 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/Clustering.cpp +32 -0
- data/vendor/faiss/faiss/Clustering.h +14 -0
- data/vendor/faiss/faiss/Index.h +1 -1
- data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
- data/vendor/faiss/faiss/Index2Layer.h +2 -16
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
- data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
- data/vendor/faiss/faiss/IndexFlat.h +9 -15
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
- data/vendor/faiss/faiss/IndexIVF.h +25 -7
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
- data/vendor/faiss/faiss/IndexLSH.h +2 -15
- data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
- data/vendor/faiss/faiss/IndexPQ.h +2 -17
- data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
- data/vendor/faiss/faiss/IndexRefine.h +10 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
- data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
- data/vendor/faiss/faiss/VectorTransform.h +3 -0
- data/vendor/faiss/faiss/clone_index.cpp +3 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
- data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
- data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
- data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
- data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
- data/vendor/faiss/faiss/impl/io.cpp +1 -1
- data/vendor/faiss/faiss/impl/io_macros.h +20 -0
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/index_factory.cpp +585 -414
- data/vendor/faiss/faiss/index_factory.h +3 -0
- data/vendor/faiss/faiss/utils/distances.cpp +4 -2
- data/vendor/faiss/faiss/utils/distances.h +36 -3
- data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
- data/vendor/faiss/faiss/utils/utils.h +1 -1
- metadata +12 -5
- data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
@@ -15,10 +15,17 @@
|
|
15
15
|
#include <vector>
|
16
16
|
|
17
17
|
#include <faiss/impl/AdditiveQuantizer.h>
|
18
|
+
#include <faiss/impl/platform_macros.h>
|
18
19
|
#include <faiss/utils/utils.h>
|
19
20
|
|
20
21
|
namespace faiss {
|
21
22
|
|
23
|
+
namespace lsq {
|
24
|
+
|
25
|
+
struct IcmEncoderFactory;
|
26
|
+
|
27
|
+
} // namespace lsq
|
28
|
+
|
22
29
|
/** Implementation of LSQ/LSQ++ described in the following two papers:
|
23
30
|
*
|
24
31
|
* Revisiting additive quantization
|
@@ -35,7 +42,6 @@ namespace faiss {
|
|
35
42
|
* The trained codes are stored in `codebooks` which is called
|
36
43
|
* `centroids` in PQ and RQ.
|
37
44
|
*/
|
38
|
-
|
39
45
|
struct LocalSearchQuantizer : AdditiveQuantizer {
|
40
46
|
size_t K; ///< number of codes per codebook
|
41
47
|
|
@@ -53,10 +59,21 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
|
|
53
59
|
int random_seed; ///< seed for random generator
|
54
60
|
size_t nperts; ///< number of perturbations in each code
|
55
61
|
|
62
|
+
/// if non-NULL, use this encoder to encode
|
63
|
+
lsq::IcmEncoderFactory* icm_encoder_factory;
|
64
|
+
|
65
|
+
bool update_codebooks_with_double = true;
|
66
|
+
|
56
67
|
LocalSearchQuantizer(
|
57
|
-
size_t d,
|
58
|
-
size_t M,
|
59
|
-
size_t nbits
|
68
|
+
size_t d, /* dimensionality of the input vectors */
|
69
|
+
size_t M, /* number of subquantizers */
|
70
|
+
size_t nbits, /* number of bits per subvector index */
|
71
|
+
Search_type_t search_type =
|
72
|
+
ST_decompress); /* determines the storage type */
|
73
|
+
|
74
|
+
LocalSearchQuantizer();
|
75
|
+
|
76
|
+
~LocalSearchQuantizer() override;
|
60
77
|
|
61
78
|
// Train the local search quantizer
|
62
79
|
void train(size_t n, const float* x) override;
|
@@ -65,6 +82,7 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
|
|
65
82
|
*
|
66
83
|
* @param x vectors to encode, size n * d
|
67
84
|
* @param codes output codes, size n * code_size
|
85
|
+
* @param n number of vectors
|
68
86
|
*/
|
69
87
|
void compute_codes(const float* x, uint8_t* codes, size_t n) const override;
|
70
88
|
|
@@ -72,36 +90,46 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
|
|
72
90
|
*
|
73
91
|
* @param x training vectors, size n * d
|
74
92
|
* @param codes encoded training vectors, size n * M
|
93
|
+
* @param n number of vectors
|
75
94
|
*/
|
76
95
|
void update_codebooks(const float* x, const int32_t* codes, size_t n);
|
77
96
|
|
78
97
|
/** Encode vectors given codebooks using iterative conditional mode (icm).
|
79
98
|
*
|
80
|
-
* @param x vectors to encode, size n * d
|
81
|
-
* @param codes output codes, size n * M
|
99
|
+
* @param codes output codes, size n * M
|
100
|
+
* @param x vectors to encode, size n * d
|
101
|
+
* @param n number of vectors
|
82
102
|
* @param ils_iters number of iterations of iterative local search
|
83
103
|
*/
|
84
104
|
void icm_encode(
|
85
|
-
const float* x,
|
86
105
|
int32_t* codes,
|
106
|
+
const float* x,
|
87
107
|
size_t n,
|
88
108
|
size_t ils_iters,
|
89
109
|
std::mt19937& gen) const;
|
90
110
|
|
91
|
-
void
|
92
|
-
size_t index,
|
93
|
-
const float* x,
|
111
|
+
void icm_encode_impl(
|
94
112
|
int32_t* codes,
|
113
|
+
const float* x,
|
114
|
+
const float* unaries,
|
115
|
+
std::mt19937& gen,
|
95
116
|
size_t n,
|
96
|
-
const float* binaries,
|
97
117
|
size_t ils_iters,
|
98
|
-
|
118
|
+
bool verbose) const;
|
99
119
|
|
100
120
|
void icm_encode_step(
|
121
|
+
int32_t* codes,
|
101
122
|
const float* unaries,
|
102
123
|
const float* binaries,
|
103
|
-
|
104
|
-
size_t
|
124
|
+
size_t n,
|
125
|
+
size_t n_iters) const;
|
126
|
+
|
127
|
+
/** Add some perturbation to codes
|
128
|
+
*
|
129
|
+
* @param codes codes to be perturbed, size n * M
|
130
|
+
* @param n number of vectors
|
131
|
+
*/
|
132
|
+
void perturb_codes(int32_t* codes, size_t n, std::mt19937& gen) const;
|
105
133
|
|
106
134
|
/** Add some perturbation to codebooks
|
107
135
|
*
|
@@ -113,12 +141,6 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
|
|
113
141
|
const std::vector<float>& stddev,
|
114
142
|
std::mt19937& gen);
|
115
143
|
|
116
|
-
/** Add some perturbation to codes
|
117
|
-
*
|
118
|
-
* @param codes codes to be perturbed, size n * M
|
119
|
-
*/
|
120
|
-
void perturb_codes(int32_t* codes, size_t n, std::mt19937& gen) const;
|
121
|
-
|
122
144
|
/** Compute binary terms
|
123
145
|
*
|
124
146
|
* @param binaries binary terms, size M * M * K * K
|
@@ -127,6 +149,7 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
|
|
127
149
|
|
128
150
|
/** Compute unary terms
|
129
151
|
*
|
152
|
+
* @param n number of vectors
|
130
153
|
* @param x vectors to encode, size n * d
|
131
154
|
* @param unaries unary terms, size n * M * K
|
132
155
|
*/
|
@@ -134,8 +157,9 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
|
|
134
157
|
|
135
158
|
/** Helper function to compute reconstruction error
|
136
159
|
*
|
137
|
-
* @param x vectors to encode, size n * d
|
138
160
|
* @param codes encoded codes, size n * M
|
161
|
+
* @param x vectors to encode, size n * d
|
162
|
+
* @param n number of vectors
|
139
163
|
* @param objs if it is not null, store reconstruction
|
140
164
|
error of each vector into it, size n
|
141
165
|
*/
|
@@ -146,13 +170,50 @@ struct LocalSearchQuantizer : AdditiveQuantizer {
|
|
146
170
|
float* objs = nullptr) const;
|
147
171
|
};
|
148
172
|
|
173
|
+
namespace lsq {
|
174
|
+
|
175
|
+
struct IcmEncoder {
|
176
|
+
std::vector<float> binaries;
|
177
|
+
|
178
|
+
bool verbose;
|
179
|
+
|
180
|
+
const LocalSearchQuantizer* lsq;
|
181
|
+
|
182
|
+
explicit IcmEncoder(const LocalSearchQuantizer* lsq);
|
183
|
+
|
184
|
+
virtual ~IcmEncoder() {}
|
185
|
+
|
186
|
+
/// compute binary terms
|
187
|
+
virtual void set_binary_term();
|
188
|
+
|
189
|
+
/** Encode vectors given codebooks
|
190
|
+
*
|
191
|
+
* @param codes output codes, size n * M
|
192
|
+
* @param x vectors to encode, size n * d
|
193
|
+
* @param gen random generator
|
194
|
+
* @param n number of vectors
|
195
|
+
* @param ils_iters number of iterations of iterative local search
|
196
|
+
*/
|
197
|
+
virtual void encode(
|
198
|
+
int32_t* codes,
|
199
|
+
const float* x,
|
200
|
+
std::mt19937& gen,
|
201
|
+
size_t n,
|
202
|
+
size_t ils_iters) const;
|
203
|
+
};
|
204
|
+
|
205
|
+
struct IcmEncoderFactory {
|
206
|
+
virtual IcmEncoder* get(const LocalSearchQuantizer* lsq) {
|
207
|
+
return new IcmEncoder(lsq);
|
208
|
+
}
|
209
|
+
virtual ~IcmEncoderFactory() {}
|
210
|
+
};
|
211
|
+
|
149
212
|
/** A helper struct to measure the time consumed during training.
|
150
213
|
* It is NOT thread-safe.
|
151
214
|
*/
|
152
215
|
struct LSQTimer {
|
153
|
-
std::unordered_map<std::string, double>
|
154
|
-
std::unordered_map<std::string, double> t0;
|
155
|
-
std::unordered_map<std::string, bool> started;
|
216
|
+
std::unordered_map<std::string, double> t;
|
156
217
|
|
157
218
|
LSQTimer() {
|
158
219
|
reset();
|
@@ -160,13 +221,24 @@ struct LSQTimer {
|
|
160
221
|
|
161
222
|
double get(const std::string& name);
|
162
223
|
|
163
|
-
void start(const std::string& name);
|
164
|
-
|
165
|
-
void end(const std::string& name);
|
224
|
+
void add(const std::string& name, double delta);
|
166
225
|
|
167
226
|
void reset();
|
168
227
|
};
|
169
228
|
|
170
|
-
|
229
|
+
struct LSQTimerScope {
|
230
|
+
double t0;
|
231
|
+
LSQTimer* timer;
|
232
|
+
std::string name;
|
233
|
+
bool finished;
|
234
|
+
|
235
|
+
LSQTimerScope(LSQTimer* timer, std::string name);
|
236
|
+
|
237
|
+
void finish();
|
238
|
+
|
239
|
+
~LSQTimerScope();
|
240
|
+
};
|
241
|
+
|
242
|
+
} // namespace lsq
|
171
243
|
|
172
244
|
} // namespace faiss
|
@@ -161,9 +161,6 @@ void NSG::search(
|
|
161
161
|
search_on_graph<false>(
|
162
162
|
*final_graph, dis, vt, enterpoint, pool_size, retset, tmp);
|
163
163
|
|
164
|
-
std::partial_sort(
|
165
|
-
retset.begin(), retset.begin() + k, retset.begin() + pool_size);
|
166
|
-
|
167
164
|
for (size_t i = 0; i < k; i++) {
|
168
165
|
I[i] = retset[i].id;
|
169
166
|
D[i] = retset[i].distance;
|
@@ -105,7 +105,7 @@ struct NSG {
|
|
105
105
|
|
106
106
|
int ntotal; ///< nb of nodes
|
107
107
|
|
108
|
-
|
108
|
+
// construction-time parameters
|
109
109
|
int R; ///< nb of neighbors per node
|
110
110
|
int L; ///< length of the search path at construction time
|
111
111
|
int C; ///< candidate pool size at construction time
|