faiss 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/Clustering.cpp +32 -0
- data/vendor/faiss/faiss/Clustering.h +14 -0
- data/vendor/faiss/faiss/Index.h +1 -1
- data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
- data/vendor/faiss/faiss/Index2Layer.h +2 -16
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
- data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
- data/vendor/faiss/faiss/IndexFlat.h +9 -15
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
- data/vendor/faiss/faiss/IndexIVF.h +25 -7
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
- data/vendor/faiss/faiss/IndexLSH.h +2 -15
- data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
- data/vendor/faiss/faiss/IndexPQ.h +2 -17
- data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
- data/vendor/faiss/faiss/IndexRefine.h +10 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
- data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
- data/vendor/faiss/faiss/VectorTransform.h +3 -0
- data/vendor/faiss/faiss/clone_index.cpp +3 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
- data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
- data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
- data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
- data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
- data/vendor/faiss/faiss/impl/io.cpp +1 -1
- data/vendor/faiss/faiss/impl/io_macros.h +20 -0
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/index_factory.cpp +585 -414
- data/vendor/faiss/faiss/index_factory.h +3 -0
- data/vendor/faiss/faiss/utils/distances.cpp +4 -2
- data/vendor/faiss/faiss/utils/distances.h +36 -3
- data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
- data/vendor/faiss/faiss/utils/utils.h +1 -1
- metadata +12 -5
- data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
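The headline change in the vendored sources is the new `IndexIVFAdditiveQuantizer` family (`IndexIVFAdditiveQuantizer.cpp/.h`, `IndexIVFAdditiveQuantizer.cpp` replacing `IndexResidual.cpp`), alongside the new `kmeans1d` helper and a reworked `index_factory`. As a rough orientation only, the sketch below shows how the new IVF residual-quantizer index could be used through the vendored C++ API; it is not part of the diff, and the dimensions, parameters, and random data are arbitrary placeholders.

```cpp
// Minimal usage sketch for the IndexIVFResidualQuantizer added to the vendored
// faiss sources in this release. Not part of the package diff; sizes and data
// are made up for illustration.
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFAdditiveQuantizer.h>

#include <random>
#include <vector>

int main() {
    size_t d = 64;      // vector dimensionality
    size_t nlist = 128; // number of inverted lists
    size_t M = 4;       // residual quantizer: number of subquantizers
    size_t nbits = 8;   // bits per subquantizer
    size_t nb = 10000;  // database size

    std::mt19937 rng(123);
    std::uniform_real_distribution<float> u(0.0f, 1.0f);
    std::vector<float> xb(nb * d);
    for (float& v : xb) {
        v = u(rng);
    }

    faiss::IndexFlatL2 coarse(d); // coarse quantizer
    faiss::IndexIVFResidualQuantizer index(&coarse, d, nlist, M, nbits);

    index.train(nb, xb.data());
    index.add(nb, xb.data());

    // Query with the first 5 database vectors.
    faiss::Index::idx_t k = 4, nq = 5;
    std::vector<float> distances(nq * k);
    std::vector<faiss::Index::idx_t> labels(nq * k);
    index.nprobe = 8;
    index.search(nq, xb.data(), k, distances.data(), labels.data());
    return 0;
}
```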
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp (new file)
@@ -0,0 +1,316 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// quiet the noise
+// XXclang-format off
+
+#include <faiss/IndexIVFAdditiveQuantizer.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+
+#include <faiss/impl/FaissAssert.h>
+#include <faiss/impl/ResidualQuantizer.h>
+#include <faiss/impl/ResultHandler.h>
+#include <faiss/utils/distances.h>
+#include <faiss/utils/extra_distances.h>
+#include <faiss/utils/utils.h>
+
+namespace faiss {
+
+/**************************************************************************************
+ * IndexIVFAdditiveQuantizer
+ **************************************************************************************/
+
+IndexIVFAdditiveQuantizer::IndexIVFAdditiveQuantizer(
+        AdditiveQuantizer* aq,
+        Index* quantizer,
+        size_t d,
+        size_t nlist,
+        MetricType metric)
+        : IndexIVF(quantizer, d, nlist, 0, metric), aq(aq) {
+    by_residual = true;
+}
+
+IndexIVFAdditiveQuantizer::IndexIVFAdditiveQuantizer(AdditiveQuantizer* aq)
+        : IndexIVF(), aq(aq) {}
+
+void IndexIVFAdditiveQuantizer::train_residual(idx_t n, const float* x) {
+    const float* x_in = x;
+
+    size_t max_train_points = 1024 * ((size_t)1 << aq->nbits[0]);
+
+    x = fvecs_maybe_subsample(
+            d, (size_t*)&n, max_train_points, x, verbose, 1234);
+    ScopeDeleter1<float> del_x(x_in == x ? nullptr : x);
+
+    if (by_residual) {
+        std::vector<Index::idx_t> idx(n);
+        quantizer->assign(n, x, idx.data());
+
+        std::vector<float> residuals(n * d);
+        quantizer->compute_residual_n(n, x, residuals.data(), idx.data());
+
+        aq->train(n, residuals.data());
+    } else {
+        aq->train(n, x);
+    }
+}
+
+void IndexIVFAdditiveQuantizer::encode_vectors(
+        idx_t n,
+        const float* x,
+        const idx_t* list_nos,
+        uint8_t* codes,
+        bool include_listnos) const {
+    FAISS_THROW_IF_NOT(is_trained);
+
+    // first encode then possibly add listnos
+
+    if (by_residual) {
+        // subtract centroids
+        std::vector<float> residuals(n * d);
+
+#pragma omp parallel if (n > 10000)
+        for (idx_t i = 0; i < n; i++) {
+            quantizer->compute_residual(
+                    x + i * d,
+                    residuals.data() + i * d,
+                    list_nos[i] >= 0 ? list_nos[i] : 0);
+        }
+        aq->compute_codes(residuals.data(), codes, n);
+    } else {
+        aq->compute_codes(x, codes, n);
+    }
+
+    if (include_listnos) {
+        // write back from the end, where there is enough space
+        size_t coarse_size = coarse_code_size();
+        for (idx_t i = n - 1; i >= 0; i--) {
+            uint8_t* code = codes + i * (code_size + coarse_size);
+            memmove(code + coarse_size, codes + i * code_size, code_size);
+            encode_listno(list_nos[i], code);
+        }
+    }
+}
+
+IndexIVFAdditiveQuantizer::~IndexIVFAdditiveQuantizer() {}
+
+/*********************************************
+ * AQInvertedListScanner
+ *********************************************/
+
+namespace {
+
+using Search_type_t = AdditiveQuantizer::Search_type_t;
+
+struct AQInvertedListScanner : InvertedListScanner {
+    const IndexIVFAdditiveQuantizer& ia;
+    const AdditiveQuantizer& aq;
+    std::vector<float> tmp;
+
+    AQInvertedListScanner(const IndexIVFAdditiveQuantizer& ia, bool store_pairs)
+            : ia(ia), aq(*ia.aq) {
+        this->store_pairs = store_pairs;
+        this->code_size = ia.code_size;
+        keep_max = ia.metric_type == METRIC_INNER_PRODUCT;
+        tmp.resize(ia.d);
+    }
+
+    const float* q0;
+
+    /// from now on we handle this query.
+    void set_query(const float* query_vector) override {
+        q0 = query_vector;
+    }
+
+    const float* q;
+    /// following codes come from this inverted list
+    void set_list(idx_t list_no, float coarse_dis) override {
+        if (ia.metric_type == METRIC_L2 && ia.by_residual) {
+            ia.quantizer->compute_residual(q0, tmp.data(), list_no);
+            q = tmp.data();
+        } else {
+            q = q0;
+        }
+    }
+
+    ~AQInvertedListScanner() {}
+};
+
+template <bool is_IP>
+struct AQInvertedListScannerDecompress : AQInvertedListScanner {
+    AQInvertedListScannerDecompress(
+            const IndexIVFAdditiveQuantizer& ia,
+            bool store_pairs)
+            : AQInvertedListScanner(ia, store_pairs) {}
+
+    float coarse_dis = 0;
+
+    /// following codes come from this inverted list
+    void set_list(idx_t list_no, float coarse_dis) override {
+        AQInvertedListScanner::set_list(list_no, coarse_dis);
+        if (ia.by_residual) {
+            this->coarse_dis = coarse_dis;
+        }
+    }
+
+    /// compute a single query-to-code distance
+    float distance_to_code(const uint8_t* code) const final {
+        std::vector<float> b(aq.d);
+        aq.decode(code, b.data(), 1);
+        FAISS_ASSERT(q);
+        FAISS_ASSERT(b.data());
+
+        return is_IP ? coarse_dis + fvec_inner_product(q, b.data(), aq.d)
+                     : fvec_L2sqr(q, b.data(), aq.d);
+    }
+
+    ~AQInvertedListScannerDecompress() override {}
+};
+
+template <bool is_IP, Search_type_t search_type>
+struct AQInvertedListScannerLUT : AQInvertedListScanner {
+    std::vector<float> LUT, tmp;
+    float distance_bias;
+
+    AQInvertedListScannerLUT(
+            const IndexIVFAdditiveQuantizer& ia,
+            bool store_pairs)
+            : AQInvertedListScanner(ia, store_pairs) {
+        LUT.resize(aq.total_codebook_size);
+        tmp.resize(ia.d);
+        distance_bias = 0;
+    }
+
+    /// from now on we handle this query.
+    void set_query(const float* query_vector) override {
+        AQInvertedListScanner::set_query(query_vector);
+        if (!is_IP && !ia.by_residual) {
+            distance_bias = fvec_norm_L2sqr(query_vector, ia.d);
+        }
+    }
+
+    /// following codes come from this inverted list
+    void set_list(idx_t list_no, float coarse_dis) override {
+        AQInvertedListScanner::set_list(list_no, coarse_dis);
+        // TODO find a way to provide the nprobes together to do a matmul
+        // + precompute tables
+        aq.compute_LUT(1, q, LUT.data());
+
+        if (ia.by_residual) {
+            distance_bias = coarse_dis;
+        }
+    }
+
+    /// compute a single query-to-code distance
+    float distance_to_code(const uint8_t* code) const final {
+        return distance_bias +
+                aq.compute_1_distance_LUT<is_IP, search_type>(code, LUT.data());
+    }
+
+    ~AQInvertedListScannerLUT() override {}
+};
+
+} // anonymous namespace
+
+InvertedListScanner* IndexIVFAdditiveQuantizer::get_InvertedListScanner(
+        bool store_pairs) const {
+    if (metric_type == METRIC_INNER_PRODUCT) {
+        if (aq->search_type == AdditiveQuantizer::ST_decompress) {
+            return new AQInvertedListScannerDecompress<true>(
+                    *this, store_pairs);
+        } else {
+            return new AQInvertedListScannerLUT<
+                    true,
+                    AdditiveQuantizer::ST_LUT_nonorm>(*this, store_pairs);
+        }
+    } else {
+        switch (aq->search_type) {
+            case AdditiveQuantizer::ST_decompress:
+                return new AQInvertedListScannerDecompress<false>(
+                        *this, store_pairs);
+#define A(st)                                                              \
+    case AdditiveQuantizer::st:                                            \
+        return new AQInvertedListScannerLUT<false, AdditiveQuantizer::st>( \
+                *this, store_pairs);
+            A(ST_LUT_nonorm)
+            // A(ST_norm_from_LUT)
+            A(ST_norm_float)
+            A(ST_norm_qint8)
+            A(ST_norm_qint4)
+            A(ST_norm_cqint8)
+            A(ST_norm_cqint4)
+#undef A
+            default:
+                FAISS_THROW_FMT(
+                        "search type %d not supported", aq->search_type);
+        }
+    }
+}
+
+/**************************************************************************************
+ * IndexIVFResidualQuantizer
+ **************************************************************************************/
+
+IndexIVFResidualQuantizer::IndexIVFResidualQuantizer(
+        Index* quantizer,
+        size_t d,
+        size_t nlist,
+        const std::vector<size_t>& nbits,
+        MetricType metric,
+        Search_type_t search_type)
+        : IndexIVFAdditiveQuantizer(&rq, quantizer, d, nlist, metric),
+          rq(d, nbits, search_type) {
+    code_size = invlists->code_size = rq.code_size;
+}
+
+IndexIVFResidualQuantizer::IndexIVFResidualQuantizer()
+        : IndexIVFAdditiveQuantizer(&rq) {}
+
+IndexIVFResidualQuantizer::IndexIVFResidualQuantizer(
+        Index* quantizer,
+        size_t d,
+        size_t nlist,
+        size_t M,     /* number of subquantizers */
+        size_t nbits, /* number of bit per subvector index */
+        MetricType metric,
+        Search_type_t search_type)
+        : IndexIVFResidualQuantizer(
+                  quantizer,
+                  d,
+                  nlist,
+                  std::vector<size_t>(M, nbits),
+                  metric,
+                  search_type) {}
+
+IndexIVFResidualQuantizer::~IndexIVFResidualQuantizer() {}
+
+/**************************************************************************************
+ * IndexIVFLocalSearchQuantizer
+ **************************************************************************************/
+
+IndexIVFLocalSearchQuantizer::IndexIVFLocalSearchQuantizer(
+        Index* quantizer,
+        size_t d,
+        size_t nlist,
+        size_t M,     /* number of subquantizers */
+        size_t nbits, /* number of bit per subvector index */
+        MetricType metric,
+        Search_type_t search_type)
+        : IndexIVFAdditiveQuantizer(&lsq, quantizer, d, nlist, metric),
+          lsq(d, M, nbits, search_type) {
+    code_size = invlists->code_size = lsq.code_size;
+}
+
+IndexIVFLocalSearchQuantizer::IndexIVFLocalSearchQuantizer()
+        : IndexIVFAdditiveQuantizer(&lsq) {}
+
+IndexIVFLocalSearchQuantizer::~IndexIVFLocalSearchQuantizer() {}
+
+} // namespace faiss
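In `encode_vectors` above, when `include_listnos` is true the per-vector code written to `codes` is the coarse list number (in `coarse_code_size()` bytes) followed by the `code_size`-byte additive-quantizer code; the final loop runs backwards and uses `memmove` so the in-place expansion never overwrites codes that have not been moved yet. The helper below is purely illustrative (it is not part of faiss) and only splits such a combined code back into its two parts, to make the layout concrete.

```cpp
// Illustrative only: split one combined code produced by
// IndexIVFAdditiveQuantizer::encode_vectors(..., include_listnos=true).
// coarse_size corresponds to coarse_code_size(), code_size to index.code_size.
#include <cstddef>
#include <cstdint>
#include <vector>

struct SplitCode {
    std::vector<uint8_t> listno_bytes; // encoded inverted-list number
    std::vector<uint8_t> aq_code;      // additive-quantizer code
};

inline SplitCode split_combined_code(
        const uint8_t* combined,
        size_t coarse_size,
        size_t code_size) {
    SplitCode out;
    out.listno_bytes.assign(combined, combined + coarse_size);
    out.aq_code.assign(
            combined + coarse_size, combined + coarse_size + code_size);
    return out;
}
```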
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h (new file)
@@ -0,0 +1,121 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef FAISS_INDEX_IVF_ADDITIVE_QUANTIZER_H
+#define FAISS_INDEX_IVF_ADDITIVE_QUANTIZER_H
+
+#include <faiss/impl/AdditiveQuantizer.h>
+
+#include <cstdint>
+#include <vector>
+
+#include <faiss/IndexIVF.h>
+#include <faiss/impl/LocalSearchQuantizer.h>
+#include <faiss/impl/ResidualQuantizer.h>
+#include <faiss/impl/platform_macros.h>
+
+namespace faiss {
+
+/// Abstract class for IVF additive quantizers.
+/// The search functions are in common.
+struct IndexIVFAdditiveQuantizer : IndexIVF {
+    // the quantizer
+    AdditiveQuantizer* aq;
+    bool by_residual = true;
+    int use_precomputed_table = 0; // for future use
+
+    using Search_type_t = AdditiveQuantizer::Search_type_t;
+
+    IndexIVFAdditiveQuantizer(
+            AdditiveQuantizer* aq,
+            Index* quantizer,
+            size_t d,
+            size_t nlist,
+            MetricType metric = METRIC_L2);
+
+    explicit IndexIVFAdditiveQuantizer(AdditiveQuantizer* aq);
+
+    void train_residual(idx_t n, const float* x) override;
+
+    void encode_vectors(
+            idx_t n,
+            const float* x,
+            const idx_t* list_nos,
+            uint8_t* codes,
+            bool include_listnos = false) const override;
+
+    InvertedListScanner* get_InvertedListScanner(
+            bool store_pairs) const override;
+
+    ~IndexIVFAdditiveQuantizer() override;
+};
+
+/** IndexIVF based on a residual quantizer. Stored vectors are
+ * approximated by residual quantization codes.
+ */
+struct IndexIVFResidualQuantizer : IndexIVFAdditiveQuantizer {
+    /// The residual quantizer used to encode the vectors
+    ResidualQuantizer rq;
+
+    /** Constructor.
+     *
+     * @param d      dimensionality of the input vectors
+     * @param M      number of subquantizers
+     * @param nbits  number of bit per subvector index
+     */
+    IndexIVFResidualQuantizer(
+            Index* quantizer,
+            size_t d,
+            size_t nlist,
+            const std::vector<size_t>& nbits,
+            MetricType metric = METRIC_L2,
+            Search_type_t search_type = AdditiveQuantizer::ST_decompress);
+
+    IndexIVFResidualQuantizer(
+            Index* quantizer,
+            size_t d,
+            size_t nlist,
+            size_t M,     /* number of subquantizers */
+            size_t nbits, /* number of bit per subvector index */
+            MetricType metric = METRIC_L2,
+            Search_type_t search_type = AdditiveQuantizer::ST_decompress);
+
+    IndexIVFResidualQuantizer();
+
+    virtual ~IndexIVFResidualQuantizer();
+};
+
+/** IndexIVF based on a residual quantizer. Stored vectors are
+ * approximated by residual quantization codes.
+ */
+struct IndexIVFLocalSearchQuantizer : IndexIVFAdditiveQuantizer {
+    /// The LSQ quantizer used to encode the vectors
+    LocalSearchQuantizer lsq;
+
+    /** Constructor.
+     *
+     * @param d      dimensionality of the input vectors
+     * @param M      number of subquantizers
+     * @param nbits  number of bit per subvector index
+     */
+    IndexIVFLocalSearchQuantizer(
+            Index* quantizer,
+            size_t d,
+            size_t nlist,
+            size_t M,     /* number of subquantizers */
+            size_t nbits, /* number of bit per subvector index */
+            MetricType metric = METRIC_L2,
+            Search_type_t search_type = AdditiveQuantizer::ST_decompress);
+
+    IndexIVFLocalSearchQuantizer();
+
+    virtual ~IndexIVFLocalSearchQuantizer();
+};
+
+} // namespace faiss
+
+#endif
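The `search_type` argument declared above selects how the scanners defined in IndexIVFAdditiveQuantizer.cpp compute distances: `ST_decompress` (the default) decodes each stored vector, while the other values are dispatched to the LUT-based scanner. A sketch, reusing the same toy parameters as the earlier example, of constructing the LSQ variant with one of the LUT-based search types:

```cpp
// Sketch only: construct the new IndexIVFLocalSearchQuantizer with a
// non-default search type (ST_norm_float, one of the LUT-based modes
// dispatched in get_InvertedListScanner). Parameter values are arbitrary.
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFAdditiveQuantizer.h>

int main() {
    size_t d = 64, nlist = 128, M = 4, nbits = 8;

    faiss::IndexFlatL2 coarse(d);
    faiss::IndexIVFLocalSearchQuantizer index(
            &coarse,
            d,
            nlist,
            M,
            nbits,
            faiss::METRIC_L2,
            faiss::AdditiveQuantizer::ST_norm_float);

    // train(), add() and search() then work as for any other IndexIVF
    // subclass.
    return 0;
}
```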
data/vendor/faiss/faiss/IndexIVFFlat.cpp
@@ -121,17 +121,16 @@ namespace {
 template <MetricType metric, class C>
 struct IVFFlatScanner : InvertedListScanner {
     size_t d;
-    bool store_pairs;
 
-    IVFFlatScanner(size_t d, bool store_pairs)
-            : d(d), store_pairs(store_pairs) {}
+    IVFFlatScanner(size_t d, bool store_pairs) : d(d) {
+        this->store_pairs = store_pairs;
+    }
 
     const float* xi;
     void set_query(const float* query) override {
         this->xi = query;
     }
 
-    idx_t list_no;
     void set_list(idx_t list_no, float /* coarse_dis */) override {
         this->list_no = list_no;
     }
@@ -223,18 +222,17 @@ IndexIVFFlatDedup::IndexIVFFlatDedup(
 
 void IndexIVFFlatDedup::train(idx_t n, const float* x) {
     std::unordered_map<uint64_t, idx_t> map;
-    float* x2 = new float[n * d];
-    ScopeDeleter<float> del(x2);
+    std::unique_ptr<float[]> x2(new float[n * d]);
 
     int64_t n2 = 0;
     for (int64_t i = 0; i < n; i++) {
         uint64_t hash = hash_bytes((uint8_t*)(x + i * d), code_size);
         if (map.count(hash) &&
-            !memcmp(x2 + map[hash] * d, x + i * d, code_size)) {
+            !memcmp(x2.get() + map[hash] * d, x + i * d, code_size)) {
             // is duplicate, skip
         } else {
             map[hash] = n2;
-            memcpy(x2 + n2 * d, x + i * d, code_size);
+            memcpy(x2.get() + n2 * d, x + i * d, code_size);
             n2++;
         }
     }
@@ -245,7 +243,7 @@ void IndexIVFFlatDedup::train(idx_t n, const float* x) {
                 n2,
                 n);
     }
-    IndexIVFFlat::train(n2, x2);
+    IndexIVFFlat::train(n2, x2.get());
 }
 
 void IndexIVFFlatDedup::add_with_ids(
@@ -256,9 +254,8 @@ void IndexIVFFlatDedup::add_with_ids(
     assert(invlists);
     FAISS_THROW_IF_NOT_MSG(
             direct_map.no(), "IVFFlatDedup not implemented with direct_map");
-    int64_t* idx = new int64_t[na];
-    ScopeDeleter<int64_t> del(idx);
-    quantizer->assign(na, x, idx);
+    std::unique_ptr<int64_t[]> idx(new int64_t[na]);
+    quantizer->assign(na, x, idx.get());
 
     int64_t n_add = 0, n_dup = 0;
 
data/vendor/faiss/faiss/IndexIVFPQ.cpp
@@ -584,7 +584,7 @@ struct QueryTables {
     // field specific to query
     const float* qi;
 
-    // query-specific
+    // query-specific initialization
     void init_query(const float* qi) {
         this->qi = qi;
         if (metric_type == METRIC_INNER_PRODUCT)
@@ -1018,21 +1018,22 @@ struct IVFPQScannerT : QueryTables {
 template <MetricType METRIC_TYPE, class C, class PQDecoder>
 struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
                       InvertedListScanner {
-    bool store_pairs;
     int precompute_mode;
 
     IVFPQScanner(const IndexIVFPQ& ivfpq, bool store_pairs, int precompute_mode)
             : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>(
                       ivfpq,
                       nullptr),
-              store_pairs(store_pairs),
-              precompute_mode(precompute_mode) {}
+              precompute_mode(precompute_mode) {
+        this->store_pairs = store_pairs;
+    }
 
     void set_query(const float* query) override {
         this->init_query(query);
     }
 
     void set_list(idx_t list_no, float coarse_dis) override {
+        this->list_no = list_no;
         this->init_list(list_no, coarse_dis, precompute_mode);
     }
 
data/vendor/faiss/faiss/IndexIVFPQ.h
@@ -150,7 +150,7 @@ struct IndexIVFPQ : IndexIVF {
      * < precomputed_tables_max_bytes), set use_precomputed_table on
      * output =1: tables that work for all quantizers (size 256 * nlist * M) =2:
      * specific version for MultiIndexQuantizer (much more compact)
-     * @param precomputed_table precomputed table to
+     * @param precomputed_table precomputed table to initialize
      */
 
     void initialize_IVFPQ_precomputed_table(