faiss 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +18 -18
- data/README.md +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/Clustering.cpp +318 -53
- data/vendor/faiss/Clustering.h +39 -11
- data/vendor/faiss/DirectMap.cpp +267 -0
- data/vendor/faiss/DirectMap.h +120 -0
- data/vendor/faiss/IVFlib.cpp +24 -4
- data/vendor/faiss/IVFlib.h +4 -0
- data/vendor/faiss/Index.h +5 -24
- data/vendor/faiss/Index2Layer.cpp +0 -1
- data/vendor/faiss/IndexBinary.h +7 -3
- data/vendor/faiss/IndexBinaryFlat.cpp +5 -0
- data/vendor/faiss/IndexBinaryFlat.h +3 -0
- data/vendor/faiss/IndexBinaryHash.cpp +492 -0
- data/vendor/faiss/IndexBinaryHash.h +116 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +160 -107
- data/vendor/faiss/IndexBinaryIVF.h +14 -4
- data/vendor/faiss/IndexFlat.h +2 -1
- data/vendor/faiss/IndexHNSW.cpp +68 -16
- data/vendor/faiss/IndexHNSW.h +3 -3
- data/vendor/faiss/IndexIVF.cpp +72 -76
- data/vendor/faiss/IndexIVF.h +24 -5
- data/vendor/faiss/IndexIVFFlat.cpp +19 -54
- data/vendor/faiss/IndexIVFFlat.h +1 -11
- data/vendor/faiss/IndexIVFPQ.cpp +49 -26
- data/vendor/faiss/IndexIVFPQ.h +9 -10
- data/vendor/faiss/IndexIVFPQR.cpp +2 -2
- data/vendor/faiss/IndexIVFSpectralHash.cpp +2 -2
- data/vendor/faiss/IndexLSH.h +4 -1
- data/vendor/faiss/IndexPreTransform.cpp +0 -1
- data/vendor/faiss/IndexScalarQuantizer.cpp +8 -1
- data/vendor/faiss/InvertedLists.cpp +0 -2
- data/vendor/faiss/MetaIndexes.cpp +0 -1
- data/vendor/faiss/MetricType.h +36 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +13 -7
- data/vendor/faiss/c_api/Clustering_c.h +11 -5
- data/vendor/faiss/c_api/IndexIVF_c.cpp +7 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +7 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +21 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.h +32 -0
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +185 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +4 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +1 -1
- data/vendor/faiss/gpu/GpuDistance.h +93 -0
- data/vendor/faiss/gpu/GpuIndex.h +7 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +0 -10
- data/vendor/faiss/gpu/GpuIndexIVF.h +1 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +8 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +49 -27
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +110 -2
- data/vendor/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +17 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +14 -3
- data/vendor/faiss/impl/HNSW.cpp +0 -1
- data/vendor/faiss/impl/PolysemousTraining.h +5 -5
- data/vendor/faiss/impl/ProductQuantizer-inl.h +138 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +1 -113
- data/vendor/faiss/impl/ProductQuantizer.h +42 -47
- data/vendor/faiss/impl/index_read.cpp +103 -7
- data/vendor/faiss/impl/index_write.cpp +101 -5
- data/vendor/faiss/impl/io.cpp +111 -1
- data/vendor/faiss/impl/io.h +38 -0
- data/vendor/faiss/index_factory.cpp +0 -1
- data/vendor/faiss/tests/test_merge.cpp +0 -1
- data/vendor/faiss/tests/test_pq_encoding.cpp +6 -6
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +1 -0
- data/vendor/faiss/utils/distances.cpp +4 -5
- data/vendor/faiss/utils/distances_simd.cpp +0 -1
- data/vendor/faiss/utils/hamming.cpp +85 -3
- data/vendor/faiss/utils/hamming.h +20 -0
- data/vendor/faiss/utils/utils.cpp +0 -96
- data/vendor/faiss/utils/utils.h +0 -15
- metadata +11 -3
- data/lib/faiss/ext.bundle +0 -0
data/vendor/faiss/IndexIVF.h
CHANGED
@@ -12,10 +12,12 @@
|
|
12
12
|
|
13
13
|
|
14
14
|
#include <vector>
|
15
|
+
#include <unordered_map>
|
15
16
|
#include <stdint.h>
|
16
17
|
|
17
18
|
#include <faiss/Index.h>
|
18
19
|
#include <faiss/InvertedLists.h>
|
20
|
+
#include <faiss/DirectMap.h>
|
19
21
|
#include <faiss/Clustering.h>
|
20
22
|
#include <faiss/utils/Heap.h>
|
21
23
|
|
@@ -32,7 +34,6 @@ struct Level1Quantizer {
|
|
32
34
|
Index * quantizer; ///< quantizer that maps vectors to inverted lists
|
33
35
|
size_t nlist; ///< number of possible key values
|
34
36
|
|
35
|
-
|
36
37
|
/**
|
37
38
|
* = 0: use the quantizer as index in a kmeans training
|
38
39
|
* = 1: just pass on the training set to the train() of the quantizer
|
@@ -107,14 +108,18 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
107
108
|
/** Parallel mode determines how queries are parallelized with OpenMP
|
108
109
|
*
|
109
110
|
* 0 (default): parallelize over queries
|
110
|
-
* 1: parallelize over
|
111
|
+
* 1: parallelize over inverted lists
|
111
112
|
* 2: parallelize over both
|
113
|
+
*
|
114
|
+
* PARALLEL_MODE_NO_HEAP_INIT: bitwise-OR this flag with one of the modes above to
|
115
|
+
* prevent the heap from being initialized and finalized
|
112
116
|
*/
|
113
117
|
int parallel_mode;
|
118
|
+
const int PARALLEL_MODE_NO_HEAP_INIT = 1024;
|
114
119
|
|
115
|
-
|
116
|
-
|
117
|
-
|
120
|
+
/** optional map that maps back ids to invlist entries. This
|
121
|
+
* enables reconstruct() */
|
122
|
+
DirectMap direct_map;
|
118
123
|
|
119
124
|
/** The Inverted file takes a quantizer (an Index) on input,
|
120
125
|
* which implements the function mapping a vector to a list
|
@@ -195,8 +200,19 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
195
200
|
virtual InvertedListScanner *get_InvertedListScanner (
|
196
201
|
bool store_pairs=false) const;
|
197
202
|
|
203
|
+
/** reconstruct a vector. Works only if maintain_direct_map is set to 1 or 2 */
|
198
204
|
void reconstruct (idx_t key, float* recons) const override;
|
199
205
|
|
206
|
+
/** Update a subset of vectors.
|
207
|
+
*
|
208
|
+
* The index must have a direct_map
|
209
|
+
*
|
210
|
+
* @param nv nb of vectors to update
|
211
|
+
* @param idx vector indices to update, size nv
|
212
|
+
* @param v vectors of new values, size nv*d
|
213
|
+
*/
|
214
|
+
virtual void update_vectors (int nv, const idx_t *idx, const float *v);
|
215
|
+
|
200
216
|
/** Reconstruct a subset of the indexed vectors.
|
201
217
|
*
|
202
218
|
* Overrides default implementation to bypass reconstruct() which requires
|
@@ -268,6 +284,9 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
268
284
|
*/
|
269
285
|
void make_direct_map (bool new_maintain_direct_map=true);
|
270
286
|
|
287
|
+
void set_direct_map_type (DirectMap::Type type);
|
288
|
+
|
289
|
+
|
271
290
|
/// replace the inverted lists, old one is deallocated if own_invlists
|
272
291
|
void replace_invlists (InvertedLists *il, bool own=false);
|
273
292
|
|
@@ -45,8 +45,7 @@ void IndexIVFFlat::add_core (idx_t n, const float * x, const int64_t *xids,
|
|
45
45
|
{
|
46
46
|
FAISS_THROW_IF_NOT (is_trained);
|
47
47
|
assert (invlists);
|
48
|
-
|
49
|
-
"cannot have direct map and add with ids");
|
48
|
+
direct_map.check_can_add (xids);
|
50
49
|
const int64_t * idx;
|
51
50
|
ScopeDeleter<int64_t> del;
|
52
51
|
|
@@ -60,19 +59,21 @@ void IndexIVFFlat::add_core (idx_t n, const float * x, const int64_t *xids,
|
|
60
59
|
}
|
61
60
|
int64_t n_add = 0;
|
62
61
|
for (size_t i = 0; i < n; i++) {
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
if (list_no < 0)
|
67
|
-
continue;
|
68
|
-
const float *xi = x + i * d;
|
69
|
-
size_t offset = invlists->add_entry (
|
70
|
-
list_no, id, (const uint8_t*) xi);
|
62
|
+
idx_t id = xids ? xids[i] : ntotal + i;
|
63
|
+
idx_t list_no = idx [i];
|
64
|
+
size_t offset;
|
71
65
|
|
72
|
-
if (
|
73
|
-
|
74
|
-
|
66
|
+
if (list_no >= 0) {
|
67
|
+
const float *xi = x + i * d;
|
68
|
+
offset = invlists->add_entry (
|
69
|
+
list_no, id, (const uint8_t*) xi);
|
70
|
+
n_add++;
|
71
|
+
} else {
|
72
|
+
offset = 0;
|
73
|
+
}
|
74
|
+
direct_map.add_single_id (id, list_no, offset);
|
75
75
|
}
|
76
|
+
|
76
77
|
if (verbose) {
|
77
78
|
printf("IndexIVFFlat::add_core: added %ld / %ld vectors\n",
|
78
79
|
n_add, n);
|
@@ -158,7 +159,7 @@ struct IVFFlatScanner: InvertedListScanner {
|
|
158
159
|
fvec_inner_product (xi, yj, d) : fvec_L2sqr (xi, yj, d);
|
159
160
|
if (C::cmp (simi[0], dis)) {
|
160
161
|
heap_pop<C> (k, simi, idxi);
|
161
|
-
int64_t id = store_pairs ? (list_no
|
162
|
+
int64_t id = store_pairs ? lo_build (list_no, j) : ids[j];
|
162
163
|
heap_push<C> (k, simi, idxi, dis, id);
|
163
164
|
nup++;
|
164
165
|
}
|
@@ -178,7 +179,7 @@ struct IVFFlatScanner: InvertedListScanner {
|
|
178
179
|
float dis = metric == METRIC_INNER_PRODUCT ?
|
179
180
|
fvec_inner_product (xi, yj, d) : fvec_L2sqr (xi, yj, d);
|
180
181
|
if (C::cmp (radius, dis)) {
|
181
|
-
int64_t id = store_pairs ? (list_no
|
182
|
+
int64_t id = store_pairs ? lo_build (list_no, j) : ids[j];
|
182
183
|
res.add (dis, id);
|
183
184
|
}
|
184
185
|
}
|
@@ -209,41 +210,6 @@ InvertedListScanner* IndexIVFFlat::get_InvertedListScanner
|
|
209
210
|
|
210
211
|
|
211
212
|
|
212
|
-
void IndexIVFFlat::update_vectors (int n, idx_t *new_ids, const float *x)
|
213
|
-
{
|
214
|
-
|
215
|
-
FAISS_THROW_IF_NOT (maintain_direct_map);
|
216
|
-
FAISS_THROW_IF_NOT (is_trained);
|
217
|
-
std::vector<idx_t> assign (n);
|
218
|
-
quantizer->assign (n, x, assign.data());
|
219
|
-
|
220
|
-
for (size_t i = 0; i < n; i++) {
|
221
|
-
idx_t id = new_ids[i];
|
222
|
-
FAISS_THROW_IF_NOT_MSG (0 <= id && id < ntotal,
|
223
|
-
"id to update out of range");
|
224
|
-
{ // remove old one
|
225
|
-
int64_t dm = direct_map[id];
|
226
|
-
int64_t ofs = dm & 0xffffffff;
|
227
|
-
int64_t il = dm >> 32;
|
228
|
-
size_t l = invlists->list_size (il);
|
229
|
-
if (ofs != l - 1) { // move l - 1 to ofs
|
230
|
-
int64_t id2 = invlists->get_single_id (il, l - 1);
|
231
|
-
direct_map[id2] = (il << 32) | ofs;
|
232
|
-
invlists->update_entry (il, ofs, id2,
|
233
|
-
invlists->get_single_code (il, l - 1));
|
234
|
-
}
|
235
|
-
invlists->resize (il, l - 1);
|
236
|
-
}
|
237
|
-
{ // insert new one
|
238
|
-
int64_t il = assign[i];
|
239
|
-
size_t l = invlists->list_size (il);
|
240
|
-
int64_t dm = (il << 32) | l;
|
241
|
-
direct_map[id] = dm;
|
242
|
-
invlists->add_entry (il, id, (const uint8_t*)(x + i * d));
|
243
|
-
}
|
244
|
-
}
|
245
|
-
|
246
|
-
}
|
247
213
|
|
248
214
|
void IndexIVFFlat::reconstruct_from_offset (int64_t list_no, int64_t offset,
|
249
215
|
float* recons) const
|
@@ -295,8 +261,7 @@ void IndexIVFFlatDedup::add_with_ids(
|
|
295
261
|
|
296
262
|
FAISS_THROW_IF_NOT (is_trained);
|
297
263
|
assert (invlists);
|
298
|
-
FAISS_THROW_IF_NOT_MSG (
|
299
|
-
!maintain_direct_map,
|
264
|
+
FAISS_THROW_IF_NOT_MSG (direct_map.no(),
|
300
265
|
"IVFFlatDedup not implemented with direct_map");
|
301
266
|
int64_t * idx = new int64_t [na];
|
302
267
|
ScopeDeleter<int64_t> del (idx);
|
@@ -431,7 +396,7 @@ size_t IndexIVFFlatDedup::remove_ids(const IDSelector& sel)
|
|
431
396
|
|
432
397
|
// mostly copied from IndexIVF.cpp
|
433
398
|
|
434
|
-
FAISS_THROW_IF_NOT_MSG (
|
399
|
+
FAISS_THROW_IF_NOT_MSG (direct_map.no(),
|
435
400
|
"direct map remove not implemented");
|
436
401
|
|
437
402
|
std::vector<int64_t> toremove(nlist);
|
@@ -484,7 +449,7 @@ void IndexIVFFlatDedup::range_search(
|
|
484
449
|
FAISS_THROW_MSG ("not implemented");
|
485
450
|
}
|
486
451
|
|
487
|
-
void IndexIVFFlatDedup::update_vectors (int , idx_t *, const float *)
|
452
|
+
void IndexIVFFlatDedup::update_vectors (int , const idx_t *, const float *)
|
488
453
|
{
|
489
454
|
FAISS_THROW_MSG ("not implemented");
|
490
455
|
}
|
data/vendor/faiss/IndexIVFFlat.h
CHANGED
@@ -44,15 +44,6 @@ struct IndexIVFFlat: IndexIVF {
|
|
44
44
|
InvertedListScanner *get_InvertedListScanner (bool store_pairs)
|
45
45
|
const override;
|
46
46
|
|
47
|
-
/** Update a subset of vectors.
|
48
|
-
*
|
49
|
-
* The index must have a direct_map
|
50
|
-
*
|
51
|
-
* @param nv nb of vectors to update
|
52
|
-
* @param idx vector indices to update, size nv
|
53
|
-
* @param v vectors of new values, size nv*d
|
54
|
-
*/
|
55
|
-
virtual void update_vectors (int nv, idx_t *idx, const float *v);
|
56
47
|
|
57
48
|
void reconstruct_from_offset (int64_t list_no, int64_t offset,
|
58
49
|
float* recons) const override;
|
@@ -99,8 +90,7 @@ struct IndexIVFFlatDedup: IndexIVFFlat {
|
|
99
90
|
RangeSearchResult* result) const override;
|
100
91
|
|
101
92
|
/// not implemented
|
102
|
-
void update_vectors (int nv, idx_t *idx, const float *v) override;
|
103
|
-
|
93
|
+
void update_vectors (int nv, const idx_t *idx, const float *v) override;
|
104
94
|
|
105
95
|
/// not implemented
|
106
96
|
void reconstruct_from_offset (int64_t list_no, int64_t offset,
|
data/vendor/faiss/IndexIVFPQ.cpp
CHANGED
@@ -36,8 +36,8 @@ namespace faiss {
|
|
36
36
|
******************************************/
|
37
37
|
|
38
38
|
IndexIVFPQ::IndexIVFPQ (Index * quantizer, size_t d, size_t nlist,
|
39
|
-
size_t M, size_t nbits_per_idx):
|
40
|
-
IndexIVF (quantizer, d, nlist, 0,
|
39
|
+
size_t M, size_t nbits_per_idx, MetricType metric):
|
40
|
+
IndexIVF (quantizer, d, nlist, 0, metric),
|
41
41
|
pq (d, M, nbits_per_idx)
|
42
42
|
{
|
43
43
|
FAISS_THROW_IF_NOT (nbits_per_idx <= 8);
|
@@ -278,6 +278,8 @@ void IndexIVFPQ::add_core_o (idx_t n, const float * x, const idx_t *xids,
|
|
278
278
|
|
279
279
|
InterruptCallback::check();
|
280
280
|
|
281
|
+
direct_map.check_can_add (xids);
|
282
|
+
|
281
283
|
FAISS_THROW_IF_NOT (is_trained);
|
282
284
|
double t0 = getmillisecs ();
|
283
285
|
const idx_t * idx;
|
@@ -312,13 +314,14 @@ void IndexIVFPQ::add_core_o (idx_t n, const float * x, const idx_t *xids,
|
|
312
314
|
size_t n_ignore = 0;
|
313
315
|
for (size_t i = 0; i < n; i++) {
|
314
316
|
idx_t key = idx[i];
|
317
|
+
idx_t id = xids ? xids[i] : ntotal + i;
|
315
318
|
if (key < 0) {
|
319
|
+
direct_map.add_single_id (id, -1, 0);
|
316
320
|
n_ignore ++;
|
317
321
|
if (residuals_2)
|
318
322
|
memset (residuals_2, 0, sizeof(*residuals_2) * d);
|
319
323
|
continue;
|
320
324
|
}
|
321
|
-
idx_t id = xids ? xids[i] : ntotal + i;
|
322
325
|
|
323
326
|
uint8_t *code = xcodes + i * code_size;
|
324
327
|
size_t offset = invlists->add_entry (key, id, code);
|
@@ -331,11 +334,9 @@ void IndexIVFPQ::add_core_o (idx_t n, const float * x, const idx_t *xids,
|
|
331
334
|
res2[j] = xi[j] - res2[j];
|
332
335
|
}
|
333
336
|
|
334
|
-
|
335
|
-
direct_map.push_back (key << 32 | offset);
|
337
|
+
direct_map.add_single_id (id, key, offset);
|
336
338
|
}
|
337
339
|
|
338
|
-
|
339
340
|
double t3 = getmillisecs ();
|
340
341
|
if(verbose) {
|
341
342
|
char comment[100] = {0};
|
@@ -802,7 +803,7 @@ struct KnnSearchResults {
|
|
802
803
|
inline void add (idx_t j, float dis) {
|
803
804
|
if (C::cmp (heap_sim[0], dis)) {
|
804
805
|
heap_pop<C> (k, heap_sim, heap_ids);
|
805
|
-
idx_t id = ids ? ids[j] : (key
|
806
|
+
idx_t id = ids ? ids[j] : lo_build (key, j);
|
806
807
|
heap_push<C> (k, heap_sim, heap_ids, dis, id);
|
807
808
|
nup++;
|
808
809
|
}
|
@@ -821,7 +822,7 @@ struct RangeSearchResults {
|
|
821
822
|
|
822
823
|
inline void add (idx_t j, float dis) {
|
823
824
|
if (C::cmp (radius, dis)) {
|
824
|
-
idx_t id = ids ? ids[j] : (key
|
825
|
+
idx_t id = ids ? ids[j] : lo_build (key, j);
|
825
826
|
rres.add (dis, id);
|
826
827
|
}
|
827
828
|
}
|
@@ -834,7 +835,7 @@ struct RangeSearchResults {
|
|
834
835
|
* The scanning functions call their favorite precompute_*
|
835
836
|
* function to precompute the tables they need.
|
836
837
|
*****************************************************/
|
837
|
-
template <typename IDType, MetricType METRIC_TYPE>
|
838
|
+
template <typename IDType, MetricType METRIC_TYPE, class PQDecoder>
|
838
839
|
struct IVFPQScannerT: QueryTables {
|
839
840
|
|
840
841
|
const uint8_t * list_codes;
|
@@ -844,7 +845,6 @@ struct IVFPQScannerT: QueryTables {
|
|
844
845
|
IVFPQScannerT (const IndexIVFPQ & ivfpq, const IVFSearchParameters *params):
|
845
846
|
QueryTables (ivfpq, params)
|
846
847
|
{
|
847
|
-
FAISS_THROW_IF_NOT (pq.nbits == 8);
|
848
848
|
assert(METRIC_TYPE == metric_type);
|
849
849
|
}
|
850
850
|
|
@@ -872,12 +872,13 @@ struct IVFPQScannerT: QueryTables {
|
|
872
872
|
SearchResultType & res) const
|
873
873
|
{
|
874
874
|
for (size_t j = 0; j < ncode; j++) {
|
875
|
-
|
875
|
+
PQDecoder decoder(codes, pq.nbits);
|
876
|
+
codes += pq.code_size;
|
876
877
|
float dis = dis0;
|
877
878
|
const float *tab = sim_table;
|
878
879
|
|
879
880
|
for (size_t m = 0; m < pq.M; m++) {
|
880
|
-
dis += tab[
|
881
|
+
dis += tab[decoder.decode()];
|
881
882
|
tab += pq.ksub;
|
882
883
|
}
|
883
884
|
|
@@ -893,12 +894,14 @@ struct IVFPQScannerT: QueryTables {
|
|
893
894
|
SearchResultType & res) const
|
894
895
|
{
|
895
896
|
for (size_t j = 0; j < ncode; j++) {
|
897
|
+
PQDecoder decoder(codes, pq.nbits);
|
898
|
+
codes += pq.code_size;
|
896
899
|
|
897
900
|
float dis = dis0;
|
898
901
|
const float *tab = sim_table_2;
|
899
902
|
|
900
903
|
for (size_t m = 0; m < pq.M; m++) {
|
901
|
-
int ci =
|
904
|
+
int ci = decoder.decode();
|
902
905
|
dis += sim_table_ptrs [m][ci] - 2 * tab [ci];
|
903
906
|
tab += pq.ksub;
|
904
907
|
}
|
@@ -963,12 +966,13 @@ struct IVFPQScannerT: QueryTables {
|
|
963
966
|
int hd = hc.hamming (b_code);
|
964
967
|
if (hd < ht) {
|
965
968
|
n_hamming_pass ++;
|
969
|
+
PQDecoder decoder(codes, pq.nbits);
|
966
970
|
|
967
971
|
float dis = dis0;
|
968
972
|
const float *tab = sim_table;
|
969
973
|
|
970
974
|
for (size_t m = 0; m < pq.M; m++) {
|
971
|
-
dis += tab[
|
975
|
+
dis += tab[decoder.decode()];
|
972
976
|
tab += pq.ksub;
|
973
977
|
}
|
974
978
|
|
@@ -1023,16 +1027,18 @@ struct IVFPQScannerT: QueryTables {
|
|
1023
1027
|
* much we precompute (2 = precompute distance tables, 1 = precompute
|
1024
1028
|
* pointers to distances, 0 = compute distances one by one).
|
1025
1029
|
* Currently only 2 is supported */
|
1026
|
-
template<MetricType METRIC_TYPE, class C,
|
1030
|
+
template<MetricType METRIC_TYPE, class C, class PQDecoder>
|
1027
1031
|
struct IVFPQScanner:
|
1028
|
-
IVFPQScannerT<Index::idx_t, METRIC_TYPE>,
|
1032
|
+
IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
|
1029
1033
|
InvertedListScanner
|
1030
1034
|
{
|
1031
1035
|
bool store_pairs;
|
1036
|
+
int precompute_mode;
|
1032
1037
|
|
1033
|
-
IVFPQScanner(const IndexIVFPQ & ivfpq, bool store_pairs
|
1034
|
-
|
1035
|
-
|
1038
|
+
IVFPQScanner(const IndexIVFPQ & ivfpq, bool store_pairs,
|
1039
|
+
int precompute_mode):
|
1040
|
+
IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>(ivfpq, nullptr),
|
1041
|
+
store_pairs(store_pairs), precompute_mode(precompute_mode)
|
1036
1042
|
{
|
1037
1043
|
}
|
1038
1044
|
|
@@ -1048,9 +1054,10 @@ struct IVFPQScanner:
|
|
1048
1054
|
assert(precompute_mode == 2);
|
1049
1055
|
float dis = this->dis0;
|
1050
1056
|
const float *tab = this->sim_table;
|
1057
|
+
PQDecoder decoder(code, this->pq.nbits);
|
1051
1058
|
|
1052
1059
|
for (size_t m = 0; m < this->pq.M; m++) {
|
1053
|
-
dis += tab[
|
1060
|
+
dis += tab[decoder.decode()];
|
1054
1061
|
tab += this->pq.ksub;
|
1055
1062
|
}
|
1056
1063
|
return dis;
|
@@ -1115,7 +1122,22 @@ struct IVFPQScanner:
|
|
1115
1122
|
}
|
1116
1123
|
};
|
1117
1124
|
|
1125
|
+
template<class PQDecoder>
|
1126
|
+
InvertedListScanner *get_InvertedListScanner1 (const IndexIVFPQ &index,
|
1127
|
+
bool store_pairs)
|
1128
|
+
{
|
1118
1129
|
|
1130
|
+
if (index.metric_type == METRIC_INNER_PRODUCT) {
|
1131
|
+
return new IVFPQScanner
|
1132
|
+
<METRIC_INNER_PRODUCT, CMin<float, idx_t>, PQDecoder>
|
1133
|
+
(index, store_pairs, 2);
|
1134
|
+
} else if (index.metric_type == METRIC_L2) {
|
1135
|
+
return new IVFPQScanner
|
1136
|
+
<METRIC_L2, CMax<float, idx_t>, PQDecoder>
|
1137
|
+
(index, store_pairs, 2);
|
1138
|
+
}
|
1139
|
+
return nullptr;
|
1140
|
+
}
|
1119
1141
|
|
1120
1142
|
|
1121
1143
|
} // anonymous namespace
|
@@ -1123,12 +1145,13 @@ struct IVFPQScanner:
|
|
1123
1145
|
InvertedListScanner *
|
1124
1146
|
IndexIVFPQ::get_InvertedListScanner (bool store_pairs) const
|
1125
1147
|
{
|
1126
|
-
|
1127
|
-
|
1128
|
-
|
1129
|
-
} else if (
|
1130
|
-
return
|
1131
|
-
|
1148
|
+
|
1149
|
+
if (pq.nbits == 8) {
|
1150
|
+
return get_InvertedListScanner1<PQDecoder8> (*this, store_pairs);
|
1151
|
+
} else if (pq.nbits == 16) {
|
1152
|
+
return get_InvertedListScanner1<PQDecoder16> (*this, store_pairs);
|
1153
|
+
} else {
|
1154
|
+
return get_InvertedListScanner1<PQDecoderGeneric> (*this, store_pairs);
|
1132
1155
|
}
|
1133
1156
|
return nullptr;
|
1134
1157
|
|
data/vendor/faiss/IndexIVFPQ.h
CHANGED
@@ -42,14 +42,14 @@ struct IndexIVFPQ: IndexIVF {
|
|
42
42
|
int polysemous_ht; ///< Hamming thresh for polysemous filtering
|
43
43
|
|
44
44
|
/** Precomputed tables that speed up query preprocessing at some
|
45
|
-
* memory cost
|
45
|
+
* memory cost (used only for by_residual with L2 metric)
|
46
46
|
* =-1: force disable
|
47
47
|
* =0: decide heuristically (default: use tables only if they are
|
48
48
|
* < precomputed_table_max_bytes)
|
49
49
|
* =1: tables that work for all quantizers (size 256 * nlist * M)
|
50
50
|
* =2: specific version for MultiIndexQuantizer (much more compact)
|
51
51
|
*/
|
52
|
-
int use_precomputed_table;
|
52
|
+
int use_precomputed_table;
|
53
53
|
static size_t precomputed_table_max_bytes;
|
54
54
|
|
55
55
|
/// if use_precomputed_table
|
@@ -58,7 +58,7 @@ struct IndexIVFPQ: IndexIVF {
|
|
58
58
|
|
59
59
|
IndexIVFPQ (
|
60
60
|
Index * quantizer, size_t d, size_t nlist,
|
61
|
-
size_t M, size_t nbits_per_idx);
|
61
|
+
size_t M, size_t nbits_per_idx, MetricType metric = METRIC_L2);
|
62
62
|
|
63
63
|
void add_with_ids(idx_t n, const float* x, const idx_t* xids = nullptr)
|
64
64
|
override;
|
@@ -93,9 +93,9 @@ struct IndexIVFPQ: IndexIVF {
|
|
93
93
|
* the duplicates are returned in pre-allocated arrays (see the
|
94
94
|
* max sizes).
|
95
95
|
*
|
96
|
-
* @
|
96
|
+
* @param lims limits between groups of duplicates
|
97
97
|
* (max size ntotal / 2 + 1)
|
98
|
-
* @
|
98
|
+
* @param ids ids[lims[i]] : ids[lims[i+1]-1] is a group of
|
99
99
|
* duplicates (max size ntotal)
|
100
100
|
* @return n number of groups found
|
101
101
|
*/
|
@@ -135,15 +135,14 @@ struct IndexIVFPQ: IndexIVF {
|
|
135
135
|
/// statistics are robust to internal threading, but not if
|
136
136
|
/// IndexIVFPQ::search_preassigned is called by multiple threads
|
137
137
|
struct IndexIVFPQStats {
|
138
|
-
size_t nrefine;
|
138
|
+
size_t nrefine; ///< nb of refines (IVFPQR)
|
139
139
|
|
140
140
|
size_t n_hamming_pass;
|
141
|
-
|
141
|
+
///< nb of passed Hamming distance tests (for polysemous)
|
142
142
|
|
143
|
-
// timings measured with the CPU RTC
|
144
|
-
// on all threads
|
143
|
+
// timings measured with the CPU RTC on all threads
|
145
144
|
size_t search_cycles;
|
146
|
-
size_t refine_cycles;
|
145
|
+
size_t refine_cycles; ///< only for IVFPQR
|
147
146
|
|
148
147
|
IndexIVFPQStats () {reset (); }
|
149
148
|
void reset ();
|