faiss 0.1.1 → 0.1.2
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +18 -18
- data/README.md +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/Clustering.cpp +318 -53
- data/vendor/faiss/Clustering.h +39 -11
- data/vendor/faiss/DirectMap.cpp +267 -0
- data/vendor/faiss/DirectMap.h +120 -0
- data/vendor/faiss/IVFlib.cpp +24 -4
- data/vendor/faiss/IVFlib.h +4 -0
- data/vendor/faiss/Index.h +5 -24
- data/vendor/faiss/Index2Layer.cpp +0 -1
- data/vendor/faiss/IndexBinary.h +7 -3
- data/vendor/faiss/IndexBinaryFlat.cpp +5 -0
- data/vendor/faiss/IndexBinaryFlat.h +3 -0
- data/vendor/faiss/IndexBinaryHash.cpp +492 -0
- data/vendor/faiss/IndexBinaryHash.h +116 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +160 -107
- data/vendor/faiss/IndexBinaryIVF.h +14 -4
- data/vendor/faiss/IndexFlat.h +2 -1
- data/vendor/faiss/IndexHNSW.cpp +68 -16
- data/vendor/faiss/IndexHNSW.h +3 -3
- data/vendor/faiss/IndexIVF.cpp +72 -76
- data/vendor/faiss/IndexIVF.h +24 -5
- data/vendor/faiss/IndexIVFFlat.cpp +19 -54
- data/vendor/faiss/IndexIVFFlat.h +1 -11
- data/vendor/faiss/IndexIVFPQ.cpp +49 -26
- data/vendor/faiss/IndexIVFPQ.h +9 -10
- data/vendor/faiss/IndexIVFPQR.cpp +2 -2
- data/vendor/faiss/IndexIVFSpectralHash.cpp +2 -2
- data/vendor/faiss/IndexLSH.h +4 -1
- data/vendor/faiss/IndexPreTransform.cpp +0 -1
- data/vendor/faiss/IndexScalarQuantizer.cpp +8 -1
- data/vendor/faiss/InvertedLists.cpp +0 -2
- data/vendor/faiss/MetaIndexes.cpp +0 -1
- data/vendor/faiss/MetricType.h +36 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +13 -7
- data/vendor/faiss/c_api/Clustering_c.h +11 -5
- data/vendor/faiss/c_api/IndexIVF_c.cpp +7 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +7 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +21 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.h +32 -0
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +185 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +4 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +1 -1
- data/vendor/faiss/gpu/GpuDistance.h +93 -0
- data/vendor/faiss/gpu/GpuIndex.h +7 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +0 -10
- data/vendor/faiss/gpu/GpuIndexIVF.h +1 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +8 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +49 -27
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +110 -2
- data/vendor/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +17 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +14 -3
- data/vendor/faiss/impl/HNSW.cpp +0 -1
- data/vendor/faiss/impl/PolysemousTraining.h +5 -5
- data/vendor/faiss/impl/ProductQuantizer-inl.h +138 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +1 -113
- data/vendor/faiss/impl/ProductQuantizer.h +42 -47
- data/vendor/faiss/impl/index_read.cpp +103 -7
- data/vendor/faiss/impl/index_write.cpp +101 -5
- data/vendor/faiss/impl/io.cpp +111 -1
- data/vendor/faiss/impl/io.h +38 -0
- data/vendor/faiss/index_factory.cpp +0 -1
- data/vendor/faiss/tests/test_merge.cpp +0 -1
- data/vendor/faiss/tests/test_pq_encoding.cpp +6 -6
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +1 -0
- data/vendor/faiss/utils/distances.cpp +4 -5
- data/vendor/faiss/utils/distances_simd.cpp +0 -1
- data/vendor/faiss/utils/hamming.cpp +85 -3
- data/vendor/faiss/utils/hamming.h +20 -0
- data/vendor/faiss/utils/utils.cpp +0 -96
- data/vendor/faiss/utils/utils.h +0 -15
- metadata +11 -3
- data/lib/faiss/ext.bundle +0 -0
data/vendor/faiss/IndexIVF.h
CHANGED
@@ -12,10 +12,12 @@
|
|
12
12
|
|
13
13
|
|
14
14
|
#include <vector>
|
15
|
+
#include <unordered_map>
|
15
16
|
#include <stdint.h>
|
16
17
|
|
17
18
|
#include <faiss/Index.h>
|
18
19
|
#include <faiss/InvertedLists.h>
|
20
|
+
#include <faiss/DirectMap.h>
|
19
21
|
#include <faiss/Clustering.h>
|
20
22
|
#include <faiss/utils/Heap.h>
|
21
23
|
|
@@ -32,7 +34,6 @@ struct Level1Quantizer {
|
|
32
34
|
Index * quantizer; ///< quantizer that maps vectors to inverted lists
|
33
35
|
size_t nlist; ///< number of possible key values
|
34
36
|
|
35
|
-
|
36
37
|
/**
|
37
38
|
* = 0: use the quantizer as index in a kmeans training
|
38
39
|
* = 1: just pass on the training set to the train() of the quantizer
|
@@ -107,14 +108,18 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
107
108
|
/** Parallel mode determines how queries are parallelized with OpenMP
|
108
109
|
*
|
109
110
|
* 0 (default): parallelize over queries
|
110
|
-
* 1: parallelize over
|
111
|
+
* 1: parallelize over inverted lists
|
111
112
|
* 2: parallelize over both
|
113
|
+
*
|
114
|
+
* PARALLEL_MODE_NO_HEAP_INIT: binary or with the previous to
|
115
|
+
* prevent the heap to be initialized and finalized
|
112
116
|
*/
|
113
117
|
int parallel_mode;
|
118
|
+
const int PARALLEL_MODE_NO_HEAP_INIT = 1024;
|
114
119
|
|
115
|
-
|
116
|
-
|
117
|
-
|
120
|
+
/** optional map that maps back ids to invlist entries. This
|
121
|
+
* enables reconstruct() */
|
122
|
+
DirectMap direct_map;
|
118
123
|
|
119
124
|
/** The Inverted file takes a quantizer (an Index) on input,
|
120
125
|
* which implements the function mapping a vector to a list
|
@@ -195,8 +200,19 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
195
200
|
virtual InvertedListScanner *get_InvertedListScanner (
|
196
201
|
bool store_pairs=false) const;
|
197
202
|
|
203
|
+
/** reconstruct a vector. Works only if maintain_direct_map is set to 1 or 2 */
|
198
204
|
void reconstruct (idx_t key, float* recons) const override;
|
199
205
|
|
206
|
+
/** Update a subset of vectors.
|
207
|
+
*
|
208
|
+
* The index must have a direct_map
|
209
|
+
*
|
210
|
+
* @param nv nb of vectors to update
|
211
|
+
* @param idx vector indices to update, size nv
|
212
|
+
* @param v vectors of new values, size nv*d
|
213
|
+
*/
|
214
|
+
virtual void update_vectors (int nv, const idx_t *idx, const float *v);
|
215
|
+
|
200
216
|
/** Reconstruct a subset of the indexed vectors.
|
201
217
|
*
|
202
218
|
* Overrides default implementation to bypass reconstruct() which requires
|
@@ -268,6 +284,9 @@ struct IndexIVF: Index, Level1Quantizer {
|
|
268
284
|
*/
|
269
285
|
void make_direct_map (bool new_maintain_direct_map=true);
|
270
286
|
|
287
|
+
void set_direct_map_type (DirectMap::Type type);
|
288
|
+
|
289
|
+
|
271
290
|
/// replace the inverted lists, old one is deallocated if own_invlists
|
272
291
|
void replace_invlists (InvertedLists *il, bool own=false);
|
273
292
|
|
@@ -45,8 +45,7 @@ void IndexIVFFlat::add_core (idx_t n, const float * x, const int64_t *xids,
|
|
45
45
|
{
|
46
46
|
FAISS_THROW_IF_NOT (is_trained);
|
47
47
|
assert (invlists);
|
48
|
-
|
49
|
-
"cannot have direct map and add with ids");
|
48
|
+
direct_map.check_can_add (xids);
|
50
49
|
const int64_t * idx;
|
51
50
|
ScopeDeleter<int64_t> del;
|
52
51
|
|
@@ -60,19 +59,21 @@ void IndexIVFFlat::add_core (idx_t n, const float * x, const int64_t *xids,
|
|
60
59
|
}
|
61
60
|
int64_t n_add = 0;
|
62
61
|
for (size_t i = 0; i < n; i++) {
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
if (list_no < 0)
|
67
|
-
continue;
|
68
|
-
const float *xi = x + i * d;
|
69
|
-
size_t offset = invlists->add_entry (
|
70
|
-
list_no, id, (const uint8_t*) xi);
|
62
|
+
idx_t id = xids ? xids[i] : ntotal + i;
|
63
|
+
idx_t list_no = idx [i];
|
64
|
+
size_t offset;
|
71
65
|
|
72
|
-
if (
|
73
|
-
|
74
|
-
|
66
|
+
if (list_no >= 0) {
|
67
|
+
const float *xi = x + i * d;
|
68
|
+
offset = invlists->add_entry (
|
69
|
+
list_no, id, (const uint8_t*) xi);
|
70
|
+
n_add++;
|
71
|
+
} else {
|
72
|
+
offset = 0;
|
73
|
+
}
|
74
|
+
direct_map.add_single_id (id, list_no, offset);
|
75
75
|
}
|
76
|
+
|
76
77
|
if (verbose) {
|
77
78
|
printf("IndexIVFFlat::add_core: added %ld / %ld vectors\n",
|
78
79
|
n_add, n);
|
@@ -158,7 +159,7 @@ struct IVFFlatScanner: InvertedListScanner {
|
|
158
159
|
fvec_inner_product (xi, yj, d) : fvec_L2sqr (xi, yj, d);
|
159
160
|
if (C::cmp (simi[0], dis)) {
|
160
161
|
heap_pop<C> (k, simi, idxi);
|
161
|
-
int64_t id = store_pairs ? (list_no
|
162
|
+
int64_t id = store_pairs ? lo_build (list_no, j) : ids[j];
|
162
163
|
heap_push<C> (k, simi, idxi, dis, id);
|
163
164
|
nup++;
|
164
165
|
}
|
@@ -178,7 +179,7 @@ struct IVFFlatScanner: InvertedListScanner {
|
|
178
179
|
float dis = metric == METRIC_INNER_PRODUCT ?
|
179
180
|
fvec_inner_product (xi, yj, d) : fvec_L2sqr (xi, yj, d);
|
180
181
|
if (C::cmp (radius, dis)) {
|
181
|
-
int64_t id = store_pairs ? (list_no
|
182
|
+
int64_t id = store_pairs ? lo_build (list_no, j) : ids[j];
|
182
183
|
res.add (dis, id);
|
183
184
|
}
|
184
185
|
}
|
@@ -209,41 +210,6 @@ InvertedListScanner* IndexIVFFlat::get_InvertedListScanner
|
|
209
210
|
|
210
211
|
|
211
212
|
|
212
|
-
void IndexIVFFlat::update_vectors (int n, idx_t *new_ids, const float *x)
|
213
|
-
{
|
214
|
-
|
215
|
-
FAISS_THROW_IF_NOT (maintain_direct_map);
|
216
|
-
FAISS_THROW_IF_NOT (is_trained);
|
217
|
-
std::vector<idx_t> assign (n);
|
218
|
-
quantizer->assign (n, x, assign.data());
|
219
|
-
|
220
|
-
for (size_t i = 0; i < n; i++) {
|
221
|
-
idx_t id = new_ids[i];
|
222
|
-
FAISS_THROW_IF_NOT_MSG (0 <= id && id < ntotal,
|
223
|
-
"id to update out of range");
|
224
|
-
{ // remove old one
|
225
|
-
int64_t dm = direct_map[id];
|
226
|
-
int64_t ofs = dm & 0xffffffff;
|
227
|
-
int64_t il = dm >> 32;
|
228
|
-
size_t l = invlists->list_size (il);
|
229
|
-
if (ofs != l - 1) { // move l - 1 to ofs
|
230
|
-
int64_t id2 = invlists->get_single_id (il, l - 1);
|
231
|
-
direct_map[id2] = (il << 32) | ofs;
|
232
|
-
invlists->update_entry (il, ofs, id2,
|
233
|
-
invlists->get_single_code (il, l - 1));
|
234
|
-
}
|
235
|
-
invlists->resize (il, l - 1);
|
236
|
-
}
|
237
|
-
{ // insert new one
|
238
|
-
int64_t il = assign[i];
|
239
|
-
size_t l = invlists->list_size (il);
|
240
|
-
int64_t dm = (il << 32) | l;
|
241
|
-
direct_map[id] = dm;
|
242
|
-
invlists->add_entry (il, id, (const uint8_t*)(x + i * d));
|
243
|
-
}
|
244
|
-
}
|
245
|
-
|
246
|
-
}
|
247
213
|
|
248
214
|
void IndexIVFFlat::reconstruct_from_offset (int64_t list_no, int64_t offset,
|
249
215
|
float* recons) const
|
@@ -295,8 +261,7 @@ void IndexIVFFlatDedup::add_with_ids(
|
|
295
261
|
|
296
262
|
FAISS_THROW_IF_NOT (is_trained);
|
297
263
|
assert (invlists);
|
298
|
-
FAISS_THROW_IF_NOT_MSG (
|
299
|
-
!maintain_direct_map,
|
264
|
+
FAISS_THROW_IF_NOT_MSG (direct_map.no(),
|
300
265
|
"IVFFlatDedup not implemented with direct_map");
|
301
266
|
int64_t * idx = new int64_t [na];
|
302
267
|
ScopeDeleter<int64_t> del (idx);
|
@@ -431,7 +396,7 @@ size_t IndexIVFFlatDedup::remove_ids(const IDSelector& sel)
|
|
431
396
|
|
432
397
|
// mostly copied from IndexIVF.cpp
|
433
398
|
|
434
|
-
FAISS_THROW_IF_NOT_MSG (
|
399
|
+
FAISS_THROW_IF_NOT_MSG (direct_map.no(),
|
435
400
|
"direct map remove not implemented");
|
436
401
|
|
437
402
|
std::vector<int64_t> toremove(nlist);
|
@@ -484,7 +449,7 @@ void IndexIVFFlatDedup::range_search(
|
|
484
449
|
FAISS_THROW_MSG ("not implemented");
|
485
450
|
}
|
486
451
|
|
487
|
-
void IndexIVFFlatDedup::update_vectors (int , idx_t *, const float *)
|
452
|
+
void IndexIVFFlatDedup::update_vectors (int , const idx_t *, const float *)
|
488
453
|
{
|
489
454
|
FAISS_THROW_MSG ("not implemented");
|
490
455
|
}
|
data/vendor/faiss/IndexIVFFlat.h
CHANGED
@@ -44,15 +44,6 @@ struct IndexIVFFlat: IndexIVF {
|
|
44
44
|
InvertedListScanner *get_InvertedListScanner (bool store_pairs)
|
45
45
|
const override;
|
46
46
|
|
47
|
-
/** Update a subset of vectors.
|
48
|
-
*
|
49
|
-
* The index must have a direct_map
|
50
|
-
*
|
51
|
-
* @param nv nb of vectors to update
|
52
|
-
* @param idx vector indices to update, size nv
|
53
|
-
* @param v vectors of new values, size nv*d
|
54
|
-
*/
|
55
|
-
virtual void update_vectors (int nv, idx_t *idx, const float *v);
|
56
47
|
|
57
48
|
void reconstruct_from_offset (int64_t list_no, int64_t offset,
|
58
49
|
float* recons) const override;
|
@@ -99,8 +90,7 @@ struct IndexIVFFlatDedup: IndexIVFFlat {
|
|
99
90
|
RangeSearchResult* result) const override;
|
100
91
|
|
101
92
|
/// not implemented
|
102
|
-
void update_vectors (int nv, idx_t *idx, const float *v) override;
|
103
|
-
|
93
|
+
void update_vectors (int nv, const idx_t *idx, const float *v) override;
|
104
94
|
|
105
95
|
/// not implemented
|
106
96
|
void reconstruct_from_offset (int64_t list_no, int64_t offset,
|
data/vendor/faiss/IndexIVFPQ.cpp
CHANGED
@@ -36,8 +36,8 @@ namespace faiss {
|
|
36
36
|
******************************************/
|
37
37
|
|
38
38
|
IndexIVFPQ::IndexIVFPQ (Index * quantizer, size_t d, size_t nlist,
|
39
|
-
size_t M, size_t nbits_per_idx):
|
40
|
-
IndexIVF (quantizer, d, nlist, 0,
|
39
|
+
size_t M, size_t nbits_per_idx, MetricType metric):
|
40
|
+
IndexIVF (quantizer, d, nlist, 0, metric),
|
41
41
|
pq (d, M, nbits_per_idx)
|
42
42
|
{
|
43
43
|
FAISS_THROW_IF_NOT (nbits_per_idx <= 8);
|
@@ -278,6 +278,8 @@ void IndexIVFPQ::add_core_o (idx_t n, const float * x, const idx_t *xids,
|
|
278
278
|
|
279
279
|
InterruptCallback::check();
|
280
280
|
|
281
|
+
direct_map.check_can_add (xids);
|
282
|
+
|
281
283
|
FAISS_THROW_IF_NOT (is_trained);
|
282
284
|
double t0 = getmillisecs ();
|
283
285
|
const idx_t * idx;
|
@@ -312,13 +314,14 @@ void IndexIVFPQ::add_core_o (idx_t n, const float * x, const idx_t *xids,
|
|
312
314
|
size_t n_ignore = 0;
|
313
315
|
for (size_t i = 0; i < n; i++) {
|
314
316
|
idx_t key = idx[i];
|
317
|
+
idx_t id = xids ? xids[i] : ntotal + i;
|
315
318
|
if (key < 0) {
|
319
|
+
direct_map.add_single_id (id, -1, 0);
|
316
320
|
n_ignore ++;
|
317
321
|
if (residuals_2)
|
318
322
|
memset (residuals_2, 0, sizeof(*residuals_2) * d);
|
319
323
|
continue;
|
320
324
|
}
|
321
|
-
idx_t id = xids ? xids[i] : ntotal + i;
|
322
325
|
|
323
326
|
uint8_t *code = xcodes + i * code_size;
|
324
327
|
size_t offset = invlists->add_entry (key, id, code);
|
@@ -331,11 +334,9 @@ void IndexIVFPQ::add_core_o (idx_t n, const float * x, const idx_t *xids,
|
|
331
334
|
res2[j] = xi[j] - res2[j];
|
332
335
|
}
|
333
336
|
|
334
|
-
|
335
|
-
direct_map.push_back (key << 32 | offset);
|
337
|
+
direct_map.add_single_id (id, key, offset);
|
336
338
|
}
|
337
339
|
|
338
|
-
|
339
340
|
double t3 = getmillisecs ();
|
340
341
|
if(verbose) {
|
341
342
|
char comment[100] = {0};
|
@@ -802,7 +803,7 @@ struct KnnSearchResults {
|
|
802
803
|
inline void add (idx_t j, float dis) {
|
803
804
|
if (C::cmp (heap_sim[0], dis)) {
|
804
805
|
heap_pop<C> (k, heap_sim, heap_ids);
|
805
|
-
idx_t id = ids ? ids[j] : (key
|
806
|
+
idx_t id = ids ? ids[j] : lo_build (key, j);
|
806
807
|
heap_push<C> (k, heap_sim, heap_ids, dis, id);
|
807
808
|
nup++;
|
808
809
|
}
|
@@ -821,7 +822,7 @@ struct RangeSearchResults {
|
|
821
822
|
|
822
823
|
inline void add (idx_t j, float dis) {
|
823
824
|
if (C::cmp (radius, dis)) {
|
824
|
-
idx_t id = ids ? ids[j] : (key
|
825
|
+
idx_t id = ids ? ids[j] : lo_build (key, j);
|
825
826
|
rres.add (dis, id);
|
826
827
|
}
|
827
828
|
}
|
@@ -834,7 +835,7 @@ struct RangeSearchResults {
|
|
834
835
|
* The scanning functions call their favorite precompute_*
|
835
836
|
* function to precompute the tables they need.
|
836
837
|
*****************************************************/
|
837
|
-
template <typename IDType, MetricType METRIC_TYPE>
|
838
|
+
template <typename IDType, MetricType METRIC_TYPE, class PQDecoder>
|
838
839
|
struct IVFPQScannerT: QueryTables {
|
839
840
|
|
840
841
|
const uint8_t * list_codes;
|
@@ -844,7 +845,6 @@ struct IVFPQScannerT: QueryTables {
|
|
844
845
|
IVFPQScannerT (const IndexIVFPQ & ivfpq, const IVFSearchParameters *params):
|
845
846
|
QueryTables (ivfpq, params)
|
846
847
|
{
|
847
|
-
FAISS_THROW_IF_NOT (pq.nbits == 8);
|
848
848
|
assert(METRIC_TYPE == metric_type);
|
849
849
|
}
|
850
850
|
|
@@ -872,12 +872,13 @@ struct IVFPQScannerT: QueryTables {
|
|
872
872
|
SearchResultType & res) const
|
873
873
|
{
|
874
874
|
for (size_t j = 0; j < ncode; j++) {
|
875
|
-
|
875
|
+
PQDecoder decoder(codes, pq.nbits);
|
876
|
+
codes += pq.code_size;
|
876
877
|
float dis = dis0;
|
877
878
|
const float *tab = sim_table;
|
878
879
|
|
879
880
|
for (size_t m = 0; m < pq.M; m++) {
|
880
|
-
dis += tab[
|
881
|
+
dis += tab[decoder.decode()];
|
881
882
|
tab += pq.ksub;
|
882
883
|
}
|
883
884
|
|
@@ -893,12 +894,14 @@ struct IVFPQScannerT: QueryTables {
|
|
893
894
|
SearchResultType & res) const
|
894
895
|
{
|
895
896
|
for (size_t j = 0; j < ncode; j++) {
|
897
|
+
PQDecoder decoder(codes, pq.nbits);
|
898
|
+
codes += pq.code_size;
|
896
899
|
|
897
900
|
float dis = dis0;
|
898
901
|
const float *tab = sim_table_2;
|
899
902
|
|
900
903
|
for (size_t m = 0; m < pq.M; m++) {
|
901
|
-
int ci =
|
904
|
+
int ci = decoder.decode();
|
902
905
|
dis += sim_table_ptrs [m][ci] - 2 * tab [ci];
|
903
906
|
tab += pq.ksub;
|
904
907
|
}
|
@@ -963,12 +966,13 @@ struct IVFPQScannerT: QueryTables {
|
|
963
966
|
int hd = hc.hamming (b_code);
|
964
967
|
if (hd < ht) {
|
965
968
|
n_hamming_pass ++;
|
969
|
+
PQDecoder decoder(codes, pq.nbits);
|
966
970
|
|
967
971
|
float dis = dis0;
|
968
972
|
const float *tab = sim_table;
|
969
973
|
|
970
974
|
for (size_t m = 0; m < pq.M; m++) {
|
971
|
-
dis += tab[
|
975
|
+
dis += tab[decoder.decode()];
|
972
976
|
tab += pq.ksub;
|
973
977
|
}
|
974
978
|
|
@@ -1023,16 +1027,18 @@ struct IVFPQScannerT: QueryTables {
|
|
1023
1027
|
* much we precompute (2 = precompute distance tables, 1 = precompute
|
1024
1028
|
* pointers to distances, 0 = compute distances one by one).
|
1025
1029
|
* Currently only 2 is supported */
|
1026
|
-
template<MetricType METRIC_TYPE, class C,
|
1030
|
+
template<MetricType METRIC_TYPE, class C, class PQDecoder>
|
1027
1031
|
struct IVFPQScanner:
|
1028
|
-
IVFPQScannerT<Index::idx_t, METRIC_TYPE>,
|
1032
|
+
IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
|
1029
1033
|
InvertedListScanner
|
1030
1034
|
{
|
1031
1035
|
bool store_pairs;
|
1036
|
+
int precompute_mode;
|
1032
1037
|
|
1033
|
-
IVFPQScanner(const IndexIVFPQ & ivfpq, bool store_pairs
|
1034
|
-
|
1035
|
-
|
1038
|
+
IVFPQScanner(const IndexIVFPQ & ivfpq, bool store_pairs,
|
1039
|
+
int precompute_mode):
|
1040
|
+
IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>(ivfpq, nullptr),
|
1041
|
+
store_pairs(store_pairs), precompute_mode(precompute_mode)
|
1036
1042
|
{
|
1037
1043
|
}
|
1038
1044
|
|
@@ -1048,9 +1054,10 @@ struct IVFPQScanner:
|
|
1048
1054
|
assert(precompute_mode == 2);
|
1049
1055
|
float dis = this->dis0;
|
1050
1056
|
const float *tab = this->sim_table;
|
1057
|
+
PQDecoder decoder(code, this->pq.nbits);
|
1051
1058
|
|
1052
1059
|
for (size_t m = 0; m < this->pq.M; m++) {
|
1053
|
-
dis += tab[
|
1060
|
+
dis += tab[decoder.decode()];
|
1054
1061
|
tab += this->pq.ksub;
|
1055
1062
|
}
|
1056
1063
|
return dis;
|
@@ -1115,7 +1122,22 @@ struct IVFPQScanner:
|
|
1115
1122
|
}
|
1116
1123
|
};
|
1117
1124
|
|
1125
|
+
template<class PQDecoder>
|
1126
|
+
InvertedListScanner *get_InvertedListScanner1 (const IndexIVFPQ &index,
|
1127
|
+
bool store_pairs)
|
1128
|
+
{
|
1118
1129
|
|
1130
|
+
if (index.metric_type == METRIC_INNER_PRODUCT) {
|
1131
|
+
return new IVFPQScanner
|
1132
|
+
<METRIC_INNER_PRODUCT, CMin<float, idx_t>, PQDecoder>
|
1133
|
+
(index, store_pairs, 2);
|
1134
|
+
} else if (index.metric_type == METRIC_L2) {
|
1135
|
+
return new IVFPQScanner
|
1136
|
+
<METRIC_L2, CMax<float, idx_t>, PQDecoder>
|
1137
|
+
(index, store_pairs, 2);
|
1138
|
+
}
|
1139
|
+
return nullptr;
|
1140
|
+
}
|
1119
1141
|
|
1120
1142
|
|
1121
1143
|
} // anonymous namespace
|
@@ -1123,12 +1145,13 @@ struct IVFPQScanner:
|
|
1123
1145
|
InvertedListScanner *
|
1124
1146
|
IndexIVFPQ::get_InvertedListScanner (bool store_pairs) const
|
1125
1147
|
{
|
1126
|
-
|
1127
|
-
|
1128
|
-
|
1129
|
-
} else if (
|
1130
|
-
return
|
1131
|
-
|
1148
|
+
|
1149
|
+
if (pq.nbits == 8) {
|
1150
|
+
return get_InvertedListScanner1<PQDecoder8> (*this, store_pairs);
|
1151
|
+
} else if (pq.nbits == 16) {
|
1152
|
+
return get_InvertedListScanner1<PQDecoder16> (*this, store_pairs);
|
1153
|
+
} else {
|
1154
|
+
return get_InvertedListScanner1<PQDecoderGeneric> (*this, store_pairs);
|
1132
1155
|
}
|
1133
1156
|
return nullptr;
|
1134
1157
|
|
data/vendor/faiss/IndexIVFPQ.h
CHANGED
@@ -42,14 +42,14 @@ struct IndexIVFPQ: IndexIVF {
|
|
42
42
|
int polysemous_ht; ///< Hamming thresh for polysemous filtering
|
43
43
|
|
44
44
|
/** Precompute table that speed up query preprocessing at some
|
45
|
-
* memory cost
|
45
|
+
* memory cost (used only for by_residual with L2 metric)
|
46
46
|
* =-1: force disable
|
47
47
|
* =0: decide heuristically (default: use tables only if they are
|
48
48
|
* < precomputed_tables_max_bytes)
|
49
49
|
* =1: tables that work for all quantizers (size 256 * nlist * M)
|
50
50
|
* =2: specific version for MultiIndexQuantizer (much more compact)
|
51
51
|
*/
|
52
|
-
int use_precomputed_table;
|
52
|
+
int use_precomputed_table;
|
53
53
|
static size_t precomputed_table_max_bytes;
|
54
54
|
|
55
55
|
/// if use_precompute_table
|
@@ -58,7 +58,7 @@ struct IndexIVFPQ: IndexIVF {
|
|
58
58
|
|
59
59
|
IndexIVFPQ (
|
60
60
|
Index * quantizer, size_t d, size_t nlist,
|
61
|
-
size_t M, size_t nbits_per_idx);
|
61
|
+
size_t M, size_t nbits_per_idx, MetricType metric = METRIC_L2);
|
62
62
|
|
63
63
|
void add_with_ids(idx_t n, const float* x, const idx_t* xids = nullptr)
|
64
64
|
override;
|
@@ -93,9 +93,9 @@ struct IndexIVFPQ: IndexIVF {
|
|
93
93
|
* the duplicates are returned in pre-allocated arrays (see the
|
94
94
|
* max sizes).
|
95
95
|
*
|
96
|
-
* @
|
96
|
+
* @param lims limits between groups of duplicates
|
97
97
|
* (max size ntotal / 2 + 1)
|
98
|
-
* @
|
98
|
+
* @param ids ids[lims[i]] : ids[lims[i+1]-1] is a group of
|
99
99
|
* duplicates (max size ntotal)
|
100
100
|
* @return n number of groups found
|
101
101
|
*/
|
@@ -135,15 +135,14 @@ struct IndexIVFPQ: IndexIVF {
|
|
135
135
|
/// statistics are robust to internal threading, but not if
|
136
136
|
/// IndexIVFPQ::search_preassigned is called by multiple threads
|
137
137
|
struct IndexIVFPQStats {
|
138
|
-
size_t nrefine;
|
138
|
+
size_t nrefine; ///< nb of refines (IVFPQR)
|
139
139
|
|
140
140
|
size_t n_hamming_pass;
|
141
|
-
|
141
|
+
///< nb of passed Hamming distance tests (for polysemous)
|
142
142
|
|
143
|
-
// timings measured with the CPU RTC
|
144
|
-
// on all threads
|
143
|
+
// timings measured with the CPU RTC on all threads
|
145
144
|
size_t search_cycles;
|
146
|
-
size_t refine_cycles;
|
145
|
+
size_t refine_cycles; ///< only for IVFPQR
|
147
146
|
|
148
147
|
IndexIVFPQStats () {reset (); }
|
149
148
|
void reset ();
|