faiss 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +18 -18
- data/README.md +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/Clustering.cpp +318 -53
- data/vendor/faiss/Clustering.h +39 -11
- data/vendor/faiss/DirectMap.cpp +267 -0
- data/vendor/faiss/DirectMap.h +120 -0
- data/vendor/faiss/IVFlib.cpp +24 -4
- data/vendor/faiss/IVFlib.h +4 -0
- data/vendor/faiss/Index.h +5 -24
- data/vendor/faiss/Index2Layer.cpp +0 -1
- data/vendor/faiss/IndexBinary.h +7 -3
- data/vendor/faiss/IndexBinaryFlat.cpp +5 -0
- data/vendor/faiss/IndexBinaryFlat.h +3 -0
- data/vendor/faiss/IndexBinaryHash.cpp +492 -0
- data/vendor/faiss/IndexBinaryHash.h +116 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +160 -107
- data/vendor/faiss/IndexBinaryIVF.h +14 -4
- data/vendor/faiss/IndexFlat.h +2 -1
- data/vendor/faiss/IndexHNSW.cpp +68 -16
- data/vendor/faiss/IndexHNSW.h +3 -3
- data/vendor/faiss/IndexIVF.cpp +72 -76
- data/vendor/faiss/IndexIVF.h +24 -5
- data/vendor/faiss/IndexIVFFlat.cpp +19 -54
- data/vendor/faiss/IndexIVFFlat.h +1 -11
- data/vendor/faiss/IndexIVFPQ.cpp +49 -26
- data/vendor/faiss/IndexIVFPQ.h +9 -10
- data/vendor/faiss/IndexIVFPQR.cpp +2 -2
- data/vendor/faiss/IndexIVFSpectralHash.cpp +2 -2
- data/vendor/faiss/IndexLSH.h +4 -1
- data/vendor/faiss/IndexPreTransform.cpp +0 -1
- data/vendor/faiss/IndexScalarQuantizer.cpp +8 -1
- data/vendor/faiss/InvertedLists.cpp +0 -2
- data/vendor/faiss/MetaIndexes.cpp +0 -1
- data/vendor/faiss/MetricType.h +36 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +13 -7
- data/vendor/faiss/c_api/Clustering_c.h +11 -5
- data/vendor/faiss/c_api/IndexIVF_c.cpp +7 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +7 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +21 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.h +32 -0
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +185 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +4 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +1 -1
- data/vendor/faiss/gpu/GpuDistance.h +93 -0
- data/vendor/faiss/gpu/GpuIndex.h +7 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +0 -10
- data/vendor/faiss/gpu/GpuIndexIVF.h +1 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +8 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +49 -27
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +110 -2
- data/vendor/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +17 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +14 -3
- data/vendor/faiss/impl/HNSW.cpp +0 -1
- data/vendor/faiss/impl/PolysemousTraining.h +5 -5
- data/vendor/faiss/impl/ProductQuantizer-inl.h +138 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +1 -113
- data/vendor/faiss/impl/ProductQuantizer.h +42 -47
- data/vendor/faiss/impl/index_read.cpp +103 -7
- data/vendor/faiss/impl/index_write.cpp +101 -5
- data/vendor/faiss/impl/io.cpp +111 -1
- data/vendor/faiss/impl/io.h +38 -0
- data/vendor/faiss/index_factory.cpp +0 -1
- data/vendor/faiss/tests/test_merge.cpp +0 -1
- data/vendor/faiss/tests/test_pq_encoding.cpp +6 -6
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +1 -0
- data/vendor/faiss/utils/distances.cpp +4 -5
- data/vendor/faiss/utils/distances_simd.cpp +0 -1
- data/vendor/faiss/utils/hamming.cpp +85 -3
- data/vendor/faiss/utils/hamming.h +20 -0
- data/vendor/faiss/utils/utils.cpp +0 -96
- data/vendor/faiss/utils/utils.h +0 -15
- metadata +11 -3
- data/lib/faiss/ext.bundle +0 -0
@@ -0,0 +1,116 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// -*- c++ -*-
|
9
|
+
|
10
|
+
#ifndef FAISS_BINARY_HASH_H
|
11
|
+
#define FAISS_BINARY_HASH_H
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
#include <vector>
|
16
|
+
#include <unordered_map>
|
17
|
+
|
18
|
+
#include <faiss/IndexBinary.h>
|
19
|
+
#include <faiss/IndexBinaryFlat.h>
|
20
|
+
#include <faiss/utils/Heap.h>
|
21
|
+
|
22
|
+
|
23
|
+
namespace faiss {
|
24
|
+
|
25
|
+
struct RangeSearchResult;
|
26
|
+
|
27
|
+
|
28
|
+
/** just uses the first b bits as a hash value */
|
29
|
+
struct IndexBinaryHash : IndexBinary {
|
30
|
+
|
31
|
+
struct InvertedList {
|
32
|
+
std::vector<idx_t> ids;
|
33
|
+
std::vector<uint8_t> vecs;
|
34
|
+
|
35
|
+
void add (idx_t id, size_t code_size, const uint8_t *code);
|
36
|
+
};
|
37
|
+
|
38
|
+
using InvertedListMap = std::unordered_map<idx_t, InvertedList>;
|
39
|
+
InvertedListMap invlists;
|
40
|
+
|
41
|
+
int b, nflip;
|
42
|
+
|
43
|
+
IndexBinaryHash(int d, int b);
|
44
|
+
|
45
|
+
IndexBinaryHash();
|
46
|
+
|
47
|
+
void reset() override;
|
48
|
+
|
49
|
+
void add(idx_t n, const uint8_t *x) override;
|
50
|
+
|
51
|
+
void add_with_ids(idx_t n, const uint8_t *x, const idx_t *xids) override;
|
52
|
+
|
53
|
+
void range_search(idx_t n, const uint8_t *x, int radius,
|
54
|
+
RangeSearchResult *result) const override;
|
55
|
+
|
56
|
+
void search(idx_t n, const uint8_t *x, idx_t k,
|
57
|
+
int32_t *distances, idx_t *labels) const override;
|
58
|
+
|
59
|
+
void display() const;
|
60
|
+
size_t hashtable_size() const;
|
61
|
+
|
62
|
+
};
|
63
|
+
|
64
|
+
struct IndexBinaryHashStats {
|
65
|
+
size_t nq; // nb of queries run
|
66
|
+
size_t n0; // nb of empty lists
|
67
|
+
size_t nlist; // nb of non-empty inverted lists scanned
|
68
|
+
size_t ndis; // nb of distances computed
|
69
|
+
|
70
|
+
IndexBinaryHashStats () {reset (); }
|
71
|
+
void reset ();
|
72
|
+
};
|
73
|
+
|
74
|
+
extern IndexBinaryHashStats indexBinaryHash_stats;
|
75
|
+
|
76
|
+
|
77
|
+
/** uses nhash hash maps of b bits each, mapping hash values to ids; the vectors themselves are kept in a separate flat storage index */
|
78
|
+
struct IndexBinaryMultiHash: IndexBinary {
|
79
|
+
|
80
|
+
// where the vectors are actually stored
|
81
|
+
IndexBinaryFlat *storage;
|
82
|
+
bool own_fields;
|
83
|
+
|
84
|
+
// maps hash values to the ids that hash to them
|
85
|
+
using Map = std::unordered_map<idx_t, std::vector<idx_t> >;
|
86
|
+
|
87
|
+
// the different hashes, size nhash
|
88
|
+
std::vector<Map> maps;
|
89
|
+
|
90
|
+
int nhash; ///< nb of hash maps
|
91
|
+
int b; ///< nb bits per hash map
|
92
|
+
int nflip; ///< nb bit flips to use at search time
|
93
|
+
|
94
|
+
IndexBinaryMultiHash(int d, int nhash, int b);
|
95
|
+
|
96
|
+
IndexBinaryMultiHash();
|
97
|
+
|
98
|
+
~IndexBinaryMultiHash();
|
99
|
+
|
100
|
+
void reset() override;
|
101
|
+
|
102
|
+
void add(idx_t n, const uint8_t *x) override;
|
103
|
+
|
104
|
+
void range_search(idx_t n, const uint8_t *x, int radius,
|
105
|
+
RangeSearchResult *result) const override;
|
106
|
+
|
107
|
+
void search(idx_t n, const uint8_t *x, idx_t k,
|
108
|
+
int32_t *distances, idx_t *labels) const override;
|
109
|
+
|
110
|
+
size_t hashtable_size() const;
|
111
|
+
|
112
|
+
};
|
113
|
+
|
114
|
+
}
|
115
|
+
|
116
|
+
#endif
|
@@ -11,14 +11,17 @@
|
|
11
11
|
#include <faiss/IndexBinaryIVF.h>
|
12
12
|
|
13
13
|
#include <cstdio>
|
14
|
+
#include <omp.h>
|
15
|
+
|
14
16
|
#include <memory>
|
15
17
|
|
18
|
+
|
16
19
|
#include <faiss/utils/hamming.h>
|
17
20
|
#include <faiss/utils/utils.h>
|
18
|
-
|
19
21
|
#include <faiss/impl/AuxIndexStructures.h>
|
20
22
|
#include <faiss/impl/FaissAssert.h>
|
21
23
|
#include <faiss/IndexFlat.h>
|
24
|
+
#include <faiss/IndexLSH.h>
|
22
25
|
|
23
26
|
|
24
27
|
namespace faiss {
|
@@ -29,7 +32,6 @@ IndexBinaryIVF::IndexBinaryIVF(IndexBinary *quantizer, size_t d, size_t nlist)
|
|
29
32
|
own_invlists(true),
|
30
33
|
nprobe(1),
|
31
34
|
max_codes(0),
|
32
|
-
maintain_direct_map(false),
|
33
35
|
quantizer(quantizer),
|
34
36
|
nlist(nlist),
|
35
37
|
own_fields(false),
|
@@ -46,7 +48,6 @@ IndexBinaryIVF::IndexBinaryIVF()
|
|
46
48
|
own_invlists(false),
|
47
49
|
nprobe(1),
|
48
50
|
max_codes(0),
|
49
|
-
maintain_direct_map(false),
|
50
51
|
quantizer(nullptr),
|
51
52
|
nlist(0),
|
52
53
|
own_fields(false),
|
@@ -65,8 +66,7 @@ void IndexBinaryIVF::add_core(idx_t n, const uint8_t *x, const idx_t *xids,
|
|
65
66
|
const idx_t *precomputed_idx) {
|
66
67
|
FAISS_THROW_IF_NOT(is_trained);
|
67
68
|
assert(invlists);
|
68
|
-
|
69
|
-
"cannot have direct map and add with ids");
|
69
|
+
direct_map.check_can_add (xids);
|
70
70
|
|
71
71
|
const idx_t * idx;
|
72
72
|
|
@@ -85,13 +85,15 @@ void IndexBinaryIVF::add_core(idx_t n, const uint8_t *x, const idx_t *xids,
|
|
85
85
|
idx_t id = xids ? xids[i] : ntotal + i;
|
86
86
|
idx_t list_no = idx[i];
|
87
87
|
|
88
|
-
if (list_no < 0)
|
89
|
-
|
90
|
-
|
91
|
-
|
88
|
+
if (list_no < 0) {
|
89
|
+
direct_map.add_single_id (id, -1, 0);
|
90
|
+
} else {
|
91
|
+
const uint8_t *xi = x + i * code_size;
|
92
|
+
size_t offset = invlists->add_entry(list_no, id, xi);
|
93
|
+
|
94
|
+
direct_map.add_single_id (id, list_no, offset);
|
95
|
+
}
|
92
96
|
|
93
|
-
if (maintain_direct_map)
|
94
|
-
direct_map.push_back(list_no << 32 | offset);
|
95
97
|
n_add++;
|
96
98
|
}
|
97
99
|
if (verbose) {
|
@@ -101,29 +103,21 @@ void IndexBinaryIVF::add_core(idx_t n, const uint8_t *x, const idx_t *xids,
|
|
101
103
|
ntotal += n_add;
|
102
104
|
}
|
103
105
|
|
104
|
-
void IndexBinaryIVF::make_direct_map(bool
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
direct_map.resize(ntotal, -1);
|
111
|
-
for (size_t key = 0; key < nlist; key++) {
|
112
|
-
size_t list_size = invlists->list_size(key);
|
113
|
-
const idx_t *idlist = invlists->get_ids(key);
|
114
|
-
|
115
|
-
for (size_t ofs = 0; ofs < list_size; ofs++) {
|
116
|
-
FAISS_THROW_IF_NOT_MSG(0 <= idlist[ofs] && idlist[ofs] < ntotal,
|
117
|
-
"direct map supported only for seuquential ids");
|
118
|
-
direct_map[idlist[ofs]] = key << 32 | ofs;
|
119
|
-
}
|
106
|
+
void IndexBinaryIVF::make_direct_map (bool b)
|
107
|
+
{
|
108
|
+
if (b) {
|
109
|
+
direct_map.set_type (DirectMap::Array, invlists, ntotal);
|
110
|
+
} else {
|
111
|
+
direct_map.set_type (DirectMap::NoMap, invlists, ntotal);
|
120
112
|
}
|
121
|
-
} else {
|
122
|
-
direct_map.clear();
|
123
|
-
}
|
124
|
-
maintain_direct_map = new_maintain_direct_map;
|
125
113
|
}
|
126
114
|
|
115
|
+
void IndexBinaryIVF::set_direct_map_type (DirectMap::Type type)
|
116
|
+
{
|
117
|
+
direct_map.set_type (type, invlists, ntotal);
|
118
|
+
}
|
119
|
+
|
120
|
+
|
127
121
|
void IndexBinaryIVF::search(idx_t n, const uint8_t *x, idx_t k,
|
128
122
|
int32_t *distances, idx_t *labels) const {
|
129
123
|
std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
|
@@ -142,11 +136,8 @@ void IndexBinaryIVF::search(idx_t n, const uint8_t *x, idx_t k,
|
|
142
136
|
}
|
143
137
|
|
144
138
|
void IndexBinaryIVF::reconstruct(idx_t key, uint8_t *recons) const {
|
145
|
-
|
146
|
-
|
147
|
-
idx_t list_no = direct_map[key] >> 32;
|
148
|
-
idx_t offset = direct_map[key] & 0xffffffff;
|
149
|
-
reconstruct_from_offset(list_no, offset, recons);
|
139
|
+
idx_t lo = direct_map.get (key);
|
140
|
+
reconstruct_from_offset (lo_listno(lo), lo_offset(lo), recons);
|
150
141
|
}
|
151
142
|
|
152
143
|
void IndexBinaryIVF::reconstruct_n(idx_t i0, idx_t ni, uint8_t *recons) const {
|
@@ -215,39 +206,9 @@ void IndexBinaryIVF::reset() {
|
|
215
206
|
}
|
216
207
|
|
217
208
|
size_t IndexBinaryIVF::remove_ids(const IDSelector& sel) {
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
std::vector<idx_t> toremove(nlist);
|
222
|
-
|
223
|
-
#pragma omp parallel for
|
224
|
-
for (idx_t i = 0; i < nlist; i++) {
|
225
|
-
idx_t l0 = invlists->list_size (i), l = l0, j = 0;
|
226
|
-
const idx_t *idsi = invlists->get_ids(i);
|
227
|
-
while (j < l) {
|
228
|
-
if (sel.is_member(idsi[j])) {
|
229
|
-
l--;
|
230
|
-
invlists->update_entry(
|
231
|
-
i, j,
|
232
|
-
invlists->get_single_id(i, l),
|
233
|
-
invlists->get_single_code(i, l));
|
234
|
-
} else {
|
235
|
-
j++;
|
236
|
-
}
|
237
|
-
}
|
238
|
-
toremove[i] = l0 - l;
|
239
|
-
}
|
240
|
-
// this will not run well in parallel on ondisk because of possible shrinks
|
241
|
-
size_t nremove = 0;
|
242
|
-
for (idx_t i = 0; i < nlist; i++) {
|
243
|
-
if (toremove[i] > 0) {
|
244
|
-
nremove += toremove[i];
|
245
|
-
invlists->resize(
|
246
|
-
i, invlists->list_size(i) - toremove[i]);
|
247
|
-
}
|
248
|
-
}
|
249
|
-
ntotal -= nremove;
|
250
|
-
return nremove;
|
209
|
+
size_t nremove = direct_map.remove_ids (sel, invlists);
|
210
|
+
ntotal -= nremove;
|
211
|
+
return nremove;
|
251
212
|
}
|
252
213
|
|
253
214
|
void IndexBinaryIVF::train(idx_t n, const uint8_t *x) {
|
@@ -267,9 +228,6 @@ void IndexBinaryIVF::train(idx_t n, const uint8_t *x) {
|
|
267
228
|
Clustering clus(d, nlist, cp);
|
268
229
|
quantizer->reset();
|
269
230
|
|
270
|
-
std::unique_ptr<float[]> x_f(new float[n * d]);
|
271
|
-
binary_to_real(n * d, x, x_f.get());
|
272
|
-
|
273
231
|
IndexFlatL2 index_tmp(d);
|
274
232
|
|
275
233
|
if (clustering_index && verbose) {
|
@@ -277,8 +235,12 @@ void IndexBinaryIVF::train(idx_t n, const uint8_t *x) {
|
|
277
235
|
clustering_index->d);
|
278
236
|
}
|
279
237
|
|
280
|
-
|
238
|
+
// LSH codec that is able to convert the binary vectors to floats.
|
239
|
+
IndexLSH codec(d, d, false, false);
|
240
|
+
|
241
|
+
clus.train_encoded (n, x, &codec, clustering_index ? *clustering_index : index_tmp);
|
281
242
|
|
243
|
+
// convert clusters to binary
|
282
244
|
std::unique_ptr<uint8_t[]> x_b(new uint8_t[clus.k * code_size]);
|
283
245
|
real_to_binary(d * clus.k, clus.centroids.data(), x_b.get());
|
284
246
|
|
@@ -294,8 +256,7 @@ void IndexBinaryIVF::merge_from(IndexBinaryIVF &other, idx_t add_id) {
|
|
294
256
|
FAISS_THROW_IF_NOT(other.d == d);
|
295
257
|
FAISS_THROW_IF_NOT(other.nlist == nlist);
|
296
258
|
FAISS_THROW_IF_NOT(other.code_size == code_size);
|
297
|
-
FAISS_THROW_IF_NOT_MSG((
|
298
|
-
!other.maintain_direct_map),
|
259
|
+
FAISS_THROW_IF_NOT_MSG(direct_map.no() && other.direct_map.no(),
|
299
260
|
"direct map copy not implemented");
|
300
261
|
FAISS_THROW_IF_NOT_MSG(typeid (*this) == typeid (other),
|
301
262
|
"can only merge indexes of the same type");
|
@@ -322,13 +283,15 @@ namespace {
|
|
322
283
|
using idx_t = Index::idx_t;
|
323
284
|
|
324
285
|
|
325
|
-
template<class HammingComputer
|
286
|
+
template<class HammingComputer>
|
326
287
|
struct IVFBinaryScannerL2: BinaryInvertedListScanner {
|
327
288
|
|
328
289
|
HammingComputer hc;
|
329
290
|
size_t code_size;
|
291
|
+
bool store_pairs;
|
330
292
|
|
331
|
-
IVFBinaryScannerL2 (size_t code_size
|
293
|
+
IVFBinaryScannerL2 (size_t code_size, bool store_pairs):
|
294
|
+
code_size (code_size), store_pairs(store_pairs)
|
332
295
|
{}
|
333
296
|
|
334
297
|
void set_query (const uint8_t *query_vector) override {
|
@@ -357,7 +320,7 @@ struct IVFBinaryScannerL2: BinaryInvertedListScanner {
|
|
357
320
|
uint32_t dis = hc.hamming (codes);
|
358
321
|
if (dis < simi[0]) {
|
359
322
|
heap_pop<C> (k, simi, idxi);
|
360
|
-
idx_t id = store_pairs ? (list_no
|
323
|
+
idx_t id = store_pairs ? lo_build(list_no, j) : ids[j];
|
361
324
|
heap_push<C> (k, simi, idxi, dis, id);
|
362
325
|
nup++;
|
363
326
|
}
|
@@ -366,6 +329,24 @@ struct IVFBinaryScannerL2: BinaryInvertedListScanner {
|
|
366
329
|
return nup;
|
367
330
|
}
|
368
331
|
|
332
|
+
void scan_codes_range (size_t n,
|
333
|
+
const uint8_t *codes,
|
334
|
+
const idx_t *ids,
|
335
|
+
int radius,
|
336
|
+
RangeQueryResult &result) const
|
337
|
+
{
|
338
|
+
size_t nup = 0;
|
339
|
+
for (size_t j = 0; j < n; j++) {
|
340
|
+
uint32_t dis = hc.hamming (codes);
|
341
|
+
if (dis < radius) {
|
342
|
+
int64_t id = store_pairs ? lo_build (list_no, j) : ids[j];
|
343
|
+
result.add (dis, id);
|
344
|
+
}
|
345
|
+
codes += code_size;
|
346
|
+
}
|
347
|
+
|
348
|
+
}
|
349
|
+
|
369
350
|
|
370
351
|
};
|
371
352
|
|
@@ -373,29 +354,6 @@ struct IVFBinaryScannerL2: BinaryInvertedListScanner {
|
|
373
354
|
template <bool store_pairs>
|
374
355
|
BinaryInvertedListScanner *select_IVFBinaryScannerL2 (size_t code_size) {
|
375
356
|
|
376
|
-
switch (code_size) {
|
377
|
-
#define HANDLE_CS(cs) \
|
378
|
-
case cs: \
|
379
|
-
return new IVFBinaryScannerL2<HammingComputer ## cs, store_pairs> (cs);
|
380
|
-
HANDLE_CS(4);
|
381
|
-
HANDLE_CS(8);
|
382
|
-
HANDLE_CS(16);
|
383
|
-
HANDLE_CS(20);
|
384
|
-
HANDLE_CS(32);
|
385
|
-
HANDLE_CS(64);
|
386
|
-
#undef HANDLE_CS
|
387
|
-
default:
|
388
|
-
if (code_size % 8 == 0) {
|
389
|
-
return new IVFBinaryScannerL2<HammingComputerM8,
|
390
|
-
store_pairs> (code_size);
|
391
|
-
} else if (code_size % 4 == 0) {
|
392
|
-
return new IVFBinaryScannerL2<HammingComputerM4,
|
393
|
-
store_pairs> (code_size);
|
394
|
-
} else {
|
395
|
-
return new IVFBinaryScannerL2<HammingComputerDefault,
|
396
|
-
store_pairs> (code_size);
|
397
|
-
}
|
398
|
-
}
|
399
357
|
}
|
400
358
|
|
401
359
|
|
@@ -466,8 +424,10 @@ void search_knn_hamming_heap(const IndexBinaryIVF& ivf,
|
|
466
424
|
ids = sids->get();
|
467
425
|
}
|
468
426
|
|
469
|
-
nheap += scanner->scan_codes (
|
470
|
-
|
427
|
+
nheap += scanner->scan_codes (
|
428
|
+
list_size, scodes.get(),
|
429
|
+
ids, simi, idxi, k
|
430
|
+
);
|
471
431
|
|
472
432
|
nscan += list_size;
|
473
433
|
if (max_codes && nscan >= max_codes)
|
@@ -627,11 +587,26 @@ void search_knn_hamming_count_1 (
|
|
627
587
|
BinaryInvertedListScanner *IndexBinaryIVF::get_InvertedListScanner
|
628
588
|
(bool store_pairs) const
|
629
589
|
{
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
590
|
+
|
591
|
+
#define HC(name) return new IVFBinaryScannerL2<name> (code_size, store_pairs)
|
592
|
+
switch (code_size) {
|
593
|
+
case 4: HC(HammingComputer4);
|
594
|
+
case 8: HC(HammingComputer8);
|
595
|
+
case 16: HC(HammingComputer16);
|
596
|
+
case 20: HC(HammingComputer20);
|
597
|
+
case 32: HC(HammingComputer32);
|
598
|
+
case 64: HC(HammingComputer64);
|
599
|
+
default:
|
600
|
+
if (code_size % 8 == 0) {
|
601
|
+
HC(HammingComputerM8);
|
602
|
+
} else if (code_size % 4 == 0) {
|
603
|
+
HC(HammingComputerM4);
|
604
|
+
} else {
|
605
|
+
HC(HammingComputerDefault);
|
606
|
+
}
|
634
607
|
}
|
608
|
+
#undef HC
|
609
|
+
|
635
610
|
}
|
636
611
|
|
637
612
|
void IndexBinaryIVF::search_preassigned(idx_t n, const uint8_t *x, idx_t k,
|
@@ -657,6 +632,84 @@ void IndexBinaryIVF::search_preassigned(idx_t n, const uint8_t *x, idx_t k,
|
|
657
632
|
}
|
658
633
|
}
|
659
634
|
|
635
|
+
|
636
|
+
void IndexBinaryIVF::range_search(
|
637
|
+
idx_t n, const uint8_t *x, int radius,
|
638
|
+
RangeSearchResult *res) const
|
639
|
+
{
|
640
|
+
|
641
|
+
std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
|
642
|
+
std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n * nprobe]);
|
643
|
+
|
644
|
+
double t0 = getmillisecs();
|
645
|
+
quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
|
646
|
+
indexIVF_stats.quantization_time += getmillisecs() - t0;
|
647
|
+
|
648
|
+
t0 = getmillisecs();
|
649
|
+
invlists->prefetch_lists(idx.get(), n * nprobe);
|
650
|
+
|
651
|
+
bool store_pairs = false;
|
652
|
+
size_t nlistv = 0, ndis = 0;
|
653
|
+
|
654
|
+
std::vector<RangeSearchPartialResult *> all_pres (omp_get_max_threads());
|
655
|
+
|
656
|
+
#pragma omp parallel reduction(+: nlistv, ndis)
|
657
|
+
{
|
658
|
+
RangeSearchPartialResult pres(res);
|
659
|
+
std::unique_ptr<BinaryInvertedListScanner> scanner
|
660
|
+
(get_InvertedListScanner(store_pairs));
|
661
|
+
FAISS_THROW_IF_NOT (scanner.get ());
|
662
|
+
|
663
|
+
all_pres[omp_get_thread_num()] = &pres;
|
664
|
+
|
665
|
+
auto scan_list_func = [&](size_t i, size_t ik, RangeQueryResult &qres)
|
666
|
+
{
|
667
|
+
|
668
|
+
idx_t key = idx[i * nprobe + ik]; /* select the list */
|
669
|
+
if (key < 0) return;
|
670
|
+
FAISS_THROW_IF_NOT_FMT (
|
671
|
+
key < (idx_t) nlist,
|
672
|
+
"Invalid key=%ld at ik=%ld nlist=%ld\n",
|
673
|
+
key, ik, nlist);
|
674
|
+
const size_t list_size = invlists->list_size(key);
|
675
|
+
|
676
|
+
if (list_size == 0) return;
|
677
|
+
|
678
|
+
InvertedLists::ScopedCodes scodes (invlists, key);
|
679
|
+
InvertedLists::ScopedIds ids (invlists, key);
|
680
|
+
|
681
|
+
scanner->set_list (key, coarse_dis[i * nprobe + ik]);
|
682
|
+
nlistv++;
|
683
|
+
ndis += list_size;
|
684
|
+
scanner->scan_codes_range (list_size, scodes.get(),
|
685
|
+
ids.get(), radius, qres);
|
686
|
+
};
|
687
|
+
|
688
|
+
#pragma omp for
|
689
|
+
for (size_t i = 0; i < n; i++) {
|
690
|
+
scanner->set_query (x + i * code_size);
|
691
|
+
|
692
|
+
RangeQueryResult & qres = pres.new_result (i);
|
693
|
+
|
694
|
+
for (size_t ik = 0; ik < nprobe; ik++) {
|
695
|
+
scan_list_func (i, ik, qres);
|
696
|
+
}
|
697
|
+
|
698
|
+
}
|
699
|
+
|
700
|
+
pres.finalize();
|
701
|
+
|
702
|
+
}
|
703
|
+
indexIVF_stats.nq += n;
|
704
|
+
indexIVF_stats.nlist += nlistv;
|
705
|
+
indexIVF_stats.ndis += ndis;
|
706
|
+
indexIVF_stats.search_time += getmillisecs() - t0;
|
707
|
+
|
708
|
+
}
|
709
|
+
|
710
|
+
|
711
|
+
|
712
|
+
|
660
713
|
IndexBinaryIVF::~IndexBinaryIVF() {
|
661
714
|
if (own_invlists) {
|
662
715
|
delete invlists;
|