faiss 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/Clustering.cpp +32 -0
- data/vendor/faiss/faiss/Clustering.h +14 -0
- data/vendor/faiss/faiss/Index.h +1 -1
- data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
- data/vendor/faiss/faiss/Index2Layer.h +2 -16
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
- data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
- data/vendor/faiss/faiss/IndexFlat.h +9 -15
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
- data/vendor/faiss/faiss/IndexIVF.h +25 -7
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
- data/vendor/faiss/faiss/IndexLSH.h +2 -15
- data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
- data/vendor/faiss/faiss/IndexPQ.h +2 -17
- data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
- data/vendor/faiss/faiss/IndexRefine.h +10 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
- data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
- data/vendor/faiss/faiss/VectorTransform.h +3 -0
- data/vendor/faiss/faiss/clone_index.cpp +3 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
- data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
- data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
- data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
- data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
- data/vendor/faiss/faiss/impl/io.cpp +1 -1
- data/vendor/faiss/faiss/impl/io_macros.h +20 -0
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/index_factory.cpp +585 -414
- data/vendor/faiss/faiss/index_factory.h +3 -0
- data/vendor/faiss/faiss/utils/distances.cpp +4 -2
- data/vendor/faiss/faiss/utils/distances.h +36 -3
- data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
- data/vendor/faiss/faiss/utils/utils.h +1 -1
- metadata +12 -5
- data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
@@ -19,17 +19,8 @@
|
|
19
19
|
|
20
20
|
namespace faiss {
|
21
21
|
|
22
|
-
IndexFlat::IndexFlat(idx_t d, MetricType metric)
|
23
|
-
|
24
|
-
void IndexFlat::add(idx_t n, const float* x) {
|
25
|
-
xb.insert(xb.end(), x, x + n * d);
|
26
|
-
ntotal += n;
|
27
|
-
}
|
28
|
-
|
29
|
-
void IndexFlat::reset() {
|
30
|
-
xb.clear();
|
31
|
-
ntotal = 0;
|
32
|
-
}
|
22
|
+
IndexFlat::IndexFlat(idx_t d, MetricType metric)
|
23
|
+
: IndexFlatCodes(sizeof(float) * d, d, metric) {}
|
33
24
|
|
34
25
|
void IndexFlat::search(
|
35
26
|
idx_t n,
|
@@ -43,14 +34,14 @@ void IndexFlat::search(
|
|
43
34
|
|
44
35
|
if (metric_type == METRIC_INNER_PRODUCT) {
|
45
36
|
float_minheap_array_t res = {size_t(n), size_t(k), labels, distances};
|
46
|
-
knn_inner_product(x,
|
37
|
+
knn_inner_product(x, get_xb(), d, n, ntotal, &res);
|
47
38
|
} else if (metric_type == METRIC_L2) {
|
48
39
|
float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances};
|
49
|
-
knn_L2sqr(x,
|
40
|
+
knn_L2sqr(x, get_xb(), d, n, ntotal, &res);
|
50
41
|
} else {
|
51
42
|
float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances};
|
52
43
|
knn_extra_metrics(
|
53
|
-
x,
|
44
|
+
x, get_xb(), d, n, ntotal, metric_type, metric_arg, &res);
|
54
45
|
}
|
55
46
|
}
|
56
47
|
|
@@ -62,10 +53,10 @@ void IndexFlat::range_search(
|
|
62
53
|
switch (metric_type) {
|
63
54
|
case METRIC_INNER_PRODUCT:
|
64
55
|
range_search_inner_product(
|
65
|
-
x,
|
56
|
+
x, get_xb(), d, n, ntotal, radius, result);
|
66
57
|
break;
|
67
58
|
case METRIC_L2:
|
68
|
-
range_search_L2sqr(x,
|
59
|
+
range_search_L2sqr(x, get_xb(), d, n, ntotal, radius, result);
|
69
60
|
break;
|
70
61
|
default:
|
71
62
|
FAISS_THROW_MSG("metric type not supported");
|
@@ -80,37 +71,16 @@ void IndexFlat::compute_distance_subset(
|
|
80
71
|
const idx_t* labels) const {
|
81
72
|
switch (metric_type) {
|
82
73
|
case METRIC_INNER_PRODUCT:
|
83
|
-
fvec_inner_products_by_idx(
|
84
|
-
distances, x, xb.data(), labels, d, n, k);
|
74
|
+
fvec_inner_products_by_idx(distances, x, get_xb(), labels, d, n, k);
|
85
75
|
break;
|
86
76
|
case METRIC_L2:
|
87
|
-
fvec_L2sqr_by_idx(distances, x,
|
77
|
+
fvec_L2sqr_by_idx(distances, x, get_xb(), labels, d, n, k);
|
88
78
|
break;
|
89
79
|
default:
|
90
80
|
FAISS_THROW_MSG("metric type not supported");
|
91
81
|
}
|
92
82
|
}
|
93
83
|
|
94
|
-
size_t IndexFlat::remove_ids(const IDSelector& sel) {
|
95
|
-
idx_t j = 0;
|
96
|
-
for (idx_t i = 0; i < ntotal; i++) {
|
97
|
-
if (sel.is_member(i)) {
|
98
|
-
// should be removed
|
99
|
-
} else {
|
100
|
-
if (i > j) {
|
101
|
-
memmove(&xb[d * j], &xb[d * i], sizeof(xb[0]) * d);
|
102
|
-
}
|
103
|
-
j++;
|
104
|
-
}
|
105
|
-
}
|
106
|
-
size_t nremove = ntotal - j;
|
107
|
-
if (nremove > 0) {
|
108
|
-
ntotal = j;
|
109
|
-
xb.resize(ntotal * d);
|
110
|
-
}
|
111
|
-
return nremove;
|
112
|
-
}
|
113
|
-
|
114
84
|
namespace {
|
115
85
|
|
116
86
|
struct FlatL2Dis : DistanceComputer {
|
@@ -133,7 +103,7 @@ struct FlatL2Dis : DistanceComputer {
|
|
133
103
|
: d(storage.d),
|
134
104
|
nb(storage.ntotal),
|
135
105
|
q(q),
|
136
|
-
b(storage.
|
106
|
+
b(storage.get_xb()),
|
137
107
|
ndis(0) {}
|
138
108
|
|
139
109
|
void set_query(const float* x) override {
|
@@ -161,7 +131,7 @@ struct FlatIPDis : DistanceComputer {
|
|
161
131
|
: d(storage.d),
|
162
132
|
nb(storage.ntotal),
|
163
133
|
q(q),
|
164
|
-
b(storage.
|
134
|
+
b(storage.get_xb()),
|
165
135
|
ndis(0) {}
|
166
136
|
|
167
137
|
void set_query(const float* x) override {
|
@@ -178,25 +148,24 @@ DistanceComputer* IndexFlat::get_distance_computer() const {
|
|
178
148
|
return new FlatIPDis(*this);
|
179
149
|
} else {
|
180
150
|
return get_extra_distance_computer(
|
181
|
-
d, metric_type, metric_arg, ntotal,
|
151
|
+
d, metric_type, metric_arg, ntotal, get_xb());
|
182
152
|
}
|
183
153
|
}
|
184
154
|
|
185
155
|
void IndexFlat::reconstruct(idx_t key, float* recons) const {
|
186
|
-
memcpy(recons, &(
|
187
|
-
}
|
188
|
-
|
189
|
-
/* The standalone codec interface */
|
190
|
-
size_t IndexFlat::sa_code_size() const {
|
191
|
-
return sizeof(float) * d;
|
156
|
+
memcpy(recons, &(codes[key * code_size]), code_size);
|
192
157
|
}
|
193
158
|
|
194
159
|
void IndexFlat::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
|
195
|
-
|
160
|
+
if (n > 0) {
|
161
|
+
memcpy(bytes, x, sizeof(float) * d * n);
|
162
|
+
}
|
196
163
|
}
|
197
164
|
|
198
165
|
void IndexFlat::sa_decode(idx_t n, const uint8_t* bytes, float* x) const {
|
199
|
-
|
166
|
+
if (n > 0) {
|
167
|
+
memcpy(x, bytes, sizeof(float) * d * n);
|
168
|
+
}
|
200
169
|
}
|
201
170
|
|
202
171
|
/***************************************************
|
@@ -211,9 +180,9 @@ IndexFlat1D::IndexFlat1D(bool continuous_update)
|
|
211
180
|
void IndexFlat1D::update_permutation() {
|
212
181
|
perm.resize(ntotal);
|
213
182
|
if (ntotal < 1000000) {
|
214
|
-
fvec_argsort(ntotal,
|
183
|
+
fvec_argsort(ntotal, get_xb(), (size_t*)perm.data());
|
215
184
|
} else {
|
216
|
-
fvec_argsort_parallel(ntotal,
|
185
|
+
fvec_argsort_parallel(ntotal, get_xb(), (size_t*)perm.data());
|
217
186
|
}
|
218
187
|
}
|
219
188
|
|
@@ -238,6 +207,7 @@ void IndexFlat1D::search(
|
|
238
207
|
|
239
208
|
FAISS_THROW_IF_NOT_MSG(
|
240
209
|
perm.size() == ntotal, "Call update_permutation before search");
|
210
|
+
const float* xb = get_xb();
|
241
211
|
|
242
212
|
#pragma omp parallel for
|
243
213
|
for (idx_t i = 0; i < n; i++) {
|
@@ -12,21 +12,14 @@
|
|
12
12
|
|
13
13
|
#include <vector>
|
14
14
|
|
15
|
-
#include <faiss/
|
15
|
+
#include <faiss/IndexFlatCodes.h>
|
16
16
|
|
17
17
|
namespace faiss {
|
18
18
|
|
19
19
|
/** Index that stores the full vectors and performs exhaustive search */
|
20
|
-
struct IndexFlat :
|
21
|
-
/// database vectors, size ntotal * d
|
22
|
-
std::vector<float> xb;
|
23
|
-
|
20
|
+
struct IndexFlat : IndexFlatCodes {
|
24
21
|
explicit IndexFlat(idx_t d, MetricType metric = METRIC_L2);
|
25
22
|
|
26
|
-
void add(idx_t n, const float* x) override;
|
27
|
-
|
28
|
-
void reset() override;
|
29
|
-
|
30
23
|
void search(
|
31
24
|
idx_t n,
|
32
25
|
const float* x,
|
@@ -57,18 +50,19 @@ struct IndexFlat : Index {
|
|
57
50
|
float* distances,
|
58
51
|
const idx_t* labels) const;
|
59
52
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
53
|
+
// get pointer to the floating point data
|
54
|
+
float* get_xb() {
|
55
|
+
return (float*)codes.data();
|
56
|
+
}
|
57
|
+
const float* get_xb() const {
|
58
|
+
return (const float*)codes.data();
|
59
|
+
}
|
64
60
|
|
65
61
|
IndexFlat() {}
|
66
62
|
|
67
63
|
DistanceComputer* get_distance_computer() const override;
|
68
64
|
|
69
65
|
/* The standalone codec interface (just memcopies in this case) */
|
70
|
-
size_t sa_code_size() const override;
|
71
|
-
|
72
66
|
void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
|
73
67
|
|
74
68
|
void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
|
@@ -0,0 +1,67 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#include <faiss/IndexFlatCodes.h>
|
9
|
+
|
10
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
11
|
+
#include <faiss/impl/FaissAssert.h>
|
12
|
+
|
13
|
+
namespace faiss {
|
14
|
+
|
15
|
+
IndexFlatCodes::IndexFlatCodes(size_t code_size, idx_t d, MetricType metric)
|
16
|
+
: Index(d, metric), code_size(code_size) {}
|
17
|
+
|
18
|
+
IndexFlatCodes::IndexFlatCodes() : code_size(0) {}
|
19
|
+
|
20
|
+
void IndexFlatCodes::add(idx_t n, const float* x) {
|
21
|
+
FAISS_THROW_IF_NOT(is_trained);
|
22
|
+
codes.resize((ntotal + n) * code_size);
|
23
|
+
sa_encode(n, x, &codes[ntotal * code_size]);
|
24
|
+
ntotal += n;
|
25
|
+
}
|
26
|
+
|
27
|
+
void IndexFlatCodes::reset() {
|
28
|
+
codes.clear();
|
29
|
+
ntotal = 0;
|
30
|
+
}
|
31
|
+
|
32
|
+
size_t IndexFlatCodes::sa_code_size() const {
|
33
|
+
return code_size;
|
34
|
+
}
|
35
|
+
|
36
|
+
size_t IndexFlatCodes::remove_ids(const IDSelector& sel) {
|
37
|
+
idx_t j = 0;
|
38
|
+
for (idx_t i = 0; i < ntotal; i++) {
|
39
|
+
if (sel.is_member(i)) {
|
40
|
+
// should be removed
|
41
|
+
} else {
|
42
|
+
if (i > j) {
|
43
|
+
memmove(&codes[code_size * j],
|
44
|
+
&codes[code_size * i],
|
45
|
+
code_size);
|
46
|
+
}
|
47
|
+
j++;
|
48
|
+
}
|
49
|
+
}
|
50
|
+
size_t nremove = ntotal - j;
|
51
|
+
if (nremove > 0) {
|
52
|
+
ntotal = j;
|
53
|
+
codes.resize(ntotal * code_size);
|
54
|
+
}
|
55
|
+
return nremove;
|
56
|
+
}
|
57
|
+
|
58
|
+
void IndexFlatCodes::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
|
59
|
+
FAISS_THROW_IF_NOT(ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
|
60
|
+
sa_decode(ni, codes.data() + i0 * code_size, recons);
|
61
|
+
}
|
62
|
+
|
63
|
+
void IndexFlatCodes::reconstruct(idx_t key, float* recons) const {
|
64
|
+
reconstruct_n(key, 1, recons);
|
65
|
+
}
|
66
|
+
|
67
|
+
} // namespace faiss
|
@@ -0,0 +1,47 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// -*- c++ -*-
|
9
|
+
|
10
|
+
#pragma once
|
11
|
+
|
12
|
+
#include <faiss/Index.h>
|
13
|
+
#include <vector>
|
14
|
+
|
15
|
+
namespace faiss {
|
16
|
+
|
17
|
+
/** Index that encodes all vectors as fixed-size codes (size code_size). Storage
|
18
|
+
* is in the codes vector */
|
19
|
+
struct IndexFlatCodes : Index {
|
20
|
+
size_t code_size;
|
21
|
+
|
22
|
+
/// encoded dataset, size ntotal * code_size
|
23
|
+
std::vector<uint8_t> codes;
|
24
|
+
|
25
|
+
IndexFlatCodes();
|
26
|
+
|
27
|
+
IndexFlatCodes(size_t code_size, idx_t d, MetricType metric = METRIC_L2);
|
28
|
+
|
29
|
+
/// default add uses sa_encode
|
30
|
+
void add(idx_t n, const float* x) override;
|
31
|
+
|
32
|
+
void reset() override;
|
33
|
+
|
34
|
+
/// reconstruction using the codec interface
|
35
|
+
void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
|
36
|
+
|
37
|
+
void reconstruct(idx_t key, float* recons) const override;
|
38
|
+
|
39
|
+
size_t sa_code_size() const override;
|
40
|
+
|
41
|
+
/** remove some ids. NB that because of the structure of the
|
42
|
+
* indexing structure, the semantics of this operation are
|
43
|
+
* different from the usual ones: the new ids are shifted */
|
44
|
+
size_t remove_ids(const IDSelector& sel) override;
|
45
|
+
};
|
46
|
+
|
47
|
+
} // namespace faiss
|
@@ -107,8 +107,15 @@ void Level1Quantizer::train_q1(
|
|
107
107
|
} else {
|
108
108
|
clus.train(n, x, *clustering_index);
|
109
109
|
}
|
110
|
-
if (verbose)
|
110
|
+
if (verbose) {
|
111
111
|
printf("Adding centroids to quantizer\n");
|
112
|
+
}
|
113
|
+
if (!quantizer->is_trained) {
|
114
|
+
if (verbose) {
|
115
|
+
printf("But training it first on centroids table...\n");
|
116
|
+
}
|
117
|
+
quantizer->train(nlist, clus.centroids.data());
|
118
|
+
}
|
112
119
|
quantizer->add(nlist, clus.centroids.data());
|
113
120
|
}
|
114
121
|
}
|
@@ -190,6 +197,20 @@ void IndexIVF::add_with_ids(idx_t n, const float* x, const idx_t* xids) {
|
|
190
197
|
add_core(n, x, xids, coarse_idx.get());
|
191
198
|
}
|
192
199
|
|
200
|
+
void IndexIVF::add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids) {
|
201
|
+
size_t coarse_size = coarse_code_size();
|
202
|
+
DirectMapAdd dm_adder(direct_map, n, xids);
|
203
|
+
|
204
|
+
for (idx_t i = 0; i < n; i++) {
|
205
|
+
const uint8_t* code = codes + (code_size + coarse_size) * i;
|
206
|
+
idx_t list_no = decode_listno(code);
|
207
|
+
idx_t id = xids ? xids[i] : ntotal + i;
|
208
|
+
size_t ofs = invlists->add_entry(list_no, id, code + coarse_size);
|
209
|
+
dm_adder.add(i, list_no, ofs);
|
210
|
+
}
|
211
|
+
ntotal += n;
|
212
|
+
}
|
213
|
+
|
193
214
|
void IndexIVF::add_core(
|
194
215
|
idx_t n,
|
195
216
|
const float* x,
|
@@ -1068,6 +1089,10 @@ IndexIVF::~IndexIVF() {
|
|
1068
1089
|
}
|
1069
1090
|
}
|
1070
1091
|
|
1092
|
+
/*************************************************************************
|
1093
|
+
* IndexIVFStats
|
1094
|
+
*************************************************************************/
|
1095
|
+
|
1071
1096
|
void IndexIVFStats::reset() {
|
1072
1097
|
memset((void*)this, 0, sizeof(*this));
|
1073
1098
|
}
|
@@ -1083,13 +1108,60 @@ void IndexIVFStats::add(const IndexIVFStats& other) {
|
|
1083
1108
|
|
1084
1109
|
IndexIVFStats indexIVF_stats;
|
1085
1110
|
|
1111
|
+
/*************************************************************************
|
1112
|
+
* InvertedListScanner
|
1113
|
+
*************************************************************************/
|
1114
|
+
|
1115
|
+
size_t InvertedListScanner::scan_codes(
|
1116
|
+
size_t list_size,
|
1117
|
+
const uint8_t* codes,
|
1118
|
+
const idx_t* ids,
|
1119
|
+
float* simi,
|
1120
|
+
idx_t* idxi,
|
1121
|
+
size_t k) const {
|
1122
|
+
size_t nup = 0;
|
1123
|
+
|
1124
|
+
if (!keep_max) {
|
1125
|
+
for (size_t j = 0; j < list_size; j++) {
|
1126
|
+
float dis = distance_to_code(codes);
|
1127
|
+
if (dis < simi[0]) {
|
1128
|
+
int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
|
1129
|
+
maxheap_replace_top(k, simi, idxi, dis, id);
|
1130
|
+
nup++;
|
1131
|
+
}
|
1132
|
+
codes += code_size;
|
1133
|
+
}
|
1134
|
+
} else {
|
1135
|
+
for (size_t j = 0; j < list_size; j++) {
|
1136
|
+
float dis = distance_to_code(codes);
|
1137
|
+
if (dis > simi[0]) {
|
1138
|
+
int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
|
1139
|
+
minheap_replace_top(k, simi, idxi, dis, id);
|
1140
|
+
nup++;
|
1141
|
+
}
|
1142
|
+
codes += code_size;
|
1143
|
+
}
|
1144
|
+
}
|
1145
|
+
return nup;
|
1146
|
+
}
|
1147
|
+
|
1086
1148
|
void InvertedListScanner::scan_codes_range(
|
1087
|
-
size_t,
|
1088
|
-
const uint8_t
|
1089
|
-
const idx_t
|
1090
|
-
float,
|
1091
|
-
RangeQueryResult&) const {
|
1092
|
-
|
1149
|
+
size_t list_size,
|
1150
|
+
const uint8_t* codes,
|
1151
|
+
const idx_t* ids,
|
1152
|
+
float radius,
|
1153
|
+
RangeQueryResult& res) const {
|
1154
|
+
for (size_t j = 0; j < list_size; j++) {
|
1155
|
+
float dis = distance_to_code(codes);
|
1156
|
+
bool keep = !keep_max
|
1157
|
+
? dis < radius
|
1158
|
+
: dis > radius; // TODO templatize to remove this test
|
1159
|
+
if (keep) {
|
1160
|
+
int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
|
1161
|
+
res.add(dis, id);
|
1162
|
+
}
|
1163
|
+
codes += code_size;
|
1164
|
+
}
|
1093
1165
|
}
|
1094
1166
|
|
1095
1167
|
} // namespace faiss
|
@@ -38,7 +38,7 @@ struct Level1Quantizer {
|
|
38
38
|
* = 2: kmeans training on a flat index + add the centroids to the quantizer
|
39
39
|
*/
|
40
40
|
char quantizer_trains_alone;
|
41
|
-
bool own_fields; ///< whether object owns the quantizer
|
41
|
+
bool own_fields; ///< whether object owns the quantizer (false by default)
|
42
42
|
|
43
43
|
ClusteringParameters cp; ///< to override default clustering params
|
44
44
|
Index* clustering_index; ///< to override index used during clustering
|
@@ -121,8 +121,7 @@ struct IndexIVF : Index, Level1Quantizer {
|
|
121
121
|
|
122
122
|
/** The Inverted file takes a quantizer (an Index) on input,
|
123
123
|
* which implements the function mapping a vector to a list
|
124
|
-
* identifier.
|
125
|
-
* be deleted while the IndexIVF is in use.
|
124
|
+
* identifier.
|
126
125
|
*/
|
127
126
|
IndexIVF(
|
128
127
|
Index* quantizer,
|
@@ -171,6 +170,13 @@ struct IndexIVF : Index, Level1Quantizer {
|
|
171
170
|
uint8_t* codes,
|
172
171
|
bool include_listno = false) const = 0;
|
173
172
|
|
173
|
+
/** Add vectors that are computed with the standalone codec
|
174
|
+
*
|
175
|
+
* @param codes codes to add size n * sa_code_size()
|
176
|
+
* @param xids corresponding ids, size n
|
177
|
+
*/
|
178
|
+
void add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids);
|
179
|
+
|
174
180
|
/// Sub-classes that encode the residuals can train their encoders here
|
175
181
|
/// does nothing by default
|
176
182
|
virtual void train_residual(idx_t n, const float* x);
|
@@ -231,7 +237,10 @@ struct IndexIVF : Index, Level1Quantizer {
|
|
231
237
|
const IVFSearchParameters* params = nullptr,
|
232
238
|
IndexIVFStats* stats = nullptr) const;
|
233
239
|
|
234
|
-
|
240
|
+
/** Get a scanner for this index (store_pairs means ignore labels)
|
241
|
+
*
|
242
|
+
* The default search implementation uses this to compute the distances
|
243
|
+
*/
|
235
244
|
virtual InvertedListScanner* get_InvertedListScanner(
|
236
245
|
bool store_pairs = false) const;
|
237
246
|
|
@@ -351,6 +360,14 @@ struct RangeQueryResult;
|
|
351
360
|
struct InvertedListScanner {
|
352
361
|
using idx_t = Index::idx_t;
|
353
362
|
|
363
|
+
idx_t list_no = -1; ///< remember current list
|
364
|
+
bool keep_max = false; ///< keep maximum instead of minimum
|
365
|
+
/// store positions in invlists rather than labels
|
366
|
+
bool store_pairs = false;
|
367
|
+
|
368
|
+
/// used in default implementation of scan_codes
|
369
|
+
size_t code_size = 0;
|
370
|
+
|
354
371
|
/// from now on we handle this query.
|
355
372
|
virtual void set_query(const float* query_vector) = 0;
|
356
373
|
|
@@ -361,7 +378,8 @@ struct InvertedListScanner {
|
|
361
378
|
virtual float distance_to_code(const uint8_t* code) const = 0;
|
362
379
|
|
363
380
|
/** scan a set of codes, compute distances to current query and
|
364
|
-
* update heap of results if necessary.
|
381
|
+
* update heap of results if necessary. Default implementation
|
382
|
+
* calls distance_to_code.
|
365
383
|
*
|
366
384
|
* @param n number of codes to scan
|
367
385
|
* @param codes codes to scan (n * code_size)
|
@@ -377,7 +395,7 @@ struct InvertedListScanner {
|
|
377
395
|
const idx_t* ids,
|
378
396
|
float* distances,
|
379
397
|
idx_t* labels,
|
380
|
-
size_t k) const
|
398
|
+
size_t k) const;
|
381
399
|
|
382
400
|
/** scan a set of codes, compute distances to current query and
|
383
401
|
* update results if distances are below radius
|
@@ -396,7 +414,7 @@ struct InvertedListScanner {
|
|
396
414
|
struct IndexIVFStats {
|
397
415
|
size_t nq; // nb of queries run
|
398
416
|
size_t nlist; // nb of inverted lists scanned
|
399
|
-
size_t ndis; // nb of
|
417
|
+
size_t ndis; // nb of distances computed
|
400
418
|
size_t nheap_updates; // nb of times the heap was updated
|
401
419
|
double quantization_time; // time spent quantizing vectors (in ms)
|
402
420
|
double search_time; // time spent searching lists (in ms)
|