faiss 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +39 -29
- data/vendor/faiss/faiss/Clustering.cpp +4 -2
- data/vendor/faiss/faiss/IVFlib.cpp +14 -7
- data/vendor/faiss/faiss/Index.h +72 -3
- data/vendor/faiss/faiss/Index2Layer.cpp +2 -4
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +0 -1
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/IndexBinary.h +46 -3
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +118 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +41 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +0 -1
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +18 -7
- data/vendor/faiss/faiss/IndexBinaryIVF.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +6 -4
- data/vendor/faiss/faiss/IndexHNSW.cpp +65 -24
- data/vendor/faiss/faiss/IndexHNSW.h +10 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +96 -18
- data/vendor/faiss/faiss/IndexIDMap.h +20 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +28 -10
- data/vendor/faiss/faiss/IndexIVF.h +16 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +18 -6
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +33 -21
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +16 -6
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +24 -15
- data/vendor/faiss/faiss/IndexIVFFastScan.h +4 -2
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +59 -43
- data/vendor/faiss/faiss/IndexIVFFlat.h +10 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +16 -3
- data/vendor/faiss/faiss/IndexIVFPQ.h +8 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +14 -6
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +14 -4
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +28 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +8 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +9 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
- data/vendor/faiss/faiss/IndexLattice.cpp +8 -4
- data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -7
- data/vendor/faiss/faiss/IndexNSG.cpp +3 -3
- data/vendor/faiss/faiss/IndexPQ.cpp +0 -1
- data/vendor/faiss/faiss/IndexPQ.h +1 -0
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +0 -2
- data/vendor/faiss/faiss/IndexPreTransform.cpp +4 -2
- data/vendor/faiss/faiss/IndexRefine.cpp +11 -6
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +16 -4
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -3
- data/vendor/faiss/faiss/IndexShards.cpp +7 -6
- data/vendor/faiss/faiss/MatrixStats.cpp +16 -8
- data/vendor/faiss/faiss/MetaIndexes.cpp +12 -6
- data/vendor/faiss/faiss/MetricType.h +5 -3
- data/vendor/faiss/faiss/clone_index.cpp +2 -4
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +6 -0
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +9 -4
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +32 -10
- data/vendor/faiss/faiss/gpu/GpuIndex.h +88 -0
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +125 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +39 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +3 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +3 -2
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +41 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +6 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +34 -19
- data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +2 -3
- data/vendor/faiss/faiss/impl/NNDescent.cpp +17 -9
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +42 -21
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +6 -24
- data/vendor/faiss/faiss/impl/ResultHandler.h +56 -47
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +28 -15
- data/vendor/faiss/faiss/impl/index_read.cpp +36 -11
- data/vendor/faiss/faiss/impl/index_write.cpp +19 -6
- data/vendor/faiss/faiss/impl/io.cpp +9 -5
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +18 -11
- data/vendor/faiss/faiss/impl/mapped_io.cpp +4 -7
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +0 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +0 -1
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +6 -6
- data/vendor/faiss/faiss/impl/zerocopy_io.cpp +1 -1
- data/vendor/faiss/faiss/impl/zerocopy_io.h +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +49 -33
- data/vendor/faiss/faiss/index_factory.h +8 -2
- data/vendor/faiss/faiss/index_io.h +0 -3
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +2 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +12 -6
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +8 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +15 -8
- data/vendor/faiss/faiss/utils/Heap.h +23 -12
- data/vendor/faiss/faiss/utils/distances.cpp +42 -21
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -3
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +27 -4
- data/vendor/faiss/faiss/utils/extra_distances.cpp +8 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +20 -10
- data/vendor/faiss/faiss/utils/partitioning.cpp +8 -4
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +17 -9
- data/vendor/faiss/faiss/utils/rabitq_simd.h +539 -0
- data/vendor/faiss/faiss/utils/random.cpp +14 -7
- data/vendor/faiss/faiss/utils/utils.cpp +0 -3
- metadata +5 -2
@@ -12,11 +12,11 @@
|
|
12
12
|
#include <cinttypes>
|
13
13
|
#include <cstdint>
|
14
14
|
#include <cstdio>
|
15
|
+
#include "faiss/Index.h"
|
15
16
|
|
16
17
|
#include <faiss/impl/AuxIndexStructures.h>
|
17
18
|
#include <faiss/impl/FaissAssert.h>
|
18
19
|
#include <faiss/utils/Heap.h>
|
19
|
-
#include <faiss/utils/WorkerThread.h>
|
20
20
|
|
21
21
|
namespace faiss {
|
22
22
|
|
@@ -33,6 +33,17 @@ void sync_d(IndexBinary* index) {
|
|
33
33
|
|
34
34
|
} // anonymous namespace
|
35
35
|
|
36
|
+
template <typename componentT>
|
37
|
+
NumericType component_t_to_numeric() {
|
38
|
+
if constexpr (std::is_same<componentT, float>::value) {
|
39
|
+
return NumericType::Float32;
|
40
|
+
} else if constexpr (std::is_same<componentT, uint8_t>::value) {
|
41
|
+
return NumericType::UInt8;
|
42
|
+
} else {
|
43
|
+
FAISS_THROW_MSG("Unsupported component_t");
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
36
47
|
/*****************************************************
|
37
48
|
* IndexIDMap implementation
|
38
49
|
*******************************************************/
|
@@ -47,6 +58,16 @@ IndexIDMapTemplate<IndexT>::IndexIDMapTemplate(IndexT* index) : index(index) {
|
|
47
58
|
sync_d(this);
|
48
59
|
}
|
49
60
|
|
61
|
+
template <typename IndexT>
|
62
|
+
void IndexIDMapTemplate<IndexT>::addEx(
|
63
|
+
idx_t,
|
64
|
+
const void*,
|
65
|
+
NumericType numeric_type) {
|
66
|
+
FAISS_THROW_MSG(
|
67
|
+
"add does not make sense with IndexIDMap, "
|
68
|
+
"use add_with_ids");
|
69
|
+
}
|
70
|
+
|
50
71
|
template <typename IndexT>
|
51
72
|
void IndexIDMapTemplate<IndexT>::add(
|
52
73
|
idx_t,
|
@@ -56,12 +77,22 @@ void IndexIDMapTemplate<IndexT>::add(
|
|
56
77
|
"use add_with_ids");
|
57
78
|
}
|
58
79
|
|
80
|
+
template <typename IndexT>
|
81
|
+
void IndexIDMapTemplate<IndexT>::trainEx(
|
82
|
+
idx_t n,
|
83
|
+
const void* x,
|
84
|
+
NumericType numeric_type) {
|
85
|
+
index->trainEx(n, x, numeric_type);
|
86
|
+
this->is_trained = index->is_trained;
|
87
|
+
}
|
88
|
+
|
59
89
|
template <typename IndexT>
|
60
90
|
void IndexIDMapTemplate<IndexT>::train(
|
61
91
|
idx_t n,
|
62
92
|
const typename IndexT::component_t* x) {
|
63
|
-
|
64
|
-
|
93
|
+
trainEx(n,
|
94
|
+
static_cast<const void*>(x),
|
95
|
+
component_t_to_numeric<typename IndexT::component_t>());
|
65
96
|
}
|
66
97
|
|
67
98
|
template <typename IndexT>
|
@@ -72,16 +103,30 @@ void IndexIDMapTemplate<IndexT>::reset() {
|
|
72
103
|
}
|
73
104
|
|
74
105
|
template <typename IndexT>
|
75
|
-
void IndexIDMapTemplate<IndexT>::
|
106
|
+
void IndexIDMapTemplate<IndexT>::add_with_idsEx(
|
76
107
|
idx_t n,
|
77
|
-
const
|
108
|
+
const void* x,
|
109
|
+
NumericType numeric_type,
|
78
110
|
const idx_t* xids) {
|
79
|
-
index->
|
80
|
-
for (idx_t i = 0; i < n; i++)
|
111
|
+
index->addEx(n, x, numeric_type);
|
112
|
+
for (idx_t i = 0; i < n; i++) {
|
81
113
|
id_map.push_back(xids[i]);
|
114
|
+
}
|
82
115
|
this->ntotal = index->ntotal;
|
83
116
|
}
|
84
117
|
|
118
|
+
template <typename IndexT>
|
119
|
+
void IndexIDMapTemplate<IndexT>::add_with_ids(
|
120
|
+
idx_t n,
|
121
|
+
const typename IndexT::component_t* x,
|
122
|
+
const idx_t* xids) {
|
123
|
+
add_with_idsEx(
|
124
|
+
n,
|
125
|
+
static_cast<const void*>(x),
|
126
|
+
component_t_to_numeric<typename IndexT::component_t>(),
|
127
|
+
xids);
|
128
|
+
}
|
129
|
+
|
85
130
|
template <typename IndexT>
|
86
131
|
size_t IndexIDMapTemplate<IndexT>::sa_code_size() const {
|
87
132
|
return index->sa_code_size();
|
@@ -106,10 +151,10 @@ struct ScopedSelChange {
|
|
106
151
|
SearchParameters* params = nullptr;
|
107
152
|
IDSelector* old_sel = nullptr;
|
108
153
|
|
109
|
-
void set(SearchParameters*
|
110
|
-
this->params =
|
111
|
-
old_sel =
|
112
|
-
|
154
|
+
void set(SearchParameters* target_params, IDSelector* new_sel) {
|
155
|
+
this->params = target_params;
|
156
|
+
old_sel = target_params->sel;
|
157
|
+
target_params->sel = new_sel;
|
113
158
|
}
|
114
159
|
~ScopedSelChange() {
|
115
160
|
if (params) {
|
@@ -121,9 +166,10 @@ struct ScopedSelChange {
|
|
121
166
|
} // namespace
|
122
167
|
|
123
168
|
template <typename IndexT>
|
124
|
-
void IndexIDMapTemplate<IndexT>::
|
169
|
+
void IndexIDMapTemplate<IndexT>::searchEx(
|
125
170
|
idx_t n,
|
126
|
-
const
|
171
|
+
const void* x,
|
172
|
+
NumericType numeric_type,
|
127
173
|
idx_t k,
|
128
174
|
typename IndexT::distance_t* distances,
|
129
175
|
idx_t* labels,
|
@@ -147,7 +193,7 @@ void IndexIDMapTemplate<IndexT>::search(
|
|
147
193
|
sel_change.set(params_non_const, &this_idtrans);
|
148
194
|
}
|
149
195
|
}
|
150
|
-
index->
|
196
|
+
index->searchEx(n, x, numeric_type, k, distances, labels, params);
|
151
197
|
idx_t* li = labels;
|
152
198
|
#pragma omp parallel for
|
153
199
|
for (idx_t i = 0; i < n * k; i++) {
|
@@ -155,6 +201,24 @@ void IndexIDMapTemplate<IndexT>::search(
|
|
155
201
|
}
|
156
202
|
}
|
157
203
|
|
204
|
+
template <typename IndexT>
|
205
|
+
void IndexIDMapTemplate<IndexT>::search(
|
206
|
+
idx_t n,
|
207
|
+
const typename IndexT::component_t* x,
|
208
|
+
idx_t k,
|
209
|
+
typename IndexT::distance_t* distances,
|
210
|
+
idx_t* labels,
|
211
|
+
const SearchParameters* params) const {
|
212
|
+
searchEx(
|
213
|
+
n,
|
214
|
+
static_cast<const void*>(x),
|
215
|
+
component_t_to_numeric<typename IndexT::component_t>(),
|
216
|
+
k,
|
217
|
+
distances,
|
218
|
+
labels,
|
219
|
+
params);
|
220
|
+
}
|
221
|
+
|
158
222
|
template <typename IndexT>
|
159
223
|
void IndexIDMapTemplate<IndexT>::range_search(
|
160
224
|
idx_t n,
|
@@ -223,8 +287,9 @@ void IndexIDMapTemplate<IndexT>::merge_from(IndexT& otherIndex, idx_t add_id) {
|
|
223
287
|
|
224
288
|
template <typename IndexT>
|
225
289
|
IndexIDMapTemplate<IndexT>::~IndexIDMapTemplate() {
|
226
|
-
if (own_fields)
|
290
|
+
if (own_fields) {
|
227
291
|
delete index;
|
292
|
+
}
|
228
293
|
}
|
229
294
|
|
230
295
|
/*****************************************************
|
@@ -236,17 +301,30 @@ IndexIDMap2Template<IndexT>::IndexIDMap2Template(IndexT* index)
|
|
236
301
|
: IndexIDMapTemplate<IndexT>(index) {}
|
237
302
|
|
238
303
|
template <typename IndexT>
|
239
|
-
void IndexIDMap2Template<IndexT>::
|
304
|
+
void IndexIDMap2Template<IndexT>::add_with_idsEx(
|
240
305
|
idx_t n,
|
241
|
-
const
|
306
|
+
const void* x,
|
307
|
+
NumericType numeric_type,
|
242
308
|
const idx_t* xids) {
|
243
309
|
size_t prev_ntotal = this->ntotal;
|
244
|
-
IndexIDMapTemplate<IndexT>::
|
310
|
+
IndexIDMapTemplate<IndexT>::add_with_idsEx(n, x, numeric_type, xids);
|
245
311
|
for (size_t i = prev_ntotal; i < this->ntotal; i++) {
|
246
312
|
rev_map[this->id_map[i]] = i;
|
247
313
|
}
|
248
314
|
}
|
249
315
|
|
316
|
+
template <typename IndexT>
|
317
|
+
void IndexIDMap2Template<IndexT>::add_with_ids(
|
318
|
+
idx_t n,
|
319
|
+
const typename IndexT::component_t* x,
|
320
|
+
const idx_t* xids) {
|
321
|
+
add_with_idsEx(
|
322
|
+
n,
|
323
|
+
static_cast<const void*>(x),
|
324
|
+
component_t_to_numeric<typename IndexT::component_t>(),
|
325
|
+
xids);
|
326
|
+
}
|
327
|
+
|
250
328
|
template <typename IndexT>
|
251
329
|
void IndexIDMap2Template<IndexT>::check_consistency() const {
|
252
330
|
FAISS_THROW_IF_NOT(rev_map.size() == this->id_map.size());
|
@@ -31,9 +31,15 @@ struct IndexIDMapTemplate : IndexT {
|
|
31
31
|
/// @param xids if non-null, ids to store for the vectors (size n)
|
32
32
|
void add_with_ids(idx_t n, const component_t* x, const idx_t* xids)
|
33
33
|
override;
|
34
|
+
void add_with_idsEx(
|
35
|
+
idx_t n,
|
36
|
+
const void* x,
|
37
|
+
NumericType numeric_type,
|
38
|
+
const idx_t* xids) override;
|
34
39
|
|
35
40
|
/// this will fail. Use add_with_ids
|
36
41
|
void add(idx_t n, const component_t* x) override;
|
42
|
+
void addEx(idx_t n, const void* x, NumericType numeric_type) override;
|
37
43
|
|
38
44
|
void search(
|
39
45
|
idx_t n,
|
@@ -42,8 +48,17 @@ struct IndexIDMapTemplate : IndexT {
|
|
42
48
|
distance_t* distances,
|
43
49
|
idx_t* labels,
|
44
50
|
const SearchParameters* params = nullptr) const override;
|
51
|
+
void searchEx(
|
52
|
+
idx_t n,
|
53
|
+
const void* x,
|
54
|
+
NumericType numeric_type,
|
55
|
+
idx_t k,
|
56
|
+
distance_t* distances,
|
57
|
+
idx_t* labels,
|
58
|
+
const SearchParameters* params = nullptr) const override;
|
45
59
|
|
46
60
|
void train(idx_t n, const component_t* x) override;
|
61
|
+
void trainEx(idx_t n, const void* x, NumericType numeric_type) override;
|
47
62
|
|
48
63
|
void reset() override;
|
49
64
|
|
@@ -89,6 +104,11 @@ struct IndexIDMap2Template : IndexIDMapTemplate<IndexT> {
|
|
89
104
|
|
90
105
|
void add_with_ids(idx_t n, const component_t* x, const idx_t* xids)
|
91
106
|
override;
|
107
|
+
void add_with_idsEx(
|
108
|
+
idx_t n,
|
109
|
+
const void* x,
|
110
|
+
NumericType numeric_type,
|
111
|
+
const idx_t* xids) override;
|
92
112
|
|
93
113
|
size_t remove_ids(const IDSelector& sel) override;
|
94
114
|
|
@@ -60,19 +60,22 @@ void Level1Quantizer::train_q1(
|
|
60
60
|
MetricType metric_type) {
|
61
61
|
size_t d = quantizer->d;
|
62
62
|
if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
|
63
|
-
if (verbose)
|
63
|
+
if (verbose) {
|
64
64
|
printf("IVF quantizer does not need training.\n");
|
65
|
+
}
|
65
66
|
} else if (quantizer_trains_alone == 1) {
|
66
|
-
if (verbose)
|
67
|
+
if (verbose) {
|
67
68
|
printf("IVF quantizer trains alone...\n");
|
69
|
+
}
|
68
70
|
quantizer->verbose = verbose;
|
69
71
|
quantizer->train(n, x);
|
70
72
|
FAISS_THROW_IF_NOT_MSG(
|
71
73
|
quantizer->ntotal == nlist,
|
72
74
|
"nlist not consistent with quantizer size");
|
73
75
|
} else if (quantizer_trains_alone == 0) {
|
74
|
-
if (verbose)
|
76
|
+
if (verbose) {
|
75
77
|
printf("Training level-1 quantizer on %zd vectors in %zdD\n", n, d);
|
78
|
+
}
|
76
79
|
|
77
80
|
Clustering clus(d, nlist, cp);
|
78
81
|
quantizer->reset();
|
@@ -158,11 +161,14 @@ IndexIVF::IndexIVF(
|
|
158
161
|
size_t d,
|
159
162
|
size_t nlist,
|
160
163
|
size_t code_size,
|
161
|
-
MetricType metric
|
164
|
+
MetricType metric,
|
165
|
+
bool own_invlists)
|
162
166
|
: Index(d, metric),
|
163
167
|
IndexIVFInterface(quantizer, nlist),
|
164
|
-
invlists(
|
165
|
-
|
168
|
+
invlists(
|
169
|
+
own_invlists ? new ArrayInvertedLists(nlist, code_size)
|
170
|
+
: nullptr),
|
171
|
+
own_invlists(own_invlists),
|
166
172
|
code_size(code_size) {
|
167
173
|
FAISS_THROW_IF_NOT(d == quantizer->d);
|
168
174
|
is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
|
@@ -230,8 +236,9 @@ void IndexIVF::add_core(
|
|
230
236
|
size_t nadd = 0, nminus1 = 0;
|
231
237
|
|
232
238
|
for (size_t i = 0; i < n; i++) {
|
233
|
-
if (coarse_idx[i] < 0)
|
239
|
+
if (coarse_idx[i] < 0) {
|
234
240
|
nminus1++;
|
241
|
+
}
|
235
242
|
}
|
236
243
|
|
237
244
|
std::unique_ptr<uint8_t[]> flat_codes(new uint8_t[n * code_size]);
|
@@ -466,8 +473,9 @@ void IndexIVF::search_preassigned(
|
|
466
473
|
// initialize + reorder a result heap
|
467
474
|
|
468
475
|
auto init_result = [&](float* simi, idx_t* idxi) {
|
469
|
-
if (!do_heap_init)
|
476
|
+
if (!do_heap_init) {
|
470
477
|
return;
|
478
|
+
}
|
471
479
|
if (metric_type == METRIC_INNER_PRODUCT) {
|
472
480
|
heap_heapify<HeapForIP>(k, simi, idxi);
|
473
481
|
} else {
|
@@ -487,8 +495,9 @@ void IndexIVF::search_preassigned(
|
|
487
495
|
};
|
488
496
|
|
489
497
|
auto reorder_result = [&](float* simi, idx_t* idxi) {
|
490
|
-
if (!do_heap_init)
|
498
|
+
if (!do_heap_init) {
|
491
499
|
return;
|
500
|
+
}
|
492
501
|
if (metric_type == METRIC_INNER_PRODUCT) {
|
493
502
|
heap_reorder<HeapForIP>(k, simi, idxi);
|
494
503
|
} else {
|
@@ -804,8 +813,9 @@ void IndexIVF::range_search_preassigned(
|
|
804
813
|
|
805
814
|
auto scan_list_func = [&](size_t i, size_t ik, RangeQueryResult& qres) {
|
806
815
|
idx_t key = keys[i * nprobe + ik]; /* select the list */
|
807
|
-
if (key < 0)
|
816
|
+
if (key < 0) {
|
808
817
|
return;
|
818
|
+
}
|
809
819
|
FAISS_THROW_IF_NOT_FMT(
|
810
820
|
key < (idx_t)nlist,
|
811
821
|
"Invalid key=%" PRId64 " at ik=%zd nlist=%zd\n",
|
@@ -956,6 +966,14 @@ bool IndexIVF::check_ids_sorted() const {
|
|
956
966
|
return nflip == 0;
|
957
967
|
}
|
958
968
|
|
969
|
+
void IndexIVF::decode_vectors(
|
970
|
+
idx_t /*n*/,
|
971
|
+
const uint8_t* /*codes*/,
|
972
|
+
const idx_t* /*list_nos*/,
|
973
|
+
float* /*x*/) const {
|
974
|
+
FAISS_THROW_MSG("decode_vectors not implemented");
|
975
|
+
}
|
976
|
+
|
959
977
|
/* standalone codec interface */
|
960
978
|
size_t IndexIVF::sa_code_size() const {
|
961
979
|
size_t coarse_size = coarse_code_size();
|
@@ -210,7 +210,8 @@ struct IndexIVF : Index, IndexIVFInterface {
|
|
210
210
|
size_t d,
|
211
211
|
size_t nlist,
|
212
212
|
size_t code_size,
|
213
|
-
MetricType metric = METRIC_L2
|
213
|
+
MetricType metric = METRIC_L2,
|
214
|
+
bool own_invlists = true);
|
214
215
|
|
215
216
|
void reset() override;
|
216
217
|
|
@@ -253,6 +254,20 @@ struct IndexIVF : Index, IndexIVFInterface {
|
|
253
254
|
uint8_t* codes,
|
254
255
|
bool include_listno = false) const = 0;
|
255
256
|
|
257
|
+
/** Decodes a set of vectors as they would appear in a given set of inverted
|
258
|
+
* lists (inverse of encode_vectors)
|
259
|
+
*
|
260
|
+
* @param codes input codes, size n * code_size
|
261
|
+
* @param x output decoded vectors
|
262
|
+
* @param list_nos input listnos, size n
|
263
|
+
*
|
264
|
+
*/
|
265
|
+
virtual void decode_vectors(
|
266
|
+
idx_t n,
|
267
|
+
const uint8_t* codes,
|
268
|
+
const idx_t* list_nos,
|
269
|
+
float* x) const;
|
270
|
+
|
256
271
|
/** Add vectors that are computed with the standalone codec
|
257
272
|
*
|
258
273
|
* @param codes codes to add size n * sa_code_size()
|
@@ -28,8 +28,9 @@ IndexIVFAdditiveQuantizer::IndexIVFAdditiveQuantizer(
|
|
28
28
|
Index* quantizer,
|
29
29
|
size_t d,
|
30
30
|
size_t nlist,
|
31
|
-
MetricType metric
|
32
|
-
|
31
|
+
MetricType metric,
|
32
|
+
bool own_invlists)
|
33
|
+
: IndexIVF(quantizer, d, nlist, 0, metric, own_invlists), aq(aq) {
|
33
34
|
by_residual = true;
|
34
35
|
}
|
35
36
|
|
@@ -89,6 +90,31 @@ void IndexIVFAdditiveQuantizer::encode_vectors(
|
|
89
90
|
}
|
90
91
|
}
|
91
92
|
|
93
|
+
void IndexIVFAdditiveQuantizer::decode_vectors(
|
94
|
+
idx_t n,
|
95
|
+
const uint8_t* codes,
|
96
|
+
const idx_t* listnos,
|
97
|
+
float* x) const {
|
98
|
+
#pragma omp parallel if (n > 1000)
|
99
|
+
{
|
100
|
+
std::vector<float> residual(d);
|
101
|
+
|
102
|
+
#pragma omp for
|
103
|
+
for (idx_t i = 0; i < n; i++) {
|
104
|
+
const uint8_t* code = codes + i * (code_size);
|
105
|
+
float* xi = x + i * d;
|
106
|
+
aq->decode(code, xi, 1);
|
107
|
+
if (by_residual) {
|
108
|
+
int64_t list_no = listnos[i];
|
109
|
+
quantizer->reconstruct(list_no, residual.data());
|
110
|
+
for (size_t j = 0; j < d; j++) {
|
111
|
+
xi[j] += residual[j];
|
112
|
+
}
|
113
|
+
}
|
114
|
+
}
|
115
|
+
}
|
116
|
+
}
|
117
|
+
|
92
118
|
void IndexIVFAdditiveQuantizer::sa_decode(
|
93
119
|
idx_t n,
|
94
120
|
const uint8_t* codes,
|
@@ -301,10 +327,20 @@ IndexIVFResidualQuantizer::IndexIVFResidualQuantizer(
|
|
301
327
|
size_t nlist,
|
302
328
|
const std::vector<size_t>& nbits,
|
303
329
|
MetricType metric,
|
304
|
-
Search_type_t search_type
|
305
|
-
|
330
|
+
Search_type_t search_type,
|
331
|
+
bool own_invlists)
|
332
|
+
: IndexIVFAdditiveQuantizer(
|
333
|
+
&rq,
|
334
|
+
quantizer,
|
335
|
+
d,
|
336
|
+
nlist,
|
337
|
+
metric,
|
338
|
+
own_invlists),
|
306
339
|
rq(d, nbits, search_type) {
|
307
|
-
code_size =
|
340
|
+
code_size = rq.code_size;
|
341
|
+
if (invlists) {
|
342
|
+
invlists->code_size = code_size;
|
343
|
+
}
|
308
344
|
}
|
309
345
|
|
310
346
|
IndexIVFResidualQuantizer::IndexIVFResidualQuantizer()
|
@@ -317,14 +353,16 @@ IndexIVFResidualQuantizer::IndexIVFResidualQuantizer(
|
|
317
353
|
size_t M, /* number of subquantizers */
|
318
354
|
size_t nbits, /* number of bit per subvector index */
|
319
355
|
MetricType metric,
|
320
|
-
Search_type_t search_type
|
356
|
+
Search_type_t search_type,
|
357
|
+
bool own_invlists)
|
321
358
|
: IndexIVFResidualQuantizer(
|
322
359
|
quantizer,
|
323
360
|
d,
|
324
361
|
nlist,
|
325
362
|
std::vector<size_t>(M, nbits),
|
326
363
|
metric,
|
327
|
-
search_type
|
364
|
+
search_type,
|
365
|
+
own_invlists) {}
|
328
366
|
|
329
367
|
IndexIVFResidualQuantizer::~IndexIVFResidualQuantizer() = default;
|
330
368
|
|
@@ -339,10 +377,20 @@ IndexIVFLocalSearchQuantizer::IndexIVFLocalSearchQuantizer(
|
|
339
377
|
size_t M, /* number of subquantizers */
|
340
378
|
size_t nbits, /* number of bit per subvector index */
|
341
379
|
MetricType metric,
|
342
|
-
Search_type_t search_type
|
343
|
-
|
380
|
+
Search_type_t search_type,
|
381
|
+
bool own_invlists)
|
382
|
+
: IndexIVFAdditiveQuantizer(
|
383
|
+
&lsq,
|
384
|
+
quantizer,
|
385
|
+
d,
|
386
|
+
nlist,
|
387
|
+
metric,
|
388
|
+
own_invlists),
|
344
389
|
lsq(d, M, nbits, search_type) {
|
345
|
-
code_size =
|
390
|
+
code_size = lsq.code_size;
|
391
|
+
if (invlists) {
|
392
|
+
invlists->code_size = code_size;
|
393
|
+
}
|
346
394
|
}
|
347
395
|
|
348
396
|
IndexIVFLocalSearchQuantizer::IndexIVFLocalSearchQuantizer()
|
@@ -362,10 +410,20 @@ IndexIVFProductResidualQuantizer::IndexIVFProductResidualQuantizer(
|
|
362
410
|
size_t Msub,
|
363
411
|
size_t nbits,
|
364
412
|
MetricType metric,
|
365
|
-
Search_type_t search_type
|
366
|
-
|
413
|
+
Search_type_t search_type,
|
414
|
+
bool own_invlists)
|
415
|
+
: IndexIVFAdditiveQuantizer(
|
416
|
+
&prq,
|
417
|
+
quantizer,
|
418
|
+
d,
|
419
|
+
nlist,
|
420
|
+
metric,
|
421
|
+
own_invlists),
|
367
422
|
prq(d, nsplits, Msub, nbits, search_type) {
|
368
|
-
code_size =
|
423
|
+
code_size = prq.code_size;
|
424
|
+
if (invlists) {
|
425
|
+
invlists->code_size = code_size;
|
426
|
+
}
|
369
427
|
}
|
370
428
|
|
371
429
|
IndexIVFProductResidualQuantizer::IndexIVFProductResidualQuantizer()
|
@@ -385,10 +443,20 @@ IndexIVFProductLocalSearchQuantizer::IndexIVFProductLocalSearchQuantizer(
|
|
385
443
|
size_t Msub,
|
386
444
|
size_t nbits,
|
387
445
|
MetricType metric,
|
388
|
-
Search_type_t search_type
|
389
|
-
|
446
|
+
Search_type_t search_type,
|
447
|
+
bool own_invlists)
|
448
|
+
: IndexIVFAdditiveQuantizer(
|
449
|
+
&plsq,
|
450
|
+
quantizer,
|
451
|
+
d,
|
452
|
+
nlist,
|
453
|
+
metric,
|
454
|
+
own_invlists),
|
390
455
|
plsq(d, nsplits, Msub, nbits, search_type) {
|
391
|
-
code_size =
|
456
|
+
code_size = plsq.code_size;
|
457
|
+
if (invlists) {
|
458
|
+
invlists->code_size = code_size;
|
459
|
+
}
|
392
460
|
}
|
393
461
|
|
394
462
|
IndexIVFProductLocalSearchQuantizer::IndexIVFProductLocalSearchQuantizer()
|
@@ -35,7 +35,8 @@ struct IndexIVFAdditiveQuantizer : IndexIVF {
|
|
35
35
|
Index* quantizer,
|
36
36
|
size_t d,
|
37
37
|
size_t nlist,
|
38
|
-
MetricType metric = METRIC_L2
|
38
|
+
MetricType metric = METRIC_L2,
|
39
|
+
bool own_invlists = true);
|
39
40
|
|
40
41
|
explicit IndexIVFAdditiveQuantizer(AdditiveQuantizer* aq);
|
41
42
|
|
@@ -50,6 +51,12 @@ struct IndexIVFAdditiveQuantizer : IndexIVF {
|
|
50
51
|
uint8_t* codes,
|
51
52
|
bool include_listnos = false) const override;
|
52
53
|
|
54
|
+
void decode_vectors(
|
55
|
+
idx_t n,
|
56
|
+
const uint8_t* codes,
|
57
|
+
const idx_t* list_nos,
|
58
|
+
float* x) const override;
|
59
|
+
|
53
60
|
InvertedListScanner* get_InvertedListScanner(
|
54
61
|
bool store_pairs,
|
55
62
|
const IDSelector* sel,
|
@@ -82,7 +89,8 @@ struct IndexIVFResidualQuantizer : IndexIVFAdditiveQuantizer {
|
|
82
89
|
size_t nlist,
|
83
90
|
const std::vector<size_t>& nbits,
|
84
91
|
MetricType metric = METRIC_L2,
|
85
|
-
Search_type_t search_type = AdditiveQuantizer::ST_decompress
|
92
|
+
Search_type_t search_type = AdditiveQuantizer::ST_decompress,
|
93
|
+
bool own_invlists = true);
|
86
94
|
|
87
95
|
IndexIVFResidualQuantizer(
|
88
96
|
Index* quantizer,
|
@@ -91,7 +99,8 @@ struct IndexIVFResidualQuantizer : IndexIVFAdditiveQuantizer {
|
|
91
99
|
size_t M, /* number of subquantizers */
|
92
100
|
size_t nbits, /* number of bit per subvector index */
|
93
101
|
MetricType metric = METRIC_L2,
|
94
|
-
Search_type_t search_type = AdditiveQuantizer::ST_decompress
|
102
|
+
Search_type_t search_type = AdditiveQuantizer::ST_decompress,
|
103
|
+
bool own_invlists = true);
|
95
104
|
|
96
105
|
IndexIVFResidualQuantizer();
|
97
106
|
|
@@ -118,7 +127,8 @@ struct IndexIVFLocalSearchQuantizer : IndexIVFAdditiveQuantizer {
|
|
118
127
|
size_t M, /* number of subquantizers */
|
119
128
|
size_t nbits, /* number of bit per subvector index */
|
120
129
|
MetricType metric = METRIC_L2,
|
121
|
-
Search_type_t search_type = AdditiveQuantizer::ST_decompress
|
130
|
+
Search_type_t search_type = AdditiveQuantizer::ST_decompress,
|
131
|
+
bool own_invlists = true);
|
122
132
|
|
123
133
|
IndexIVFLocalSearchQuantizer();
|
124
134
|
|
@@ -147,7 +157,8 @@ struct IndexIVFProductResidualQuantizer : IndexIVFAdditiveQuantizer {
|
|
147
157
|
size_t Msub,
|
148
158
|
size_t nbits,
|
149
159
|
MetricType metric = METRIC_L2,
|
150
|
-
Search_type_t search_type = AdditiveQuantizer::ST_decompress
|
160
|
+
Search_type_t search_type = AdditiveQuantizer::ST_decompress,
|
161
|
+
bool own_invlists = true);
|
151
162
|
|
152
163
|
IndexIVFProductResidualQuantizer();
|
153
164
|
|
@@ -176,7 +187,8 @@ struct IndexIVFProductLocalSearchQuantizer : IndexIVFAdditiveQuantizer {
|
|
176
187
|
size_t Msub,
|
177
188
|
size_t nbits,
|
178
189
|
MetricType metric = METRIC_L2,
|
179
|
-
Search_type_t search_type = AdditiveQuantizer::ST_decompress
|
190
|
+
Search_type_t search_type = AdditiveQuantizer::ST_decompress,
|
191
|
+
bool own_invlists = true);
|
180
192
|
|
181
193
|
IndexIVFProductLocalSearchQuantizer();
|
182
194
|
|