faiss 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +9 -2
- data/ext/faiss/index.cpp +1 -1
- data/ext/faiss/index_binary.cpp +2 -2
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +7 -7
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +4 -18
- data/vendor/faiss/faiss/Clustering.h +31 -21
- data/vendor/faiss/faiss/IVFlib.cpp +22 -11
- data/vendor/faiss/faiss/Index.cpp +1 -1
- data/vendor/faiss/faiss/Index.h +20 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
- data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinary.h +8 -19
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
- data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
- data/vendor/faiss/faiss/IndexFastScan.h +9 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
- data/vendor/faiss/faiss/IndexFlat.h +20 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
- data/vendor/faiss/faiss/IndexHNSW.h +12 -48
- data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
- data/vendor/faiss/faiss/IndexIDMap.h +24 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
- data/vendor/faiss/faiss/IndexIVF.h +37 -5
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
- data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
- data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
- data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
- data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
- data/vendor/faiss/faiss/IndexNSG.h +10 -10
- data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
- data/vendor/faiss/faiss/IndexPQ.h +1 -4
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
- data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
- data/vendor/faiss/faiss/IndexRefine.h +7 -0
- data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
- data/vendor/faiss/faiss/IndexShards.cpp +21 -29
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
- data/vendor/faiss/faiss/MatrixStats.h +21 -9
- data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
- data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
- data/vendor/faiss/faiss/VectorTransform.h +7 -7
- data/vendor/faiss/faiss/clone_index.cpp +15 -10
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
- data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
- data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
- data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
- data/vendor/faiss/faiss/impl/FaissException.h +13 -34
- data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
- data/vendor/faiss/faiss/impl/HNSW.h +9 -8
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
- data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
- data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
- data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
- data/vendor/faiss/faiss/impl/io.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
- data/vendor/faiss/faiss/index_factory.cpp +10 -7
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
- data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
- data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
- data/vendor/faiss/faiss/utils/distances.cpp +128 -74
- data/vendor/faiss/faiss/utils/distances.h +81 -4
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
- data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
- data/vendor/faiss/faiss/utils/fp16.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
- data/vendor/faiss/faiss/utils/hamming.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
- data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
- data/vendor/faiss/faiss/utils/prefetch.h +77 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
- data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
- data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
- data/vendor/faiss/faiss/utils/sorting.h +27 -0
- data/vendor/faiss/faiss/utils/utils.cpp +112 -6
- data/vendor/faiss/faiss/utils/utils.h +57 -20
- metadata +10 -3
|
@@ -9,10 +9,10 @@
|
|
|
9
9
|
|
|
10
10
|
#include <faiss/IndexIVFPQ.h>
|
|
11
11
|
|
|
12
|
-
#include <stdint.h>
|
|
13
12
|
#include <cassert>
|
|
14
13
|
#include <cinttypes>
|
|
15
14
|
#include <cmath>
|
|
15
|
+
#include <cstdint>
|
|
16
16
|
#include <cstdio>
|
|
17
17
|
|
|
18
18
|
#include <algorithm>
|
|
@@ -64,74 +64,16 @@ IndexIVFPQ::IndexIVFPQ(
|
|
|
64
64
|
/****************************************************************
|
|
65
65
|
* training */
|
|
66
66
|
|
|
67
|
-
void IndexIVFPQ::
|
|
68
|
-
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
void IndexIVFPQ::train_residual_o(idx_t n, const float* x, float* residuals_2) {
|
|
72
|
-
const float* x_in = x;
|
|
73
|
-
|
|
74
|
-
x = fvecs_maybe_subsample(
|
|
75
|
-
d,
|
|
76
|
-
(size_t*)&n,
|
|
77
|
-
pq.cp.max_points_per_centroid * pq.ksub,
|
|
78
|
-
x,
|
|
79
|
-
verbose,
|
|
80
|
-
pq.cp.seed);
|
|
81
|
-
|
|
82
|
-
ScopeDeleter<float> del_x(x_in == x ? nullptr : x);
|
|
83
|
-
|
|
84
|
-
const float* trainset;
|
|
85
|
-
ScopeDeleter<float> del_residuals;
|
|
86
|
-
if (by_residual) {
|
|
87
|
-
if (verbose)
|
|
88
|
-
printf("computing residuals\n");
|
|
89
|
-
idx_t* assign = new idx_t[n]; // assignement to coarse centroids
|
|
90
|
-
ScopeDeleter<idx_t> del(assign);
|
|
91
|
-
quantizer->assign(n, x, assign);
|
|
92
|
-
float* residuals = new float[n * d];
|
|
93
|
-
del_residuals.set(residuals);
|
|
94
|
-
for (idx_t i = 0; i < n; i++)
|
|
95
|
-
quantizer->compute_residual(
|
|
96
|
-
x + i * d, residuals + i * d, assign[i]);
|
|
97
|
-
|
|
98
|
-
trainset = residuals;
|
|
99
|
-
} else {
|
|
100
|
-
trainset = x;
|
|
101
|
-
}
|
|
102
|
-
if (verbose)
|
|
103
|
-
printf("training %zdx%zd product quantizer on %" PRId64
|
|
104
|
-
" vectors in %dD\n",
|
|
105
|
-
pq.M,
|
|
106
|
-
pq.ksub,
|
|
107
|
-
n,
|
|
108
|
-
d);
|
|
109
|
-
pq.verbose = verbose;
|
|
110
|
-
pq.train(n, trainset);
|
|
67
|
+
void IndexIVFPQ::train_encoder(idx_t n, const float* x, const idx_t* assign) {
|
|
68
|
+
pq.train(n, x);
|
|
111
69
|
|
|
112
70
|
if (do_polysemous_training) {
|
|
113
71
|
if (verbose)
|
|
114
72
|
printf("doing polysemous training for PQ\n");
|
|
115
73
|
PolysemousTraining default_pt;
|
|
116
|
-
PolysemousTraining* pt =
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
pt->optimize_pq_for_hamming(pq, n, trainset);
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
// prepare second-level residuals for refine PQ
|
|
123
|
-
if (residuals_2) {
|
|
124
|
-
uint8_t* train_codes = new uint8_t[pq.code_size * n];
|
|
125
|
-
ScopeDeleter<uint8_t> del(train_codes);
|
|
126
|
-
pq.compute_codes(trainset, train_codes, n);
|
|
127
|
-
|
|
128
|
-
for (idx_t i = 0; i < n; i++) {
|
|
129
|
-
const float* xx = trainset + i * d;
|
|
130
|
-
float* res = residuals_2 + i * d;
|
|
131
|
-
pq.decode(train_codes + i * pq.code_size, res);
|
|
132
|
-
for (int j = 0; j < d; j++)
|
|
133
|
-
res[j] = xx[j] - res[j];
|
|
134
|
-
}
|
|
74
|
+
PolysemousTraining* pt =
|
|
75
|
+
polysemous_training ? polysemous_training : &default_pt;
|
|
76
|
+
pt->optimize_pq_for_hamming(pq, n, x);
|
|
135
77
|
}
|
|
136
78
|
|
|
137
79
|
if (by_residual) {
|
|
@@ -139,6 +81,10 @@ void IndexIVFPQ::train_residual_o(idx_t n, const float* x, float* residuals_2) {
|
|
|
139
81
|
}
|
|
140
82
|
}
|
|
141
83
|
|
|
84
|
+
idx_t IndexIVFPQ::train_encoder_num_vectors() const {
|
|
85
|
+
return pq.cp.max_points_per_centroid * pq.ksub;
|
|
86
|
+
}
|
|
87
|
+
|
|
142
88
|
/****************************************************************
|
|
143
89
|
* IVFPQ as codec */
|
|
144
90
|
|
|
@@ -189,24 +135,25 @@ void IndexIVFPQ::add_core(
|
|
|
189
135
|
idx_t n,
|
|
190
136
|
const float* x,
|
|
191
137
|
const idx_t* xids,
|
|
192
|
-
const idx_t* coarse_idx
|
|
193
|
-
|
|
138
|
+
const idx_t* coarse_idx,
|
|
139
|
+
void* inverted_list_context) {
|
|
140
|
+
add_core_o(n, x, xids, nullptr, coarse_idx, inverted_list_context);
|
|
194
141
|
}
|
|
195
142
|
|
|
196
|
-
static float
|
|
143
|
+
static std::unique_ptr<float[]> compute_residuals(
|
|
197
144
|
const Index* quantizer,
|
|
198
145
|
idx_t n,
|
|
199
146
|
const float* x,
|
|
200
147
|
const idx_t* list_nos) {
|
|
201
148
|
size_t d = quantizer->d;
|
|
202
|
-
float
|
|
149
|
+
std::unique_ptr<float[]> residuals(new float[n * d]);
|
|
203
150
|
// TODO: parallelize?
|
|
204
151
|
for (size_t i = 0; i < n; i++) {
|
|
205
152
|
if (list_nos[i] < 0)
|
|
206
|
-
memset(residuals + i * d, 0, sizeof(
|
|
153
|
+
memset(residuals.get() + i * d, 0, sizeof(float) * d);
|
|
207
154
|
else
|
|
208
155
|
quantizer->compute_residual(
|
|
209
|
-
x + i * d, residuals + i * d, list_nos[i]);
|
|
156
|
+
x + i * d, residuals.get() + i * d, list_nos[i]);
|
|
210
157
|
}
|
|
211
158
|
return residuals;
|
|
212
159
|
}
|
|
@@ -218,9 +165,9 @@ void IndexIVFPQ::encode_vectors(
|
|
|
218
165
|
uint8_t* codes,
|
|
219
166
|
bool include_listnos) const {
|
|
220
167
|
if (by_residual) {
|
|
221
|
-
float
|
|
222
|
-
|
|
223
|
-
pq.compute_codes(to_encode, codes, n);
|
|
168
|
+
std::unique_ptr<float[]> to_encode =
|
|
169
|
+
compute_residuals(quantizer, n, x, list_nos);
|
|
170
|
+
pq.compute_codes(to_encode.get(), codes, n);
|
|
224
171
|
} else {
|
|
225
172
|
pq.compute_codes(x, codes, n);
|
|
226
173
|
}
|
|
@@ -266,7 +213,8 @@ void IndexIVFPQ::add_core_o(
|
|
|
266
213
|
const float* x,
|
|
267
214
|
const idx_t* xids,
|
|
268
215
|
float* residuals_2,
|
|
269
|
-
const idx_t* precomputed_idx
|
|
216
|
+
const idx_t* precomputed_idx,
|
|
217
|
+
void* inverted_list_context) {
|
|
270
218
|
idx_t bs = index_ivfpq_add_core_o_bs;
|
|
271
219
|
if (n > bs) {
|
|
272
220
|
for (idx_t i0 = 0; i0 < n; i0 += bs) {
|
|
@@ -283,7 +231,8 @@ void IndexIVFPQ::add_core_o(
|
|
|
283
231
|
x + i0 * d,
|
|
284
232
|
xids ? xids + i0 : nullptr,
|
|
285
233
|
residuals_2 ? residuals_2 + i0 * d : nullptr,
|
|
286
|
-
precomputed_idx ? precomputed_idx + i0 : nullptr
|
|
234
|
+
precomputed_idx ? precomputed_idx + i0 : nullptr,
|
|
235
|
+
inverted_list_context);
|
|
287
236
|
}
|
|
288
237
|
return;
|
|
289
238
|
}
|
|
@@ -295,31 +244,30 @@ void IndexIVFPQ::add_core_o(
|
|
|
295
244
|
FAISS_THROW_IF_NOT(is_trained);
|
|
296
245
|
double t0 = getmillisecs();
|
|
297
246
|
const idx_t* idx;
|
|
298
|
-
|
|
247
|
+
std::unique_ptr<idx_t[]> del_idx;
|
|
299
248
|
|
|
300
249
|
if (precomputed_idx) {
|
|
301
250
|
idx = precomputed_idx;
|
|
302
251
|
} else {
|
|
303
252
|
idx_t* idx0 = new idx_t[n];
|
|
304
|
-
del_idx.
|
|
253
|
+
del_idx.reset(idx0);
|
|
305
254
|
quantizer->assign(n, x, idx0);
|
|
306
255
|
idx = idx0;
|
|
307
256
|
}
|
|
308
257
|
|
|
309
258
|
double t1 = getmillisecs();
|
|
310
|
-
uint8_t
|
|
311
|
-
ScopeDeleter<uint8_t> del_xcodes(xcodes);
|
|
259
|
+
std::unique_ptr<uint8_t[]> xcodes(new uint8_t[n * code_size]);
|
|
312
260
|
|
|
313
261
|
const float* to_encode = nullptr;
|
|
314
|
-
|
|
262
|
+
std::unique_ptr<const float[]> del_to_encode;
|
|
315
263
|
|
|
316
264
|
if (by_residual) {
|
|
317
|
-
|
|
318
|
-
del_to_encode.
|
|
265
|
+
del_to_encode = compute_residuals(quantizer, n, x, idx);
|
|
266
|
+
to_encode = del_to_encode.get();
|
|
319
267
|
} else {
|
|
320
268
|
to_encode = x;
|
|
321
269
|
}
|
|
322
|
-
pq.compute_codes(to_encode, xcodes, n);
|
|
270
|
+
pq.compute_codes(to_encode, xcodes.get(), n);
|
|
323
271
|
|
|
324
272
|
double t2 = getmillisecs();
|
|
325
273
|
// TODO: parallelize?
|
|
@@ -335,8 +283,9 @@ void IndexIVFPQ::add_core_o(
|
|
|
335
283
|
continue;
|
|
336
284
|
}
|
|
337
285
|
|
|
338
|
-
uint8_t* code = xcodes + i * code_size;
|
|
339
|
-
size_t offset =
|
|
286
|
+
uint8_t* code = xcodes.get() + i * code_size;
|
|
287
|
+
size_t offset =
|
|
288
|
+
invlists->add_entry(key, id, code, inverted_list_context);
|
|
340
289
|
|
|
341
290
|
if (residuals_2) {
|
|
342
291
|
float* res2 = residuals_2 + i * d;
|
|
@@ -804,7 +753,7 @@ struct QueryTables {
|
|
|
804
753
|
}
|
|
805
754
|
};
|
|
806
755
|
|
|
807
|
-
// This way of handling the
|
|
756
|
+
// This way of handling the selector is not optimal since all distances
|
|
808
757
|
// are computed even if the id would filter it out.
|
|
809
758
|
template <class C, bool use_sel>
|
|
810
759
|
struct KnnSearchResults {
|
|
@@ -937,7 +886,8 @@ struct IVFPQScannerT : QueryTables {
|
|
|
937
886
|
float distance_2 = 0;
|
|
938
887
|
float distance_3 = 0;
|
|
939
888
|
distance_four_codes<PQDecoder>(
|
|
940
|
-
pq,
|
|
889
|
+
pq.M,
|
|
890
|
+
pq.nbits,
|
|
941
891
|
sim_table,
|
|
942
892
|
codes + saved_j[0] * pq.code_size,
|
|
943
893
|
codes + saved_j[1] * pq.code_size,
|
|
@@ -957,24 +907,30 @@ struct IVFPQScannerT : QueryTables {
|
|
|
957
907
|
}
|
|
958
908
|
|
|
959
909
|
if (counter >= 1) {
|
|
960
|
-
float dis =
|
|
961
|
-
dis0 +
|
|
910
|
+
float dis = dis0 +
|
|
962
911
|
distance_single_code<PQDecoder>(
|
|
963
|
-
|
|
912
|
+
pq.M,
|
|
913
|
+
pq.nbits,
|
|
914
|
+
sim_table,
|
|
915
|
+
codes + saved_j[0] * pq.code_size);
|
|
964
916
|
res.add(saved_j[0], dis);
|
|
965
917
|
}
|
|
966
918
|
if (counter >= 2) {
|
|
967
|
-
float dis =
|
|
968
|
-
dis0 +
|
|
919
|
+
float dis = dis0 +
|
|
969
920
|
distance_single_code<PQDecoder>(
|
|
970
|
-
|
|
921
|
+
pq.M,
|
|
922
|
+
pq.nbits,
|
|
923
|
+
sim_table,
|
|
924
|
+
codes + saved_j[1] * pq.code_size);
|
|
971
925
|
res.add(saved_j[1], dis);
|
|
972
926
|
}
|
|
973
927
|
if (counter >= 3) {
|
|
974
|
-
float dis =
|
|
975
|
-
dis0 +
|
|
928
|
+
float dis = dis0 +
|
|
976
929
|
distance_single_code<PQDecoder>(
|
|
977
|
-
|
|
930
|
+
pq.M,
|
|
931
|
+
pq.nbits,
|
|
932
|
+
sim_table,
|
|
933
|
+
codes + saved_j[2] * pq.code_size);
|
|
978
934
|
res.add(saved_j[2], dis);
|
|
979
935
|
}
|
|
980
936
|
}
|
|
@@ -1090,7 +1046,7 @@ struct IVFPQScannerT : QueryTables {
|
|
|
1090
1046
|
const uint8_t* codes,
|
|
1091
1047
|
SearchResultType& res) const {
|
|
1092
1048
|
int ht = ivfpq.polysemous_ht;
|
|
1093
|
-
size_t n_hamming_pass = 0
|
|
1049
|
+
size_t n_hamming_pass = 0;
|
|
1094
1050
|
|
|
1095
1051
|
int code_size = pq.code_size;
|
|
1096
1052
|
|
|
@@ -1137,7 +1093,8 @@ struct IVFPQScannerT : QueryTables {
|
|
|
1137
1093
|
float distance_2 = dis0;
|
|
1138
1094
|
float distance_3 = dis0;
|
|
1139
1095
|
distance_four_codes<PQDecoder>(
|
|
1140
|
-
pq,
|
|
1096
|
+
pq.M,
|
|
1097
|
+
pq.nbits,
|
|
1141
1098
|
sim_table,
|
|
1142
1099
|
codes + saved_j[0] * pq.code_size,
|
|
1143
1100
|
codes + saved_j[1] * pq.code_size,
|
|
@@ -1165,10 +1122,12 @@ struct IVFPQScannerT : QueryTables {
|
|
|
1165
1122
|
for (size_t kk = 0; kk < counter; kk++) {
|
|
1166
1123
|
n_hamming_pass++;
|
|
1167
1124
|
|
|
1168
|
-
float dis =
|
|
1169
|
-
dis0 +
|
|
1125
|
+
float dis = dis0 +
|
|
1170
1126
|
distance_single_code<PQDecoder>(
|
|
1171
|
-
|
|
1127
|
+
pq.M,
|
|
1128
|
+
pq.nbits,
|
|
1129
|
+
sim_table,
|
|
1130
|
+
codes + saved_j[kk] * pq.code_size);
|
|
1172
1131
|
|
|
1173
1132
|
res.add(saved_j[kk], dis);
|
|
1174
1133
|
}
|
|
@@ -1185,7 +1144,10 @@ struct IVFPQScannerT : QueryTables {
|
|
|
1185
1144
|
|
|
1186
1145
|
float dis = dis0 +
|
|
1187
1146
|
distance_single_code<PQDecoder>(
|
|
1188
|
-
pq,
|
|
1147
|
+
pq.M,
|
|
1148
|
+
pq.nbits,
|
|
1149
|
+
sim_table,
|
|
1150
|
+
codes + j * code_size);
|
|
1189
1151
|
|
|
1190
1152
|
res.add(j, dis);
|
|
1191
1153
|
}
|
|
@@ -1195,30 +1157,23 @@ struct IVFPQScannerT : QueryTables {
|
|
|
1195
1157
|
{ indexIVFPQ_stats.n_hamming_pass += n_hamming_pass; }
|
|
1196
1158
|
}
|
|
1197
1159
|
|
|
1160
|
+
template <class SearchResultType>
|
|
1161
|
+
struct Run_scan_list_polysemous_hc {
|
|
1162
|
+
using T = void;
|
|
1163
|
+
template <class HammingComputer, class... Types>
|
|
1164
|
+
void f(const IVFPQScannerT* scanner, Types... args) {
|
|
1165
|
+
scanner->scan_list_polysemous_hc<HammingComputer, SearchResultType>(
|
|
1166
|
+
args...);
|
|
1167
|
+
}
|
|
1168
|
+
};
|
|
1169
|
+
|
|
1198
1170
|
template <class SearchResultType>
|
|
1199
1171
|
void scan_list_polysemous(
|
|
1200
1172
|
size_t ncode,
|
|
1201
1173
|
const uint8_t* codes,
|
|
1202
1174
|
SearchResultType& res) const {
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
case cs: \
|
|
1206
|
-
scan_list_polysemous_hc<HammingComputer##cs, SearchResultType>( \
|
|
1207
|
-
ncode, codes, res); \
|
|
1208
|
-
break
|
|
1209
|
-
HANDLE_CODE_SIZE(4);
|
|
1210
|
-
HANDLE_CODE_SIZE(8);
|
|
1211
|
-
HANDLE_CODE_SIZE(16);
|
|
1212
|
-
HANDLE_CODE_SIZE(20);
|
|
1213
|
-
HANDLE_CODE_SIZE(32);
|
|
1214
|
-
HANDLE_CODE_SIZE(64);
|
|
1215
|
-
#undef HANDLE_CODE_SIZE
|
|
1216
|
-
default:
|
|
1217
|
-
scan_list_polysemous_hc<
|
|
1218
|
-
HammingComputerDefault,
|
|
1219
|
-
SearchResultType>(ncode, codes, res);
|
|
1220
|
-
break;
|
|
1221
|
-
}
|
|
1175
|
+
Run_scan_list_polysemous_hc<SearchResultType> r;
|
|
1176
|
+
dispatch_HammingComputer(pq.code_size, r, this, ncode, codes, res);
|
|
1222
1177
|
}
|
|
1223
1178
|
};
|
|
1224
1179
|
|
|
@@ -1248,6 +1203,7 @@ struct IVFPQScanner : IVFPQScannerT<idx_t, METRIC_TYPE, PQDecoder>,
|
|
|
1248
1203
|
precompute_mode(precompute_mode),
|
|
1249
1204
|
sel(sel) {
|
|
1250
1205
|
this->store_pairs = store_pairs;
|
|
1206
|
+
this->keep_max = is_similarity_metric(METRIC_TYPE);
|
|
1251
1207
|
}
|
|
1252
1208
|
|
|
1253
1209
|
void set_query(const float* query) override {
|
|
@@ -1263,7 +1219,7 @@ struct IVFPQScanner : IVFPQScannerT<idx_t, METRIC_TYPE, PQDecoder>,
|
|
|
1263
1219
|
assert(precompute_mode == 2);
|
|
1264
1220
|
float dis = this->dis0 +
|
|
1265
1221
|
distance_single_code<PQDecoder>(
|
|
1266
|
-
this->pq, this->sim_table, code);
|
|
1222
|
+
this->pq.M, this->pq.nbits, this->sim_table, code);
|
|
1267
1223
|
return dis;
|
|
1268
1224
|
}
|
|
1269
1225
|
|
|
@@ -32,8 +32,6 @@ FAISS_API extern size_t precomputed_table_max_bytes;
|
|
|
32
32
|
* vector is encoded as a product quantizer code.
|
|
33
33
|
*/
|
|
34
34
|
struct IndexIVFPQ : IndexIVF {
|
|
35
|
-
bool by_residual; ///< Encode residual or plain vector?
|
|
36
|
-
|
|
37
35
|
ProductQuantizer pq; ///< produces the codes
|
|
38
36
|
|
|
39
37
|
bool do_polysemous_training; ///< reorder PQ centroids after training?
|
|
@@ -73,7 +71,8 @@ struct IndexIVFPQ : IndexIVF {
|
|
|
73
71
|
idx_t n,
|
|
74
72
|
const float* x,
|
|
75
73
|
const idx_t* xids,
|
|
76
|
-
const idx_t* precomputed_idx
|
|
74
|
+
const idx_t* precomputed_idx,
|
|
75
|
+
void* inverted_list_context = nullptr) override;
|
|
77
76
|
|
|
78
77
|
/// same as add_core, also:
|
|
79
78
|
/// - output 2nd level residuals if residuals_2 != NULL
|
|
@@ -83,13 +82,13 @@ struct IndexIVFPQ : IndexIVF {
|
|
|
83
82
|
const float* x,
|
|
84
83
|
const idx_t* xids,
|
|
85
84
|
float* residuals_2,
|
|
86
|
-
const idx_t* precomputed_idx = nullptr
|
|
85
|
+
const idx_t* precomputed_idx = nullptr,
|
|
86
|
+
void* inverted_list_context = nullptr);
|
|
87
87
|
|
|
88
88
|
/// trains the product quantizer
|
|
89
|
-
void
|
|
89
|
+
void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
|
|
90
90
|
|
|
91
|
-
|
|
92
|
-
void train_residual_o(idx_t n, const float* x, float* residuals_2);
|
|
91
|
+
idx_t train_encoder_num_vectors() const override;
|
|
93
92
|
|
|
94
93
|
void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
|
|
95
94
|
const override;
|
|
@@ -44,7 +44,7 @@ IndexIVFPQFastScan::IndexIVFPQFastScan(
|
|
|
44
44
|
MetricType metric,
|
|
45
45
|
int bbs)
|
|
46
46
|
: IndexIVFFastScan(quantizer, d, nlist, 0, metric), pq(d, M, nbits) {
|
|
47
|
-
by_residual = false; // set to false by default because it's
|
|
47
|
+
by_residual = false; // set to false by default because it's faster
|
|
48
48
|
|
|
49
49
|
init_fastscan(M, nbits, nlist, metric, bbs);
|
|
50
50
|
}
|
|
@@ -106,54 +106,22 @@ IndexIVFPQFastScan::IndexIVFPQFastScan(const IndexIVFPQ& orig, int bbs)
|
|
|
106
106
|
* Training
|
|
107
107
|
*********************************************************/
|
|
108
108
|
|
|
109
|
-
void IndexIVFPQFastScan::
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
pq.cp.max_points_per_centroid * pq.ksub,
|
|
114
|
-
x_in,
|
|
115
|
-
verbose,
|
|
116
|
-
pq.cp.seed);
|
|
117
|
-
|
|
118
|
-
std::unique_ptr<float[]> del_x;
|
|
119
|
-
if (x != x_in) {
|
|
120
|
-
del_x.reset((float*)x);
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
const float* trainset;
|
|
124
|
-
AlignedTable<float> residuals;
|
|
125
|
-
|
|
126
|
-
if (by_residual) {
|
|
127
|
-
if (verbose)
|
|
128
|
-
printf("computing residuals\n");
|
|
129
|
-
std::vector<idx_t> assign(n);
|
|
130
|
-
quantizer->assign(n, x, assign.data());
|
|
131
|
-
residuals.resize(n * d);
|
|
132
|
-
for (idx_t i = 0; i < n; i++) {
|
|
133
|
-
quantizer->compute_residual(
|
|
134
|
-
x + i * d, residuals.data() + i * d, assign[i]);
|
|
135
|
-
}
|
|
136
|
-
trainset = residuals.data();
|
|
137
|
-
} else {
|
|
138
|
-
trainset = x;
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
if (verbose) {
|
|
142
|
-
printf("training %zdx%zd product quantizer on "
|
|
143
|
-
"%" PRId64 " vectors in %dD\n",
|
|
144
|
-
pq.M,
|
|
145
|
-
pq.ksub,
|
|
146
|
-
n,
|
|
147
|
-
d);
|
|
148
|
-
}
|
|
109
|
+
void IndexIVFPQFastScan::train_encoder(
|
|
110
|
+
idx_t n,
|
|
111
|
+
const float* x,
|
|
112
|
+
const idx_t* assign) {
|
|
149
113
|
pq.verbose = verbose;
|
|
150
|
-
pq.train(n,
|
|
114
|
+
pq.train(n, x);
|
|
151
115
|
|
|
152
116
|
if (by_residual && metric_type == METRIC_L2) {
|
|
153
117
|
precompute_table();
|
|
154
118
|
}
|
|
155
119
|
}
|
|
156
120
|
|
|
121
|
+
idx_t IndexIVFPQFastScan::train_encoder_num_vectors() const {
|
|
122
|
+
return pq.cp.max_points_per_centroid * pq.ksub;
|
|
123
|
+
}
|
|
124
|
+
|
|
157
125
|
void IndexIVFPQFastScan::precompute_table() {
|
|
158
126
|
initialize_IVFPQ_precomputed_table(
|
|
159
127
|
use_precomputed_table,
|
|
@@ -203,7 +171,7 @@ void IndexIVFPQFastScan::encode_vectors(
|
|
|
203
171
|
* Look-Up Table functions
|
|
204
172
|
*********************************************************/
|
|
205
173
|
|
|
206
|
-
void
|
|
174
|
+
void fvec_madd_simd(
|
|
207
175
|
size_t n,
|
|
208
176
|
const float* a,
|
|
209
177
|
float bf,
|
|
@@ -234,12 +202,12 @@ bool IndexIVFPQFastScan::lookup_table_is_3d() const {
|
|
|
234
202
|
void IndexIVFPQFastScan::compute_LUT(
|
|
235
203
|
size_t n,
|
|
236
204
|
const float* x,
|
|
237
|
-
const
|
|
238
|
-
const float* coarse_dis,
|
|
205
|
+
const CoarseQuantized& cq,
|
|
239
206
|
AlignedTable<float>& dis_tables,
|
|
240
207
|
AlignedTable<float>& biases) const {
|
|
241
208
|
size_t dim12 = pq.ksub * pq.M;
|
|
242
209
|
size_t d = pq.d;
|
|
210
|
+
size_t nprobe = this->nprobe;
|
|
243
211
|
|
|
244
212
|
if (by_residual) {
|
|
245
213
|
if (metric_type == METRIC_L2) {
|
|
@@ -247,7 +215,7 @@ void IndexIVFPQFastScan::compute_LUT(
|
|
|
247
215
|
|
|
248
216
|
if (use_precomputed_table == 1) {
|
|
249
217
|
biases.resize(n * nprobe);
|
|
250
|
-
memcpy(biases.get(),
|
|
218
|
+
memcpy(biases.get(), cq.dis, sizeof(float) * n * nprobe);
|
|
251
219
|
|
|
252
220
|
AlignedTable<float> ip_table(n * dim12);
|
|
253
221
|
pq.compute_inner_prod_tables(n, x, ip_table.get());
|
|
@@ -256,10 +224,10 @@ void IndexIVFPQFastScan::compute_LUT(
|
|
|
256
224
|
for (idx_t ij = 0; ij < n * nprobe; ij++) {
|
|
257
225
|
idx_t i = ij / nprobe;
|
|
258
226
|
float* tab = dis_tables.get() + ij * dim12;
|
|
259
|
-
idx_t cij =
|
|
227
|
+
idx_t cij = cq.ids[ij];
|
|
260
228
|
|
|
261
229
|
if (cij >= 0) {
|
|
262
|
-
|
|
230
|
+
fvec_madd_simd(
|
|
263
231
|
dim12,
|
|
264
232
|
precomputed_table.get() + cij * dim12,
|
|
265
233
|
-2,
|
|
@@ -281,7 +249,7 @@ void IndexIVFPQFastScan::compute_LUT(
|
|
|
281
249
|
for (idx_t ij = 0; ij < n * nprobe; ij++) {
|
|
282
250
|
idx_t i = ij / nprobe;
|
|
283
251
|
float* xij = &xrel[ij * d];
|
|
284
|
-
idx_t cij =
|
|
252
|
+
idx_t cij = cq.ids[ij];
|
|
285
253
|
|
|
286
254
|
if (cij >= 0) {
|
|
287
255
|
quantizer->compute_residual(x + i * d, xij, cij);
|
|
@@ -301,7 +269,7 @@ void IndexIVFPQFastScan::compute_LUT(
|
|
|
301
269
|
// compute_inner_prod_tables(pq, n, x, dis_tables.get());
|
|
302
270
|
|
|
303
271
|
biases.resize(n * nprobe);
|
|
304
|
-
memcpy(biases.get(),
|
|
272
|
+
memcpy(biases.get(), cq.dis, sizeof(float) * n * nprobe);
|
|
305
273
|
} else {
|
|
306
274
|
FAISS_THROW_FMT("metric %d not supported", metric_type);
|
|
307
275
|
}
|
|
@@ -54,7 +54,9 @@ struct IndexIVFPQFastScan : IndexIVFFastScan {
|
|
|
54
54
|
// built from an IndexIVFPQ
|
|
55
55
|
explicit IndexIVFPQFastScan(const IndexIVFPQ& orig, int bbs = 32);
|
|
56
56
|
|
|
57
|
-
void
|
|
57
|
+
void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
|
|
58
|
+
|
|
59
|
+
idx_t train_encoder_num_vectors() const override;
|
|
58
60
|
|
|
59
61
|
/// build precomputed table, possibly updating use_precomputed_table
|
|
60
62
|
void precompute_table();
|
|
@@ -75,8 +77,7 @@ struct IndexIVFPQFastScan : IndexIVFFastScan {
|
|
|
75
77
|
void compute_LUT(
|
|
76
78
|
size_t n,
|
|
77
79
|
const float* x,
|
|
78
|
-
const
|
|
79
|
-
const float* coarse_dis,
|
|
80
|
+
const CoarseQuantized& cq,
|
|
80
81
|
AlignedTable<float>& dis_tables,
|
|
81
82
|
AlignedTable<float>& biases) const override;
|
|
82
83
|
|