faiss 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/Clustering.cpp +32 -0
- data/vendor/faiss/faiss/Clustering.h +14 -0
- data/vendor/faiss/faiss/Index.h +1 -1
- data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
- data/vendor/faiss/faiss/Index2Layer.h +2 -16
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
- data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
- data/vendor/faiss/faiss/IndexFlat.h +9 -15
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
- data/vendor/faiss/faiss/IndexIVF.h +25 -7
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
- data/vendor/faiss/faiss/IndexLSH.h +2 -15
- data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
- data/vendor/faiss/faiss/IndexPQ.h +2 -17
- data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
- data/vendor/faiss/faiss/IndexRefine.h +10 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
- data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
- data/vendor/faiss/faiss/VectorTransform.h +3 -0
- data/vendor/faiss/faiss/clone_index.cpp +3 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
- data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
- data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
- data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
- data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
- data/vendor/faiss/faiss/impl/io.cpp +1 -1
- data/vendor/faiss/faiss/impl/io_macros.h +20 -0
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/index_factory.cpp +585 -414
- data/vendor/faiss/faiss/index_factory.h +3 -0
- data/vendor/faiss/faiss/utils/distances.cpp +4 -2
- data/vendor/faiss/faiss/utils/distances.h +36 -3
- data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
- data/vendor/faiss/faiss/utils/utils.h +1 -1
- metadata +12 -5
- data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
@@ -7,18 +7,19 @@
|
|
7
7
|
|
8
8
|
// -*- c++ -*-
|
9
9
|
|
10
|
-
#include "faiss/impl/ResidualQuantizer.h"
|
11
|
-
#include <faiss/impl/FaissAssert.h>
|
12
10
|
#include <faiss/impl/ResidualQuantizer.h>
|
13
|
-
#include "faiss/utils/utils.h"
|
14
11
|
|
12
|
+
#include <algorithm>
|
15
13
|
#include <cstddef>
|
16
14
|
#include <cstdio>
|
17
15
|
#include <cstring>
|
18
16
|
#include <memory>
|
19
17
|
|
20
|
-
#include <
|
18
|
+
#include <faiss/impl/FaissAssert.h>
|
19
|
+
#include <faiss/impl/ResidualQuantizer.h>
|
20
|
+
#include <faiss/utils/utils.h>
|
21
21
|
|
22
|
+
#include <faiss/Clustering.h>
|
22
23
|
#include <faiss/IndexFlat.h>
|
23
24
|
#include <faiss/VectorTransform.h>
|
24
25
|
#include <faiss/impl/AuxIndexStructures.h>
|
@@ -26,13 +27,34 @@
|
|
26
27
|
#include <faiss/utils/Heap.h>
|
27
28
|
#include <faiss/utils/distances.h>
|
28
29
|
#include <faiss/utils/hamming.h>
|
30
|
+
#include <faiss/utils/simdlib.h>
|
29
31
|
#include <faiss/utils/utils.h>
|
30
32
|
|
33
|
+
extern "C" {
|
34
|
+
|
35
|
+
// general matrix multiplication
|
36
|
+
int sgemm_(
|
37
|
+
const char* transa,
|
38
|
+
const char* transb,
|
39
|
+
FINTEGER* m,
|
40
|
+
FINTEGER* n,
|
41
|
+
FINTEGER* k,
|
42
|
+
const float* alpha,
|
43
|
+
const float* a,
|
44
|
+
FINTEGER* lda,
|
45
|
+
const float* b,
|
46
|
+
FINTEGER* ldb,
|
47
|
+
float* beta,
|
48
|
+
float* c,
|
49
|
+
FINTEGER* ldc);
|
50
|
+
}
|
51
|
+
|
31
52
|
namespace faiss {
|
32
53
|
|
33
54
|
ResidualQuantizer::ResidualQuantizer()
|
34
55
|
: train_type(Train_progressive_dim),
|
35
|
-
max_beam_size(
|
56
|
+
max_beam_size(5),
|
57
|
+
use_beam_LUT(0),
|
36
58
|
max_mem_distances(5 * (size_t(1) << 30)), // 5 GiB
|
37
59
|
assign_index_factory(nullptr) {
|
38
60
|
d = 0;
|
@@ -40,26 +62,24 @@ ResidualQuantizer::ResidualQuantizer()
|
|
40
62
|
verbose = false;
|
41
63
|
}
|
42
64
|
|
43
|
-
ResidualQuantizer::ResidualQuantizer(
|
65
|
+
ResidualQuantizer::ResidualQuantizer(
|
66
|
+
size_t d,
|
67
|
+
const std::vector<size_t>& nbits,
|
68
|
+
Search_type_t search_type)
|
44
69
|
: ResidualQuantizer() {
|
70
|
+
this->search_type = search_type;
|
45
71
|
this->d = d;
|
46
72
|
M = nbits.size();
|
47
73
|
this->nbits = nbits;
|
48
74
|
set_derived_values();
|
49
75
|
}
|
50
76
|
|
51
|
-
ResidualQuantizer::ResidualQuantizer(
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
for (size_t i = 0; i < d; i++) {
|
58
|
-
c[i] = a[i] - b[i];
|
59
|
-
}
|
60
|
-
}
|
61
|
-
|
62
|
-
} // anonymous namespace
|
77
|
+
ResidualQuantizer::ResidualQuantizer(
|
78
|
+
size_t d,
|
79
|
+
size_t M,
|
80
|
+
size_t nbits,
|
81
|
+
Search_type_t search_type)
|
82
|
+
: ResidualQuantizer(d, std::vector<size_t>(M, nbits), search_type) {}
|
63
83
|
|
64
84
|
void beam_search_encode_step(
|
65
85
|
size_t d,
|
@@ -90,7 +110,7 @@ void beam_search_encode_step(
|
|
90
110
|
cent_ids.resize(n * beam_size * new_beam_size);
|
91
111
|
if (assign_index->ntotal != 0) {
|
92
112
|
// then we assume the codebooks are already added to the index
|
93
|
-
FAISS_THROW_IF_NOT(assign_index->ntotal
|
113
|
+
FAISS_THROW_IF_NOT(assign_index->ntotal == K);
|
94
114
|
} else {
|
95
115
|
assign_index->add(K, cent);
|
96
116
|
}
|
@@ -208,6 +228,7 @@ void ResidualQuantizer::train(size_t n, const float* x) {
|
|
208
228
|
std::vector<int32_t> codes;
|
209
229
|
std::vector<float> distances;
|
210
230
|
double t0 = getmillisecs();
|
231
|
+
double clustering_time = 0;
|
211
232
|
|
212
233
|
for (int m = 0; m < M; m++) {
|
213
234
|
int K = 1 << nbits[m];
|
@@ -224,7 +245,7 @@ void ResidualQuantizer::train(size_t n, const float* x) {
|
|
224
245
|
}
|
225
246
|
train_residuals = residuals1;
|
226
247
|
}
|
227
|
-
train_type_t tt = train_type_t(train_type &
|
248
|
+
train_type_t tt = train_type_t(train_type & 1023);
|
228
249
|
|
229
250
|
std::vector<float> codebooks;
|
230
251
|
float obj = 0;
|
@@ -235,6 +256,9 @@ void ResidualQuantizer::train(size_t n, const float* x) {
|
|
235
256
|
} else {
|
236
257
|
assign_index.reset(new IndexFlatL2(d));
|
237
258
|
}
|
259
|
+
|
260
|
+
double t1 = getmillisecs();
|
261
|
+
|
238
262
|
if (tt == Train_default) {
|
239
263
|
Clustering clus(d, K, cp);
|
240
264
|
clus.train(
|
@@ -256,6 +280,7 @@ void ResidualQuantizer::train(size_t n, const float* x) {
|
|
256
280
|
} else {
|
257
281
|
FAISS_THROW_MSG("train type not supported");
|
258
282
|
}
|
283
|
+
clustering_time += (getmillisecs() - t1) / 1000;
|
259
284
|
|
260
285
|
memcpy(this->codebooks.data() + codebook_offsets[m] * d,
|
261
286
|
codebooks.data(),
|
@@ -268,21 +293,38 @@ void ResidualQuantizer::train(size_t n, const float* x) {
|
|
268
293
|
std::vector<float> new_residuals(n * new_beam_size * d);
|
269
294
|
std::vector<float> new_distances(n * new_beam_size);
|
270
295
|
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
new_codes.data(),
|
282
|
-
new_residuals.data(),
|
283
|
-
new_distances.data(),
|
284
|
-
assign_index.get());
|
296
|
+
size_t bs;
|
297
|
+
{ // determine batch size
|
298
|
+
size_t mem = memory_per_point();
|
299
|
+
if (n > 1 && mem * n > max_mem_distances) {
|
300
|
+
// then split queries to reduce temp memory
|
301
|
+
bs = std::max(max_mem_distances / mem, size_t(1));
|
302
|
+
} else {
|
303
|
+
bs = n;
|
304
|
+
}
|
305
|
+
}
|
285
306
|
|
307
|
+
for (size_t i0 = 0; i0 < n; i0 += bs) {
|
308
|
+
size_t i1 = std::min(i0 + bs, n);
|
309
|
+
|
310
|
+
/* printf("i0: %ld i1: %ld K %d ntotal assign index %ld\n",
|
311
|
+
i0, i1, K, assign_index->ntotal); */
|
312
|
+
|
313
|
+
beam_search_encode_step(
|
314
|
+
d,
|
315
|
+
K,
|
316
|
+
codebooks.data(),
|
317
|
+
i1 - i0,
|
318
|
+
cur_beam_size,
|
319
|
+
residuals.data() + i0 * cur_beam_size * d,
|
320
|
+
m,
|
321
|
+
codes.data() + i0 * cur_beam_size * m,
|
322
|
+
new_beam_size,
|
323
|
+
new_codes.data() + i0 * new_beam_size * (m + 1),
|
324
|
+
new_residuals.data() + i0 * new_beam_size * d,
|
325
|
+
new_distances.data() + i0 * new_beam_size,
|
326
|
+
assign_index.get());
|
327
|
+
}
|
286
328
|
codes.swap(new_codes);
|
287
329
|
residuals.swap(new_residuals);
|
288
330
|
distances.swap(new_distances);
|
@@ -293,20 +335,57 @@ void ResidualQuantizer::train(size_t n, const float* x) {
|
|
293
335
|
}
|
294
336
|
|
295
337
|
if (verbose) {
|
296
|
-
printf("[%.3f s] train stage %d, %d bits, kmeans objective %g, "
|
297
|
-
"total distance %g, beam_size %d->%d\n",
|
338
|
+
printf("[%.3f s, %.3f s clustering] train stage %d, %d bits, kmeans objective %g, "
|
339
|
+
"total distance %g, beam_size %d->%d (batch size %zd)\n",
|
298
340
|
(getmillisecs() - t0) / 1000,
|
341
|
+
clustering_time,
|
299
342
|
m,
|
300
343
|
int(nbits[m]),
|
301
344
|
obj,
|
302
345
|
sum_distances,
|
303
346
|
cur_beam_size,
|
304
|
-
new_beam_size
|
347
|
+
new_beam_size,
|
348
|
+
bs);
|
305
349
|
}
|
306
350
|
cur_beam_size = new_beam_size;
|
307
351
|
}
|
308
352
|
|
353
|
+
// find min and max norms
|
354
|
+
std::vector<float> norms(n);
|
355
|
+
|
356
|
+
for (size_t i = 0; i < n; i++) {
|
357
|
+
norms[i] = fvec_L2sqr(
|
358
|
+
x + i * d, residuals.data() + i * cur_beam_size * d, d);
|
359
|
+
}
|
360
|
+
|
361
|
+
// fvec_norms_L2sqr(norms.data(), x, d, n);
|
362
|
+
|
363
|
+
norm_min = HUGE_VALF;
|
364
|
+
norm_max = -HUGE_VALF;
|
365
|
+
for (idx_t i = 0; i < n; i++) {
|
366
|
+
if (norms[i] < norm_min) {
|
367
|
+
norm_min = norms[i];
|
368
|
+
}
|
369
|
+
if (norms[i] > norm_max) {
|
370
|
+
norm_max = norms[i];
|
371
|
+
}
|
372
|
+
}
|
373
|
+
|
374
|
+
if (search_type == ST_norm_cqint8 || search_type == ST_norm_cqint4) {
|
375
|
+
size_t k = (1 << 8);
|
376
|
+
if (search_type == ST_norm_cqint4) {
|
377
|
+
k = (1 << 4);
|
378
|
+
}
|
379
|
+
Clustering1D clus(k);
|
380
|
+
clus.train_exact(n, norms.data());
|
381
|
+
qnorm.add(clus.k, clus.centroids.data());
|
382
|
+
}
|
383
|
+
|
309
384
|
is_trained = true;
|
385
|
+
|
386
|
+
if (!(train_type & Skip_codebook_tables)) {
|
387
|
+
compute_codebook_tables();
|
388
|
+
}
|
310
389
|
}
|
311
390
|
|
312
391
|
size_t ResidualQuantizer::memory_per_point(int beam_size) const {
|
@@ -341,22 +420,76 @@ void ResidualQuantizer::compute_codes(
|
|
341
420
|
return;
|
342
421
|
}
|
343
422
|
|
344
|
-
std::vector<float> residuals(max_beam_size * n * d);
|
345
423
|
std::vector<int32_t> codes(max_beam_size * M * n);
|
424
|
+
std::vector<float> norms;
|
346
425
|
std::vector<float> distances(max_beam_size * n);
|
347
426
|
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
427
|
+
if (use_beam_LUT == 0) {
|
428
|
+
std::vector<float> residuals(max_beam_size * n * d);
|
429
|
+
|
430
|
+
refine_beam(
|
431
|
+
n,
|
432
|
+
1,
|
433
|
+
x,
|
434
|
+
max_beam_size,
|
435
|
+
codes.data(),
|
436
|
+
residuals.data(),
|
437
|
+
distances.data());
|
438
|
+
|
439
|
+
if (search_type == ST_norm_float || search_type == ST_norm_qint8 ||
|
440
|
+
search_type == ST_norm_qint4) {
|
441
|
+
norms.resize(n);
|
442
|
+
// recover the norms of reconstruction as
|
443
|
+
// || original_vector - residual ||^2
|
444
|
+
for (size_t i = 0; i < n; i++) {
|
445
|
+
norms[i] = fvec_L2sqr(
|
446
|
+
x + i * d, residuals.data() + i * max_beam_size * d, d);
|
447
|
+
}
|
448
|
+
}
|
449
|
+
} else if (use_beam_LUT == 1) {
|
450
|
+
FAISS_THROW_IF_NOT_MSG(
|
451
|
+
codebook_cross_products.size() ==
|
452
|
+
total_codebook_size * total_codebook_size,
|
453
|
+
"call compute_codebook_tables first");
|
454
|
+
|
455
|
+
std::vector<float> query_norms(n);
|
456
|
+
fvec_norms_L2sqr(query_norms.data(), x, d, n);
|
457
|
+
|
458
|
+
std::vector<float> query_cp(n * total_codebook_size);
|
459
|
+
{
|
460
|
+
FINTEGER ti = total_codebook_size, di = d, ni = n;
|
461
|
+
float zero = 0, one = 1;
|
462
|
+
sgemm_("Transposed",
|
463
|
+
"Not transposed",
|
464
|
+
&ti,
|
465
|
+
&ni,
|
466
|
+
&di,
|
467
|
+
&one,
|
468
|
+
codebooks.data(),
|
469
|
+
&di,
|
470
|
+
x,
|
471
|
+
&di,
|
472
|
+
&zero,
|
473
|
+
query_cp.data(),
|
474
|
+
&ti);
|
475
|
+
}
|
356
476
|
|
477
|
+
refine_beam_LUT(
|
478
|
+
n,
|
479
|
+
query_norms.data(),
|
480
|
+
query_cp.data(),
|
481
|
+
max_beam_size,
|
482
|
+
codes.data(),
|
483
|
+
distances.data());
|
484
|
+
}
|
357
485
|
// pack only the first code of the beam (hence the ld_codes=M *
|
358
486
|
// max_beam_size)
|
359
|
-
pack_codes(
|
487
|
+
pack_codes(
|
488
|
+
n,
|
489
|
+
codes.data(),
|
490
|
+
codes_out,
|
491
|
+
M * max_beam_size,
|
492
|
+
norms.size() > 0 ? norms.data() : nullptr);
|
360
493
|
}
|
361
494
|
|
362
495
|
void ResidualQuantizer::refine_beam(
|
@@ -445,4 +578,181 @@ void ResidualQuantizer::refine_beam(
|
|
445
578
|
}
|
446
579
|
}
|
447
580
|
|
581
|
+
/*******************************************************************
|
582
|
+
* Functions using the dot products between codebook entries
|
583
|
+
*******************************************************************/
|
584
|
+
|
585
|
+
void ResidualQuantizer::compute_codebook_tables() {
|
586
|
+
codebook_cross_products.resize(total_codebook_size * total_codebook_size);
|
587
|
+
cent_norms.resize(total_codebook_size);
|
588
|
+
// strictly speaking we could use ssyrk
|
589
|
+
{
|
590
|
+
FINTEGER ni = total_codebook_size;
|
591
|
+
FINTEGER di = d;
|
592
|
+
float zero = 0, one = 1;
|
593
|
+
sgemm_("Transposed",
|
594
|
+
"Not transposed",
|
595
|
+
&ni,
|
596
|
+
&ni,
|
597
|
+
&di,
|
598
|
+
&one,
|
599
|
+
codebooks.data(),
|
600
|
+
&di,
|
601
|
+
codebooks.data(),
|
602
|
+
&di,
|
603
|
+
&zero,
|
604
|
+
codebook_cross_products.data(),
|
605
|
+
&ni);
|
606
|
+
}
|
607
|
+
for (size_t i = 0; i < total_codebook_size; i++) {
|
608
|
+
cent_norms[i] = codebook_cross_products[i + i * total_codebook_size];
|
609
|
+
}
|
610
|
+
}
|
611
|
+
|
612
|
+
void beam_search_encode_step_tab(
|
613
|
+
size_t K,
|
614
|
+
size_t n,
|
615
|
+
size_t beam_size, // input sizes
|
616
|
+
const float* codebook_cross_norms, // size K * ldc
|
617
|
+
size_t ldc, // >= K
|
618
|
+
const uint64_t* codebook_offsets, // m
|
619
|
+
const float* query_cp, // size n * ldqc
|
620
|
+
size_t ldqc, // >= K
|
621
|
+
const float* cent_norms_i, // size K
|
622
|
+
size_t m,
|
623
|
+
const int32_t* codes, // n * beam_size * m
|
624
|
+
const float* distances, // n * beam_size
|
625
|
+
size_t new_beam_size,
|
626
|
+
int32_t* new_codes, // n * new_beam_size * (m + 1)
|
627
|
+
float* new_distances) // n * new_beam_size
|
628
|
+
{
|
629
|
+
FAISS_THROW_IF_NOT(ldc >= K);
|
630
|
+
|
631
|
+
#pragma omp parallel for if (n > 100)
|
632
|
+
for (int64_t i = 0; i < n; i++) {
|
633
|
+
std::vector<float> cent_distances(beam_size * K);
|
634
|
+
std::vector<float> cd_common(K);
|
635
|
+
|
636
|
+
const int32_t* codes_i = codes + i * m * beam_size;
|
637
|
+
const float* query_cp_i = query_cp + i * ldqc;
|
638
|
+
const float* distances_i = distances + i * beam_size;
|
639
|
+
|
640
|
+
for (size_t k = 0; k < K; k++) {
|
641
|
+
cd_common[k] = cent_norms_i[k] - 2 * query_cp_i[k];
|
642
|
+
}
|
643
|
+
|
644
|
+
for (size_t b = 0; b < beam_size; b++) {
|
645
|
+
std::vector<float> dp(K);
|
646
|
+
|
647
|
+
for (size_t m1 = 0; m1 < m; m1++) {
|
648
|
+
size_t c = codes_i[b * m + m1];
|
649
|
+
const float* cb =
|
650
|
+
&codebook_cross_norms[(codebook_offsets[m1] + c) * ldc];
|
651
|
+
fvec_add(K, cb, dp.data(), dp.data());
|
652
|
+
}
|
653
|
+
|
654
|
+
for (size_t k = 0; k < K; k++) {
|
655
|
+
cent_distances[b * K + k] =
|
656
|
+
distances_i[b] + cd_common[k] + 2 * dp[k];
|
657
|
+
}
|
658
|
+
}
|
659
|
+
|
660
|
+
using C = CMax<float, int>;
|
661
|
+
int32_t* new_codes_i = new_codes + i * (m + 1) * new_beam_size;
|
662
|
+
float* new_distances_i = new_distances + i * new_beam_size;
|
663
|
+
|
664
|
+
const float* cent_distances_i = cent_distances.data();
|
665
|
+
|
666
|
+
// then we have to select the best results
|
667
|
+
for (int i = 0; i < new_beam_size; i++) {
|
668
|
+
new_distances_i[i] = C::neutral();
|
669
|
+
}
|
670
|
+
std::vector<int> perm(new_beam_size, -1);
|
671
|
+
heap_addn<C>(
|
672
|
+
new_beam_size,
|
673
|
+
new_distances_i,
|
674
|
+
perm.data(),
|
675
|
+
cent_distances_i,
|
676
|
+
nullptr,
|
677
|
+
beam_size * K);
|
678
|
+
heap_reorder<C>(new_beam_size, new_distances_i, perm.data());
|
679
|
+
|
680
|
+
for (int j = 0; j < new_beam_size; j++) {
|
681
|
+
int js = perm[j] / K;
|
682
|
+
int ls = perm[j] % K;
|
683
|
+
if (m > 0) {
|
684
|
+
memcpy(new_codes_i, codes_i + js * m, sizeof(*codes) * m);
|
685
|
+
}
|
686
|
+
new_codes_i[m] = ls;
|
687
|
+
new_codes_i += m + 1;
|
688
|
+
}
|
689
|
+
}
|
690
|
+
}
|
691
|
+
|
692
|
+
void ResidualQuantizer::refine_beam_LUT(
|
693
|
+
size_t n,
|
694
|
+
const float* query_norms, // size n
|
695
|
+
const float* query_cp, //
|
696
|
+
int out_beam_size,
|
697
|
+
int32_t* out_codes,
|
698
|
+
float* out_distances) const {
|
699
|
+
int beam_size = 1;
|
700
|
+
|
701
|
+
std::vector<int32_t> codes;
|
702
|
+
std::vector<float> distances(query_norms, query_norms + n);
|
703
|
+
double t0 = getmillisecs();
|
704
|
+
|
705
|
+
for (int m = 0; m < M; m++) {
|
706
|
+
int K = 1 << nbits[m];
|
707
|
+
|
708
|
+
int new_beam_size = std::min(beam_size * K, out_beam_size);
|
709
|
+
std::vector<int32_t> new_codes(n * new_beam_size * (m + 1));
|
710
|
+
std::vector<float> new_distances(n * new_beam_size);
|
711
|
+
|
712
|
+
beam_search_encode_step_tab(
|
713
|
+
K,
|
714
|
+
n,
|
715
|
+
beam_size,
|
716
|
+
codebook_cross_products.data() + codebook_offsets[m],
|
717
|
+
total_codebook_size,
|
718
|
+
codebook_offsets.data(),
|
719
|
+
query_cp + codebook_offsets[m],
|
720
|
+
total_codebook_size,
|
721
|
+
cent_norms.data() + codebook_offsets[m],
|
722
|
+
m,
|
723
|
+
codes.data(),
|
724
|
+
distances.data(),
|
725
|
+
new_beam_size,
|
726
|
+
new_codes.data(),
|
727
|
+
new_distances.data());
|
728
|
+
|
729
|
+
codes.swap(new_codes);
|
730
|
+
distances.swap(new_distances);
|
731
|
+
beam_size = new_beam_size;
|
732
|
+
|
733
|
+
if (verbose) {
|
734
|
+
float sum_distances = 0;
|
735
|
+
for (int j = 0; j < distances.size(); j++) {
|
736
|
+
sum_distances += distances[j];
|
737
|
+
}
|
738
|
+
printf("[%.3f s] encode stage %d, %d bits, "
|
739
|
+
"total error %g, beam_size %d\n",
|
740
|
+
(getmillisecs() - t0) / 1000,
|
741
|
+
m,
|
742
|
+
int(nbits[m]),
|
743
|
+
sum_distances,
|
744
|
+
beam_size);
|
745
|
+
}
|
746
|
+
}
|
747
|
+
|
748
|
+
if (out_codes) {
|
749
|
+
memcpy(out_codes, codes.data(), codes.size() * sizeof(codes[0]));
|
750
|
+
}
|
751
|
+
if (out_distances) {
|
752
|
+
memcpy(out_distances,
|
753
|
+
distances.data(),
|
754
|
+
distances.size() * sizeof(distances[0]));
|
755
|
+
}
|
756
|
+
}
|
757
|
+
|
448
758
|
} // namespace faiss
|
@@ -25,18 +25,32 @@ namespace faiss {
|
|
25
25
|
struct ResidualQuantizer : AdditiveQuantizer {
|
26
26
|
/// initialization
|
27
27
|
enum train_type_t {
|
28
|
-
Train_default, ///< regular k-means
|
29
|
-
Train_progressive_dim, ///< progressive dim clustering
|
28
|
+
Train_default = 0, ///< regular k-means
|
29
|
+
Train_progressive_dim = 1, ///< progressive dim clustering
|
30
|
+
Train_default_Train_top_beam = 1024,
|
31
|
+
Train_progressive_dim_Train_top_beam = 1025,
|
32
|
+
Train_default_Skip_codebook_tables = 2048,
|
33
|
+
Train_progressive_dim_Skip_codebook_tables = 2049,
|
34
|
+
Train_default_Train_top_beam_Skip_codebook_tables = 3072,
|
35
|
+
Train_progressive_dim_Train_top_beam_Skip_codebook_tables = 3073,
|
30
36
|
};
|
31
37
|
|
38
|
+
train_type_t train_type;
|
39
|
+
|
32
40
|
// set this bit on train_type if beam is to be trained only on the
|
33
41
|
// first element of the beam (faster but less accurate)
|
34
42
|
static const int Train_top_beam = 1024;
|
35
|
-
|
43
|
+
|
44
|
+
// set this bit to not automatically compute the codebook tables
|
45
|
+
// after training
|
46
|
+
static const int Skip_codebook_tables = 2048;
|
36
47
|
|
37
48
|
/// beam size used for training and for encoding
|
38
49
|
int max_beam_size;
|
39
50
|
|
51
|
+
/// use LUT for beam search
|
52
|
+
int use_beam_LUT;
|
53
|
+
|
40
54
|
/// distance matrixes with beam search can get large, so use this
|
41
55
|
/// to batch computations at encoding time.
|
42
56
|
size_t max_mem_distances;
|
@@ -47,12 +61,16 @@ struct ResidualQuantizer : AdditiveQuantizer {
|
|
47
61
|
/// if non-NULL, use this index for assignment
|
48
62
|
ProgressiveDimIndexFactory* assign_index_factory;
|
49
63
|
|
50
|
-
ResidualQuantizer(
|
64
|
+
ResidualQuantizer(
|
65
|
+
size_t d,
|
66
|
+
const std::vector<size_t>& nbits,
|
67
|
+
Search_type_t search_type = ST_decompress);
|
51
68
|
|
52
69
|
ResidualQuantizer(
|
53
|
-
size_t d,
|
54
|
-
size_t M,
|
55
|
-
size_t nbits
|
70
|
+
size_t d, /* dimensionality of the input vectors */
|
71
|
+
size_t M, /* number of subquantizers */
|
72
|
+
size_t nbits, /* number of bit per subvector index */
|
73
|
+
Search_type_t search_type = ST_decompress);
|
56
74
|
|
57
75
|
ResidualQuantizer();
|
58
76
|
|
@@ -85,12 +103,32 @@ struct ResidualQuantizer : AdditiveQuantizer {
|
|
85
103
|
float* new_residuals = nullptr,
|
86
104
|
float* new_distances = nullptr) const;
|
87
105
|
|
106
|
+
void refine_beam_LUT(
|
107
|
+
size_t n,
|
108
|
+
const float* query_norms,
|
109
|
+
const float* query_cp,
|
110
|
+
int new_beam_size,
|
111
|
+
int32_t* new_codes,
|
112
|
+
float* new_distances = nullptr) const;
|
113
|
+
|
88
114
|
/** Beam search can consume a lot of memory. This function estimates the
|
89
115
|
* amount of mem used by refine_beam to adjust the batch size
|
90
116
|
*
|
91
117
|
* @param beam_size if != -1, override the beam size
|
92
118
|
*/
|
93
119
|
size_t memory_per_point(int beam_size = -1) const;
|
120
|
+
|
121
|
+
/** Cross products used in codebook tables
|
122
|
+
*
|
123
|
+
* These are used to keep track of norms of centroids.
|
124
|
+
*/
|
125
|
+
void compute_codebook_tables();
|
126
|
+
|
127
|
+
/// dot products of all codebook vectors with each other
|
128
|
+
/// size total_codebook_size * total_codebook_size
|
129
|
+
std::vector<float> codebook_cross_products;
|
130
|
+
/// norms of all vectors
|
131
|
+
std::vector<float> cent_norms;
|
94
132
|
};
|
95
133
|
|
96
134
|
/** Encode a residual by sampling from a centroid table.
|
@@ -127,4 +165,24 @@ void beam_search_encode_step(
|
|
127
165
|
float* new_distances,
|
128
166
|
Index* assign_index = nullptr);
|
129
167
|
|
168
|
+
/** Encode a set of vectors using their dot products with the codebooks
|
169
|
+
*
|
170
|
+
*/
|
171
|
+
void beam_search_encode_step_tab(
|
172
|
+
size_t K,
|
173
|
+
size_t n,
|
174
|
+
size_t beam_size, // input sizes
|
175
|
+
const float* codebook_cross_norms, // size K * ldc
|
176
|
+
size_t ldc, // >= K
|
177
|
+
const uint64_t* codebook_offsets, // m
|
178
|
+
const float* query_cp, // size n * ldqc
|
179
|
+
size_t ldqc, // >= K
|
180
|
+
const float* cent_norms_i, // size K
|
181
|
+
size_t m,
|
182
|
+
const int32_t* codes, // n * beam_size * m
|
183
|
+
const float* distances, // n * beam_size
|
184
|
+
size_t new_beam_size,
|
185
|
+
int32_t* new_codes, // n * new_beam_size * (m + 1)
|
186
|
+
float* new_distances); // n * new_beam_size
|
187
|
+
|
130
188
|
}; // namespace faiss
|
@@ -1335,12 +1335,9 @@ namespace {
|
|
1335
1335
|
template <class DCClass>
|
1336
1336
|
struct IVFSQScannerIP : InvertedListScanner {
|
1337
1337
|
DCClass dc;
|
1338
|
-
bool
|
1338
|
+
bool by_residual;
|
1339
1339
|
|
1340
|
-
|
1341
|
-
|
1342
|
-
idx_t list_no; /// current list (set to 0 for Flat index
|
1343
|
-
float accu0; /// added to all distances
|
1340
|
+
float accu0; /// added to all distances
|
1344
1341
|
|
1345
1342
|
IVFSQScannerIP(
|
1346
1343
|
int d,
|
@@ -1348,12 +1345,10 @@ struct IVFSQScannerIP : InvertedListScanner {
|
|
1348
1345
|
size_t code_size,
|
1349
1346
|
bool store_pairs,
|
1350
1347
|
bool by_residual)
|
1351
|
-
: dc(d, trained),
|
1352
|
-
|
1353
|
-
|
1354
|
-
|
1355
|
-
list_no(0),
|
1356
|
-
accu0(0) {}
|
1348
|
+
: dc(d, trained), by_residual(by_residual), accu0(0) {
|
1349
|
+
this->store_pairs = store_pairs;
|
1350
|
+
this->code_size = code_size;
|
1351
|
+
}
|
1357
1352
|
|
1358
1353
|
void set_query(const float* query) override {
|
1359
1354
|
dc.set_query(query);
|
@@ -1411,10 +1406,8 @@ template <class DCClass>
|
|
1411
1406
|
struct IVFSQScannerL2 : InvertedListScanner {
|
1412
1407
|
DCClass dc;
|
1413
1408
|
|
1414
|
-
bool
|
1415
|
-
size_t code_size;
|
1409
|
+
bool by_residual;
|
1416
1410
|
const Index* quantizer;
|
1417
|
-
idx_t list_no; /// current inverted list
|
1418
1411
|
const float* x; /// current query
|
1419
1412
|
|
1420
1413
|
std::vector<float> tmp;
|
@@ -1427,13 +1420,13 @@ struct IVFSQScannerL2 : InvertedListScanner {
|
|
1427
1420
|
bool store_pairs,
|
1428
1421
|
bool by_residual)
|
1429
1422
|
: dc(d, trained),
|
1430
|
-
store_pairs(store_pairs),
|
1431
1423
|
by_residual(by_residual),
|
1432
|
-
code_size(code_size),
|
1433
1424
|
quantizer(quantizer),
|
1434
|
-
list_no(0),
|
1435
1425
|
x(nullptr),
|
1436
|
-
tmp(d) {
|
1426
|
+
tmp(d) {
|
1427
|
+
this->store_pairs = store_pairs;
|
1428
|
+
this->code_size = code_size;
|
1429
|
+
}
|
1437
1430
|
|
1438
1431
|
void set_query(const float* query) override {
|
1439
1432
|
x = query;
|
@@ -1443,8 +1436,8 @@ struct IVFSQScannerL2 : InvertedListScanner {
|
|
1443
1436
|
}
|
1444
1437
|
|
1445
1438
|
void set_list(idx_t list_no, float /*coarse_dis*/) override {
|
1439
|
+
this->list_no = list_no;
|
1446
1440
|
if (by_residual) {
|
1447
|
-
this->list_no = list_no;
|
1448
1441
|
// shift of x_in wrt centroid
|
1449
1442
|
quantizer->compute_residual(x, tmp.data(), list_no);
|
1450
1443
|
dc.set_query(tmp.data());
|