faiss 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/faiss/Clustering.cpp +32 -0
  6. data/vendor/faiss/faiss/Clustering.h +14 -0
  7. data/vendor/faiss/faiss/Index.h +1 -1
  8. data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
  9. data/vendor/faiss/faiss/Index2Layer.h +2 -16
  10. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  11. data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
  12. data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
  13. data/vendor/faiss/faiss/IndexFlat.h +9 -15
  14. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  15. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  16. data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
  17. data/vendor/faiss/faiss/IndexIVF.h +25 -7
  18. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  19. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  20. data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
  21. data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
  22. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  23. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
  24. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
  25. data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
  26. data/vendor/faiss/faiss/IndexLSH.h +2 -15
  27. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
  28. data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
  29. data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
  30. data/vendor/faiss/faiss/IndexPQ.h +2 -17
  31. data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
  32. data/vendor/faiss/faiss/IndexRefine.h +10 -0
  33. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
  34. data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
  35. data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
  36. data/vendor/faiss/faiss/VectorTransform.h +3 -0
  37. data/vendor/faiss/faiss/clone_index.cpp +3 -2
  38. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
  39. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  40. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
  41. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
  42. data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
  43. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
  44. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
  45. data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
  46. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  47. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
  48. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
  49. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
  50. data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
  51. data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
  52. data/vendor/faiss/faiss/impl/io.cpp +1 -1
  53. data/vendor/faiss/faiss/impl/io_macros.h +20 -0
  54. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  55. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  56. data/vendor/faiss/faiss/index_factory.cpp +585 -414
  57. data/vendor/faiss/faiss/index_factory.h +3 -0
  58. data/vendor/faiss/faiss/utils/distances.cpp +4 -2
  59. data/vendor/faiss/faiss/utils/distances.h +36 -3
  60. data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
  61. data/vendor/faiss/faiss/utils/utils.h +1 -1
  62. metadata +12 -5
  63. data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
@@ -7,18 +7,19 @@
7
7
 
8
8
  // -*- c++ -*-
9
9
 
10
- #include "faiss/impl/ResidualQuantizer.h"
11
- #include <faiss/impl/FaissAssert.h>
12
10
  #include <faiss/impl/ResidualQuantizer.h>
13
- #include "faiss/utils/utils.h"
14
11
 
12
+ #include <algorithm>
15
13
  #include <cstddef>
16
14
  #include <cstdio>
17
15
  #include <cstring>
18
16
  #include <memory>
19
17
 
20
- #include <algorithm>
18
+ #include <faiss/impl/FaissAssert.h>
19
+ #include <faiss/impl/ResidualQuantizer.h>
20
+ #include <faiss/utils/utils.h>
21
21
 
22
+ #include <faiss/Clustering.h>
22
23
  #include <faiss/IndexFlat.h>
23
24
  #include <faiss/VectorTransform.h>
24
25
  #include <faiss/impl/AuxIndexStructures.h>
@@ -26,13 +27,34 @@
26
27
  #include <faiss/utils/Heap.h>
27
28
  #include <faiss/utils/distances.h>
28
29
  #include <faiss/utils/hamming.h>
30
+ #include <faiss/utils/simdlib.h>
29
31
  #include <faiss/utils/utils.h>
30
32
 
33
+ extern "C" {
34
+
35
+ // general matrix multiplication
36
+ int sgemm_(
37
+ const char* transa,
38
+ const char* transb,
39
+ FINTEGER* m,
40
+ FINTEGER* n,
41
+ FINTEGER* k,
42
+ const float* alpha,
43
+ const float* a,
44
+ FINTEGER* lda,
45
+ const float* b,
46
+ FINTEGER* ldb,
47
+ float* beta,
48
+ float* c,
49
+ FINTEGER* ldc);
50
+ }
51
+
31
52
  namespace faiss {
32
53
 
33
54
  ResidualQuantizer::ResidualQuantizer()
34
55
  : train_type(Train_progressive_dim),
35
- max_beam_size(30),
56
+ max_beam_size(5),
57
+ use_beam_LUT(0),
36
58
  max_mem_distances(5 * (size_t(1) << 30)), // 5 GiB
37
59
  assign_index_factory(nullptr) {
38
60
  d = 0;
@@ -40,26 +62,24 @@ ResidualQuantizer::ResidualQuantizer()
40
62
  verbose = false;
41
63
  }
42
64
 
43
- ResidualQuantizer::ResidualQuantizer(size_t d, const std::vector<size_t>& nbits)
65
+ ResidualQuantizer::ResidualQuantizer(
66
+ size_t d,
67
+ const std::vector<size_t>& nbits,
68
+ Search_type_t search_type)
44
69
  : ResidualQuantizer() {
70
+ this->search_type = search_type;
45
71
  this->d = d;
46
72
  M = nbits.size();
47
73
  this->nbits = nbits;
48
74
  set_derived_values();
49
75
  }
50
76
 
51
- ResidualQuantizer::ResidualQuantizer(size_t d, size_t M, size_t nbits)
52
- : ResidualQuantizer(d, std::vector<size_t>(M, nbits)) {}
53
-
54
- namespace {
55
-
56
- void fvec_sub(size_t d, const float* a, const float* b, float* c) {
57
- for (size_t i = 0; i < d; i++) {
58
- c[i] = a[i] - b[i];
59
- }
60
- }
61
-
62
- } // anonymous namespace
77
+ ResidualQuantizer::ResidualQuantizer(
78
+ size_t d,
79
+ size_t M,
80
+ size_t nbits,
81
+ Search_type_t search_type)
82
+ : ResidualQuantizer(d, std::vector<size_t>(M, nbits), search_type) {}
63
83
 
64
84
  void beam_search_encode_step(
65
85
  size_t d,
@@ -90,7 +110,7 @@ void beam_search_encode_step(
90
110
  cent_ids.resize(n * beam_size * new_beam_size);
91
111
  if (assign_index->ntotal != 0) {
92
112
  // then we assume the codebooks are already added to the index
93
- FAISS_THROW_IF_NOT(assign_index->ntotal != K);
113
+ FAISS_THROW_IF_NOT(assign_index->ntotal == K);
94
114
  } else {
95
115
  assign_index->add(K, cent);
96
116
  }
@@ -208,6 +228,7 @@ void ResidualQuantizer::train(size_t n, const float* x) {
208
228
  std::vector<int32_t> codes;
209
229
  std::vector<float> distances;
210
230
  double t0 = getmillisecs();
231
+ double clustering_time = 0;
211
232
 
212
233
  for (int m = 0; m < M; m++) {
213
234
  int K = 1 << nbits[m];
@@ -224,7 +245,7 @@ void ResidualQuantizer::train(size_t n, const float* x) {
224
245
  }
225
246
  train_residuals = residuals1;
226
247
  }
227
- train_type_t tt = train_type_t(train_type & ~Train_top_beam);
248
+ train_type_t tt = train_type_t(train_type & 1023);
228
249
 
229
250
  std::vector<float> codebooks;
230
251
  float obj = 0;
@@ -235,6 +256,9 @@ void ResidualQuantizer::train(size_t n, const float* x) {
235
256
  } else {
236
257
  assign_index.reset(new IndexFlatL2(d));
237
258
  }
259
+
260
+ double t1 = getmillisecs();
261
+
238
262
  if (tt == Train_default) {
239
263
  Clustering clus(d, K, cp);
240
264
  clus.train(
@@ -256,6 +280,7 @@ void ResidualQuantizer::train(size_t n, const float* x) {
256
280
  } else {
257
281
  FAISS_THROW_MSG("train type not supported");
258
282
  }
283
+ clustering_time += (getmillisecs() - t1) / 1000;
259
284
 
260
285
  memcpy(this->codebooks.data() + codebook_offsets[m] * d,
261
286
  codebooks.data(),
@@ -268,21 +293,38 @@ void ResidualQuantizer::train(size_t n, const float* x) {
268
293
  std::vector<float> new_residuals(n * new_beam_size * d);
269
294
  std::vector<float> new_distances(n * new_beam_size);
270
295
 
271
- beam_search_encode_step(
272
- d,
273
- K,
274
- codebooks.data(),
275
- n,
276
- cur_beam_size,
277
- residuals.data(),
278
- m,
279
- codes.data(),
280
- new_beam_size,
281
- new_codes.data(),
282
- new_residuals.data(),
283
- new_distances.data(),
284
- assign_index.get());
296
+ size_t bs;
297
+ { // determine batch size
298
+ size_t mem = memory_per_point();
299
+ if (n > 1 && mem * n > max_mem_distances) {
300
+ // then split queries to reduce temp memory
301
+ bs = std::max(max_mem_distances / mem, size_t(1));
302
+ } else {
303
+ bs = n;
304
+ }
305
+ }
285
306
 
307
+ for (size_t i0 = 0; i0 < n; i0 += bs) {
308
+ size_t i1 = std::min(i0 + bs, n);
309
+
310
+ /* printf("i0: %ld i1: %ld K %d ntotal assign index %ld\n",
311
+ i0, i1, K, assign_index->ntotal); */
312
+
313
+ beam_search_encode_step(
314
+ d,
315
+ K,
316
+ codebooks.data(),
317
+ i1 - i0,
318
+ cur_beam_size,
319
+ residuals.data() + i0 * cur_beam_size * d,
320
+ m,
321
+ codes.data() + i0 * cur_beam_size * m,
322
+ new_beam_size,
323
+ new_codes.data() + i0 * new_beam_size * (m + 1),
324
+ new_residuals.data() + i0 * new_beam_size * d,
325
+ new_distances.data() + i0 * new_beam_size,
326
+ assign_index.get());
327
+ }
286
328
  codes.swap(new_codes);
287
329
  residuals.swap(new_residuals);
288
330
  distances.swap(new_distances);
@@ -293,20 +335,57 @@ void ResidualQuantizer::train(size_t n, const float* x) {
293
335
  }
294
336
 
295
337
  if (verbose) {
296
- printf("[%.3f s] train stage %d, %d bits, kmeans objective %g, "
297
- "total distance %g, beam_size %d->%d\n",
338
+ printf("[%.3f s, %.3f s clustering] train stage %d, %d bits, kmeans objective %g, "
339
+ "total distance %g, beam_size %d->%d (batch size %zd)\n",
298
340
  (getmillisecs() - t0) / 1000,
341
+ clustering_time,
299
342
  m,
300
343
  int(nbits[m]),
301
344
  obj,
302
345
  sum_distances,
303
346
  cur_beam_size,
304
- new_beam_size);
347
+ new_beam_size,
348
+ bs);
305
349
  }
306
350
  cur_beam_size = new_beam_size;
307
351
  }
308
352
 
353
+ // find min and max norms
354
+ std::vector<float> norms(n);
355
+
356
+ for (size_t i = 0; i < n; i++) {
357
+ norms[i] = fvec_L2sqr(
358
+ x + i * d, residuals.data() + i * cur_beam_size * d, d);
359
+ }
360
+
361
+ // fvec_norms_L2sqr(norms.data(), x, d, n);
362
+
363
+ norm_min = HUGE_VALF;
364
+ norm_max = -HUGE_VALF;
365
+ for (idx_t i = 0; i < n; i++) {
366
+ if (norms[i] < norm_min) {
367
+ norm_min = norms[i];
368
+ }
369
+ if (norms[i] > norm_max) {
370
+ norm_max = norms[i];
371
+ }
372
+ }
373
+
374
+ if (search_type == ST_norm_cqint8 || search_type == ST_norm_cqint4) {
375
+ size_t k = (1 << 8);
376
+ if (search_type == ST_norm_cqint4) {
377
+ k = (1 << 4);
378
+ }
379
+ Clustering1D clus(k);
380
+ clus.train_exact(n, norms.data());
381
+ qnorm.add(clus.k, clus.centroids.data());
382
+ }
383
+
309
384
  is_trained = true;
385
+
386
+ if (!(train_type & Skip_codebook_tables)) {
387
+ compute_codebook_tables();
388
+ }
310
389
  }
311
390
 
312
391
  size_t ResidualQuantizer::memory_per_point(int beam_size) const {
@@ -341,22 +420,76 @@ void ResidualQuantizer::compute_codes(
341
420
  return;
342
421
  }
343
422
 
344
- std::vector<float> residuals(max_beam_size * n * d);
345
423
  std::vector<int32_t> codes(max_beam_size * M * n);
424
+ std::vector<float> norms;
346
425
  std::vector<float> distances(max_beam_size * n);
347
426
 
348
- refine_beam(
349
- n,
350
- 1,
351
- x,
352
- max_beam_size,
353
- codes.data(),
354
- residuals.data(),
355
- distances.data());
427
+ if (use_beam_LUT == 0) {
428
+ std::vector<float> residuals(max_beam_size * n * d);
429
+
430
+ refine_beam(
431
+ n,
432
+ 1,
433
+ x,
434
+ max_beam_size,
435
+ codes.data(),
436
+ residuals.data(),
437
+ distances.data());
438
+
439
+ if (search_type == ST_norm_float || search_type == ST_norm_qint8 ||
440
+ search_type == ST_norm_qint4) {
441
+ norms.resize(n);
442
+ // recover the norms of reconstruction as
443
+ // || original_vector - residual ||^2
444
+ for (size_t i = 0; i < n; i++) {
445
+ norms[i] = fvec_L2sqr(
446
+ x + i * d, residuals.data() + i * max_beam_size * d, d);
447
+ }
448
+ }
449
+ } else if (use_beam_LUT == 1) {
450
+ FAISS_THROW_IF_NOT_MSG(
451
+ codebook_cross_products.size() ==
452
+ total_codebook_size * total_codebook_size,
453
+ "call compute_codebook_tables first");
454
+
455
+ std::vector<float> query_norms(n);
456
+ fvec_norms_L2sqr(query_norms.data(), x, d, n);
457
+
458
+ std::vector<float> query_cp(n * total_codebook_size);
459
+ {
460
+ FINTEGER ti = total_codebook_size, di = d, ni = n;
461
+ float zero = 0, one = 1;
462
+ sgemm_("Transposed",
463
+ "Not transposed",
464
+ &ti,
465
+ &ni,
466
+ &di,
467
+ &one,
468
+ codebooks.data(),
469
+ &di,
470
+ x,
471
+ &di,
472
+ &zero,
473
+ query_cp.data(),
474
+ &ti);
475
+ }
356
476
 
477
+ refine_beam_LUT(
478
+ n,
479
+ query_norms.data(),
480
+ query_cp.data(),
481
+ max_beam_size,
482
+ codes.data(),
483
+ distances.data());
484
+ }
357
485
  // pack only the first code of the beam (hence the ld_codes=M *
358
486
  // max_beam_size)
359
- pack_codes(n, codes.data(), codes_out, M * max_beam_size);
487
+ pack_codes(
488
+ n,
489
+ codes.data(),
490
+ codes_out,
491
+ M * max_beam_size,
492
+ norms.size() > 0 ? norms.data() : nullptr);
360
493
  }
361
494
 
362
495
  void ResidualQuantizer::refine_beam(
@@ -445,4 +578,181 @@ void ResidualQuantizer::refine_beam(
445
578
  }
446
579
  }
447
580
 
581
+ /*******************************************************************
582
+ * Functions using the dot products between codebook entries
583
+ *******************************************************************/
584
+
585
+ void ResidualQuantizer::compute_codebook_tables() {
586
+ codebook_cross_products.resize(total_codebook_size * total_codebook_size);
587
+ cent_norms.resize(total_codebook_size);
588
+ // strictly speaking we could use ssyrk
589
+ {
590
+ FINTEGER ni = total_codebook_size;
591
+ FINTEGER di = d;
592
+ float zero = 0, one = 1;
593
+ sgemm_("Transposed",
594
+ "Not transposed",
595
+ &ni,
596
+ &ni,
597
+ &di,
598
+ &one,
599
+ codebooks.data(),
600
+ &di,
601
+ codebooks.data(),
602
+ &di,
603
+ &zero,
604
+ codebook_cross_products.data(),
605
+ &ni);
606
+ }
607
+ for (size_t i = 0; i < total_codebook_size; i++) {
608
+ cent_norms[i] = codebook_cross_products[i + i * total_codebook_size];
609
+ }
610
+ }
611
+
612
+ void beam_search_encode_step_tab(
613
+ size_t K,
614
+ size_t n,
615
+ size_t beam_size, // input sizes
616
+ const float* codebook_cross_norms, // size K * ldc
617
+ size_t ldc, // >= K
618
+ const uint64_t* codebook_offsets, // m
619
+ const float* query_cp, // size n * ldqc
620
+ size_t ldqc, // >= K
621
+ const float* cent_norms_i, // size K
622
+ size_t m,
623
+ const int32_t* codes, // n * beam_size * m
624
+ const float* distances, // n * beam_size
625
+ size_t new_beam_size,
626
+ int32_t* new_codes, // n * new_beam_size * (m + 1)
627
+ float* new_distances) // n * new_beam_size
628
+ {
629
+ FAISS_THROW_IF_NOT(ldc >= K);
630
+
631
+ #pragma omp parallel for if (n > 100)
632
+ for (int64_t i = 0; i < n; i++) {
633
+ std::vector<float> cent_distances(beam_size * K);
634
+ std::vector<float> cd_common(K);
635
+
636
+ const int32_t* codes_i = codes + i * m * beam_size;
637
+ const float* query_cp_i = query_cp + i * ldqc;
638
+ const float* distances_i = distances + i * beam_size;
639
+
640
+ for (size_t k = 0; k < K; k++) {
641
+ cd_common[k] = cent_norms_i[k] - 2 * query_cp_i[k];
642
+ }
643
+
644
+ for (size_t b = 0; b < beam_size; b++) {
645
+ std::vector<float> dp(K);
646
+
647
+ for (size_t m1 = 0; m1 < m; m1++) {
648
+ size_t c = codes_i[b * m + m1];
649
+ const float* cb =
650
+ &codebook_cross_norms[(codebook_offsets[m1] + c) * ldc];
651
+ fvec_add(K, cb, dp.data(), dp.data());
652
+ }
653
+
654
+ for (size_t k = 0; k < K; k++) {
655
+ cent_distances[b * K + k] =
656
+ distances_i[b] + cd_common[k] + 2 * dp[k];
657
+ }
658
+ }
659
+
660
+ using C = CMax<float, int>;
661
+ int32_t* new_codes_i = new_codes + i * (m + 1) * new_beam_size;
662
+ float* new_distances_i = new_distances + i * new_beam_size;
663
+
664
+ const float* cent_distances_i = cent_distances.data();
665
+
666
+ // then we have to select the best results
667
+ for (int i = 0; i < new_beam_size; i++) {
668
+ new_distances_i[i] = C::neutral();
669
+ }
670
+ std::vector<int> perm(new_beam_size, -1);
671
+ heap_addn<C>(
672
+ new_beam_size,
673
+ new_distances_i,
674
+ perm.data(),
675
+ cent_distances_i,
676
+ nullptr,
677
+ beam_size * K);
678
+ heap_reorder<C>(new_beam_size, new_distances_i, perm.data());
679
+
680
+ for (int j = 0; j < new_beam_size; j++) {
681
+ int js = perm[j] / K;
682
+ int ls = perm[j] % K;
683
+ if (m > 0) {
684
+ memcpy(new_codes_i, codes_i + js * m, sizeof(*codes) * m);
685
+ }
686
+ new_codes_i[m] = ls;
687
+ new_codes_i += m + 1;
688
+ }
689
+ }
690
+ }
691
+
692
+ void ResidualQuantizer::refine_beam_LUT(
693
+ size_t n,
694
+ const float* query_norms, // size n
695
+ const float* query_cp, //
696
+ int out_beam_size,
697
+ int32_t* out_codes,
698
+ float* out_distances) const {
699
+ int beam_size = 1;
700
+
701
+ std::vector<int32_t> codes;
702
+ std::vector<float> distances(query_norms, query_norms + n);
703
+ double t0 = getmillisecs();
704
+
705
+ for (int m = 0; m < M; m++) {
706
+ int K = 1 << nbits[m];
707
+
708
+ int new_beam_size = std::min(beam_size * K, out_beam_size);
709
+ std::vector<int32_t> new_codes(n * new_beam_size * (m + 1));
710
+ std::vector<float> new_distances(n * new_beam_size);
711
+
712
+ beam_search_encode_step_tab(
713
+ K,
714
+ n,
715
+ beam_size,
716
+ codebook_cross_products.data() + codebook_offsets[m],
717
+ total_codebook_size,
718
+ codebook_offsets.data(),
719
+ query_cp + codebook_offsets[m],
720
+ total_codebook_size,
721
+ cent_norms.data() + codebook_offsets[m],
722
+ m,
723
+ codes.data(),
724
+ distances.data(),
725
+ new_beam_size,
726
+ new_codes.data(),
727
+ new_distances.data());
728
+
729
+ codes.swap(new_codes);
730
+ distances.swap(new_distances);
731
+ beam_size = new_beam_size;
732
+
733
+ if (verbose) {
734
+ float sum_distances = 0;
735
+ for (int j = 0; j < distances.size(); j++) {
736
+ sum_distances += distances[j];
737
+ }
738
+ printf("[%.3f s] encode stage %d, %d bits, "
739
+ "total error %g, beam_size %d\n",
740
+ (getmillisecs() - t0) / 1000,
741
+ m,
742
+ int(nbits[m]),
743
+ sum_distances,
744
+ beam_size);
745
+ }
746
+ }
747
+
748
+ if (out_codes) {
749
+ memcpy(out_codes, codes.data(), codes.size() * sizeof(codes[0]));
750
+ }
751
+ if (out_distances) {
752
+ memcpy(out_distances,
753
+ distances.data(),
754
+ distances.size() * sizeof(distances[0]));
755
+ }
756
+ }
757
+
448
758
  } // namespace faiss
@@ -25,18 +25,32 @@ namespace faiss {
25
25
  struct ResidualQuantizer : AdditiveQuantizer {
26
26
  /// initialization
27
27
  enum train_type_t {
28
- Train_default, ///< regular k-means
29
- Train_progressive_dim, ///< progressive dim clustering
28
+ Train_default = 0, ///< regular k-means
29
+ Train_progressive_dim = 1, ///< progressive dim clustering
30
+ Train_default_Train_top_beam = 1024,
31
+ Train_progressive_dim_Train_top_beam = 1025,
32
+ Train_default_Skip_codebook_tables = 2048,
33
+ Train_progressive_dim_Skip_codebook_tables = 2049,
34
+ Train_default_Train_top_beam_Skip_codebook_tables = 3072,
35
+ Train_progressive_dim_Train_top_beam_Skip_codebook_tables = 3073,
30
36
  };
31
37
 
38
+ train_type_t train_type;
39
+
32
40
  // set this bit on train_type if beam is to be trained only on the
33
41
  // first element of the beam (faster but less accurate)
34
42
  static const int Train_top_beam = 1024;
35
- train_type_t train_type;
43
+
44
+ // set this bit to not automatically compute the codebook tables
45
+ // after training
46
+ static const int Skip_codebook_tables = 2048;
36
47
 
37
48
  /// beam size used for training and for encoding
38
49
  int max_beam_size;
39
50
 
51
+ /// use LUT for beam search
52
+ int use_beam_LUT;
53
+
40
54
  /// distance matrixes with beam search can get large, so use this
41
55
  /// to batch computations at encoding time.
42
56
  size_t max_mem_distances;
@@ -47,12 +61,16 @@ struct ResidualQuantizer : AdditiveQuantizer {
47
61
  /// if non-NULL, use this index for assignment
48
62
  ProgressiveDimIndexFactory* assign_index_factory;
49
63
 
50
- ResidualQuantizer(size_t d, const std::vector<size_t>& nbits);
64
+ ResidualQuantizer(
65
+ size_t d,
66
+ const std::vector<size_t>& nbits,
67
+ Search_type_t search_type = ST_decompress);
51
68
 
52
69
  ResidualQuantizer(
53
- size_t d, /* dimensionality of the input vectors */
54
- size_t M, /* number of subquantizers */
55
- size_t nbits); /* number of bit per subvector index */
70
+ size_t d, /* dimensionality of the input vectors */
71
+ size_t M, /* number of subquantizers */
72
+ size_t nbits, /* number of bit per subvector index */
73
+ Search_type_t search_type = ST_decompress);
56
74
 
57
75
  ResidualQuantizer();
58
76
 
@@ -85,12 +103,32 @@ struct ResidualQuantizer : AdditiveQuantizer {
85
103
  float* new_residuals = nullptr,
86
104
  float* new_distances = nullptr) const;
87
105
 
106
+ void refine_beam_LUT(
107
+ size_t n,
108
+ const float* query_norms,
109
+ const float* query_cp,
110
+ int new_beam_size,
111
+ int32_t* new_codes,
112
+ float* new_distances = nullptr) const;
113
+
88
114
  /** Beam search can consume a lot of memory. This function estimates the
89
115
  * amount of mem used by refine_beam to adjust the batch size
90
116
  *
91
117
  * @param beam_size if != -1, override the beam size
92
118
  */
93
119
  size_t memory_per_point(int beam_size = -1) const;
120
+
121
+ /** Cross products used in codebook tables
122
+ *
123
+ * These are used to keep trak of norms of centroids.
124
+ */
125
+ void compute_codebook_tables();
126
+
127
+ /// dot products of all codebook vectors with each other
128
+ /// size total_codebook_size * total_codebook_size
129
+ std::vector<float> codebook_cross_products;
130
+ /// norms of all vectors
131
+ std::vector<float> cent_norms;
94
132
  };
95
133
 
96
134
  /** Encode a residual by sampling from a centroid table.
@@ -127,4 +165,24 @@ void beam_search_encode_step(
127
165
  float* new_distances,
128
166
  Index* assign_index = nullptr);
129
167
 
168
+ /** Encode a set of vectors using their dot products with the codebooks
169
+ *
170
+ */
171
+ void beam_search_encode_step_tab(
172
+ size_t K,
173
+ size_t n,
174
+ size_t beam_size, // input sizes
175
+ const float* codebook_cross_norms, // size K * ldc
176
+ size_t ldc, // >= K
177
+ const uint64_t* codebook_offsets, // m
178
+ const float* query_cp, // size n * ldqc
179
+ size_t ldqc, // >= K
180
+ const float* cent_norms_i, // size K
181
+ size_t m,
182
+ const int32_t* codes, // n * beam_size * m
183
+ const float* distances, // n * beam_size
184
+ size_t new_beam_size,
185
+ int32_t* new_codes, // n * new_beam_size * (m + 1)
186
+ float* new_distances); // n * new_beam_size
187
+
130
188
  }; // namespace faiss
@@ -1335,12 +1335,9 @@ namespace {
1335
1335
  template <class DCClass>
1336
1336
  struct IVFSQScannerIP : InvertedListScanner {
1337
1337
  DCClass dc;
1338
- bool store_pairs, by_residual;
1338
+ bool by_residual;
1339
1339
 
1340
- size_t code_size;
1341
-
1342
- idx_t list_no; /// current list (set to 0 for Flat index
1343
- float accu0; /// added to all distances
1340
+ float accu0; /// added to all distances
1344
1341
 
1345
1342
  IVFSQScannerIP(
1346
1343
  int d,
@@ -1348,12 +1345,10 @@ struct IVFSQScannerIP : InvertedListScanner {
1348
1345
  size_t code_size,
1349
1346
  bool store_pairs,
1350
1347
  bool by_residual)
1351
- : dc(d, trained),
1352
- store_pairs(store_pairs),
1353
- by_residual(by_residual),
1354
- code_size(code_size),
1355
- list_no(0),
1356
- accu0(0) {}
1348
+ : dc(d, trained), by_residual(by_residual), accu0(0) {
1349
+ this->store_pairs = store_pairs;
1350
+ this->code_size = code_size;
1351
+ }
1357
1352
 
1358
1353
  void set_query(const float* query) override {
1359
1354
  dc.set_query(query);
@@ -1411,10 +1406,8 @@ template <class DCClass>
1411
1406
  struct IVFSQScannerL2 : InvertedListScanner {
1412
1407
  DCClass dc;
1413
1408
 
1414
- bool store_pairs, by_residual;
1415
- size_t code_size;
1409
+ bool by_residual;
1416
1410
  const Index* quantizer;
1417
- idx_t list_no; /// current inverted list
1418
1411
  const float* x; /// current query
1419
1412
 
1420
1413
  std::vector<float> tmp;
@@ -1427,13 +1420,13 @@ struct IVFSQScannerL2 : InvertedListScanner {
1427
1420
  bool store_pairs,
1428
1421
  bool by_residual)
1429
1422
  : dc(d, trained),
1430
- store_pairs(store_pairs),
1431
1423
  by_residual(by_residual),
1432
- code_size(code_size),
1433
1424
  quantizer(quantizer),
1434
- list_no(0),
1435
1425
  x(nullptr),
1436
- tmp(d) {}
1426
+ tmp(d) {
1427
+ this->store_pairs = store_pairs;
1428
+ this->code_size = code_size;
1429
+ }
1437
1430
 
1438
1431
  void set_query(const float* query) override {
1439
1432
  x = query;
@@ -1443,8 +1436,8 @@ struct IVFSQScannerL2 : InvertedListScanner {
1443
1436
  }
1444
1437
 
1445
1438
  void set_list(idx_t list_no, float /*coarse_dis*/) override {
1439
+ this->list_no = list_no;
1446
1440
  if (by_residual) {
1447
- this->list_no = list_no;
1448
1441
  // shift of x_in wrt centroid
1449
1442
  quantizer->compute_residual(x, tmp.data(), list_no);
1450
1443
  dc.set_query(tmp.data());