faiss 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/faiss/Clustering.cpp +32 -0
  6. data/vendor/faiss/faiss/Clustering.h +14 -0
  7. data/vendor/faiss/faiss/Index.h +1 -1
  8. data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
  9. data/vendor/faiss/faiss/Index2Layer.h +2 -16
  10. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  11. data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
  12. data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
  13. data/vendor/faiss/faiss/IndexFlat.h +9 -15
  14. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  15. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  16. data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
  17. data/vendor/faiss/faiss/IndexIVF.h +25 -7
  18. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  19. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  20. data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
  21. data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
  22. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  23. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
  24. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
  25. data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
  26. data/vendor/faiss/faiss/IndexLSH.h +2 -15
  27. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
  28. data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
  29. data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
  30. data/vendor/faiss/faiss/IndexPQ.h +2 -17
  31. data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
  32. data/vendor/faiss/faiss/IndexRefine.h +10 -0
  33. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
  34. data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
  35. data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
  36. data/vendor/faiss/faiss/VectorTransform.h +3 -0
  37. data/vendor/faiss/faiss/clone_index.cpp +3 -2
  38. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
  39. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  40. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
  41. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
  42. data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
  43. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
  44. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
  45. data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
  46. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  47. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
  48. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
  49. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
  50. data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
  51. data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
  52. data/vendor/faiss/faiss/impl/io.cpp +1 -1
  53. data/vendor/faiss/faiss/impl/io_macros.h +20 -0
  54. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  55. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  56. data/vendor/faiss/faiss/index_factory.cpp +585 -414
  57. data/vendor/faiss/faiss/index_factory.h +3 -0
  58. data/vendor/faiss/faiss/utils/distances.cpp +4 -2
  59. data/vendor/faiss/faiss/utils/distances.h +36 -3
  60. data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
  61. data/vendor/faiss/faiss/utils/utils.h +1 -1
  62. metadata +12 -5
  63. data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
@@ -7,18 +7,19 @@
7
7
 
8
8
  // -*- c++ -*-
9
9
 
10
- #include "faiss/impl/ResidualQuantizer.h"
11
- #include <faiss/impl/FaissAssert.h>
12
10
  #include <faiss/impl/ResidualQuantizer.h>
13
- #include "faiss/utils/utils.h"
14
11
 
12
+ #include <algorithm>
15
13
  #include <cstddef>
16
14
  #include <cstdio>
17
15
  #include <cstring>
18
16
  #include <memory>
19
17
 
20
- #include <algorithm>
18
+ #include <faiss/impl/FaissAssert.h>
19
+ #include <faiss/impl/ResidualQuantizer.h>
20
+ #include <faiss/utils/utils.h>
21
21
 
22
+ #include <faiss/Clustering.h>
22
23
  #include <faiss/IndexFlat.h>
23
24
  #include <faiss/VectorTransform.h>
24
25
  #include <faiss/impl/AuxIndexStructures.h>
@@ -26,13 +27,34 @@
26
27
  #include <faiss/utils/Heap.h>
27
28
  #include <faiss/utils/distances.h>
28
29
  #include <faiss/utils/hamming.h>
30
+ #include <faiss/utils/simdlib.h>
29
31
  #include <faiss/utils/utils.h>
30
32
 
33
+ extern "C" {
34
+
35
+ // general matrix multiplication
36
+ int sgemm_(
37
+ const char* transa,
38
+ const char* transb,
39
+ FINTEGER* m,
40
+ FINTEGER* n,
41
+ FINTEGER* k,
42
+ const float* alpha,
43
+ const float* a,
44
+ FINTEGER* lda,
45
+ const float* b,
46
+ FINTEGER* ldb,
47
+ float* beta,
48
+ float* c,
49
+ FINTEGER* ldc);
50
+ }
51
+
31
52
  namespace faiss {
32
53
 
33
54
  ResidualQuantizer::ResidualQuantizer()
34
55
  : train_type(Train_progressive_dim),
35
- max_beam_size(30),
56
+ max_beam_size(5),
57
+ use_beam_LUT(0),
36
58
  max_mem_distances(5 * (size_t(1) << 30)), // 5 GiB
37
59
  assign_index_factory(nullptr) {
38
60
  d = 0;
@@ -40,26 +62,24 @@ ResidualQuantizer::ResidualQuantizer()
40
62
  verbose = false;
41
63
  }
42
64
 
43
- ResidualQuantizer::ResidualQuantizer(size_t d, const std::vector<size_t>& nbits)
65
+ ResidualQuantizer::ResidualQuantizer(
66
+ size_t d,
67
+ const std::vector<size_t>& nbits,
68
+ Search_type_t search_type)
44
69
  : ResidualQuantizer() {
70
+ this->search_type = search_type;
45
71
  this->d = d;
46
72
  M = nbits.size();
47
73
  this->nbits = nbits;
48
74
  set_derived_values();
49
75
  }
50
76
 
51
- ResidualQuantizer::ResidualQuantizer(size_t d, size_t M, size_t nbits)
52
- : ResidualQuantizer(d, std::vector<size_t>(M, nbits)) {}
53
-
54
- namespace {
55
-
56
- void fvec_sub(size_t d, const float* a, const float* b, float* c) {
57
- for (size_t i = 0; i < d; i++) {
58
- c[i] = a[i] - b[i];
59
- }
60
- }
61
-
62
- } // anonymous namespace
77
+ ResidualQuantizer::ResidualQuantizer(
78
+ size_t d,
79
+ size_t M,
80
+ size_t nbits,
81
+ Search_type_t search_type)
82
+ : ResidualQuantizer(d, std::vector<size_t>(M, nbits), search_type) {}
63
83
 
64
84
  void beam_search_encode_step(
65
85
  size_t d,
@@ -90,7 +110,7 @@ void beam_search_encode_step(
90
110
  cent_ids.resize(n * beam_size * new_beam_size);
91
111
  if (assign_index->ntotal != 0) {
92
112
  // then we assume the codebooks are already added to the index
93
- FAISS_THROW_IF_NOT(assign_index->ntotal != K);
113
+ FAISS_THROW_IF_NOT(assign_index->ntotal == K);
94
114
  } else {
95
115
  assign_index->add(K, cent);
96
116
  }
@@ -208,6 +228,7 @@ void ResidualQuantizer::train(size_t n, const float* x) {
208
228
  std::vector<int32_t> codes;
209
229
  std::vector<float> distances;
210
230
  double t0 = getmillisecs();
231
+ double clustering_time = 0;
211
232
 
212
233
  for (int m = 0; m < M; m++) {
213
234
  int K = 1 << nbits[m];
@@ -224,7 +245,7 @@ void ResidualQuantizer::train(size_t n, const float* x) {
224
245
  }
225
246
  train_residuals = residuals1;
226
247
  }
227
- train_type_t tt = train_type_t(train_type & ~Train_top_beam);
248
+ train_type_t tt = train_type_t(train_type & 1023);
228
249
 
229
250
  std::vector<float> codebooks;
230
251
  float obj = 0;
@@ -235,6 +256,9 @@ void ResidualQuantizer::train(size_t n, const float* x) {
235
256
  } else {
236
257
  assign_index.reset(new IndexFlatL2(d));
237
258
  }
259
+
260
+ double t1 = getmillisecs();
261
+
238
262
  if (tt == Train_default) {
239
263
  Clustering clus(d, K, cp);
240
264
  clus.train(
@@ -256,6 +280,7 @@ void ResidualQuantizer::train(size_t n, const float* x) {
256
280
  } else {
257
281
  FAISS_THROW_MSG("train type not supported");
258
282
  }
283
+ clustering_time += (getmillisecs() - t1) / 1000;
259
284
 
260
285
  memcpy(this->codebooks.data() + codebook_offsets[m] * d,
261
286
  codebooks.data(),
@@ -268,21 +293,38 @@ void ResidualQuantizer::train(size_t n, const float* x) {
268
293
  std::vector<float> new_residuals(n * new_beam_size * d);
269
294
  std::vector<float> new_distances(n * new_beam_size);
270
295
 
271
- beam_search_encode_step(
272
- d,
273
- K,
274
- codebooks.data(),
275
- n,
276
- cur_beam_size,
277
- residuals.data(),
278
- m,
279
- codes.data(),
280
- new_beam_size,
281
- new_codes.data(),
282
- new_residuals.data(),
283
- new_distances.data(),
284
- assign_index.get());
296
+ size_t bs;
297
+ { // determine batch size
298
+ size_t mem = memory_per_point();
299
+ if (n > 1 && mem * n > max_mem_distances) {
300
+ // then split queries to reduce temp memory
301
+ bs = std::max(max_mem_distances / mem, size_t(1));
302
+ } else {
303
+ bs = n;
304
+ }
305
+ }
285
306
 
307
+ for (size_t i0 = 0; i0 < n; i0 += bs) {
308
+ size_t i1 = std::min(i0 + bs, n);
309
+
310
+ /* printf("i0: %ld i1: %ld K %d ntotal assign index %ld\n",
311
+ i0, i1, K, assign_index->ntotal); */
312
+
313
+ beam_search_encode_step(
314
+ d,
315
+ K,
316
+ codebooks.data(),
317
+ i1 - i0,
318
+ cur_beam_size,
319
+ residuals.data() + i0 * cur_beam_size * d,
320
+ m,
321
+ codes.data() + i0 * cur_beam_size * m,
322
+ new_beam_size,
323
+ new_codes.data() + i0 * new_beam_size * (m + 1),
324
+ new_residuals.data() + i0 * new_beam_size * d,
325
+ new_distances.data() + i0 * new_beam_size,
326
+ assign_index.get());
327
+ }
286
328
  codes.swap(new_codes);
287
329
  residuals.swap(new_residuals);
288
330
  distances.swap(new_distances);
@@ -293,20 +335,57 @@ void ResidualQuantizer::train(size_t n, const float* x) {
293
335
  }
294
336
 
295
337
  if (verbose) {
296
- printf("[%.3f s] train stage %d, %d bits, kmeans objective %g, "
297
- "total distance %g, beam_size %d->%d\n",
338
+ printf("[%.3f s, %.3f s clustering] train stage %d, %d bits, kmeans objective %g, "
339
+ "total distance %g, beam_size %d->%d (batch size %zd)\n",
298
340
  (getmillisecs() - t0) / 1000,
341
+ clustering_time,
299
342
  m,
300
343
  int(nbits[m]),
301
344
  obj,
302
345
  sum_distances,
303
346
  cur_beam_size,
304
- new_beam_size);
347
+ new_beam_size,
348
+ bs);
305
349
  }
306
350
  cur_beam_size = new_beam_size;
307
351
  }
308
352
 
353
+ // find min and max norms
354
+ std::vector<float> norms(n);
355
+
356
+ for (size_t i = 0; i < n; i++) {
357
+ norms[i] = fvec_L2sqr(
358
+ x + i * d, residuals.data() + i * cur_beam_size * d, d);
359
+ }
360
+
361
+ // fvec_norms_L2sqr(norms.data(), x, d, n);
362
+
363
+ norm_min = HUGE_VALF;
364
+ norm_max = -HUGE_VALF;
365
+ for (idx_t i = 0; i < n; i++) {
366
+ if (norms[i] < norm_min) {
367
+ norm_min = norms[i];
368
+ }
369
+ if (norms[i] > norm_max) {
370
+ norm_max = norms[i];
371
+ }
372
+ }
373
+
374
+ if (search_type == ST_norm_cqint8 || search_type == ST_norm_cqint4) {
375
+ size_t k = (1 << 8);
376
+ if (search_type == ST_norm_cqint4) {
377
+ k = (1 << 4);
378
+ }
379
+ Clustering1D clus(k);
380
+ clus.train_exact(n, norms.data());
381
+ qnorm.add(clus.k, clus.centroids.data());
382
+ }
383
+
309
384
  is_trained = true;
385
+
386
+ if (!(train_type & Skip_codebook_tables)) {
387
+ compute_codebook_tables();
388
+ }
310
389
  }
311
390
 
312
391
  size_t ResidualQuantizer::memory_per_point(int beam_size) const {
@@ -341,22 +420,76 @@ void ResidualQuantizer::compute_codes(
341
420
  return;
342
421
  }
343
422
 
344
- std::vector<float> residuals(max_beam_size * n * d);
345
423
  std::vector<int32_t> codes(max_beam_size * M * n);
424
+ std::vector<float> norms;
346
425
  std::vector<float> distances(max_beam_size * n);
347
426
 
348
- refine_beam(
349
- n,
350
- 1,
351
- x,
352
- max_beam_size,
353
- codes.data(),
354
- residuals.data(),
355
- distances.data());
427
+ if (use_beam_LUT == 0) {
428
+ std::vector<float> residuals(max_beam_size * n * d);
429
+
430
+ refine_beam(
431
+ n,
432
+ 1,
433
+ x,
434
+ max_beam_size,
435
+ codes.data(),
436
+ residuals.data(),
437
+ distances.data());
438
+
439
+ if (search_type == ST_norm_float || search_type == ST_norm_qint8 ||
440
+ search_type == ST_norm_qint4) {
441
+ norms.resize(n);
442
+ // recover the norms of reconstruction as
443
+ // || original_vector - residual ||^2
444
+ for (size_t i = 0; i < n; i++) {
445
+ norms[i] = fvec_L2sqr(
446
+ x + i * d, residuals.data() + i * max_beam_size * d, d);
447
+ }
448
+ }
449
+ } else if (use_beam_LUT == 1) {
450
+ FAISS_THROW_IF_NOT_MSG(
451
+ codebook_cross_products.size() ==
452
+ total_codebook_size * total_codebook_size,
453
+ "call compute_codebook_tables first");
454
+
455
+ std::vector<float> query_norms(n);
456
+ fvec_norms_L2sqr(query_norms.data(), x, d, n);
457
+
458
+ std::vector<float> query_cp(n * total_codebook_size);
459
+ {
460
+ FINTEGER ti = total_codebook_size, di = d, ni = n;
461
+ float zero = 0, one = 1;
462
+ sgemm_("Transposed",
463
+ "Not transposed",
464
+ &ti,
465
+ &ni,
466
+ &di,
467
+ &one,
468
+ codebooks.data(),
469
+ &di,
470
+ x,
471
+ &di,
472
+ &zero,
473
+ query_cp.data(),
474
+ &ti);
475
+ }
356
476
 
477
+ refine_beam_LUT(
478
+ n,
479
+ query_norms.data(),
480
+ query_cp.data(),
481
+ max_beam_size,
482
+ codes.data(),
483
+ distances.data());
484
+ }
357
485
  // pack only the first code of the beam (hence the ld_codes=M *
358
486
  // max_beam_size)
359
- pack_codes(n, codes.data(), codes_out, M * max_beam_size);
487
+ pack_codes(
488
+ n,
489
+ codes.data(),
490
+ codes_out,
491
+ M * max_beam_size,
492
+ norms.size() > 0 ? norms.data() : nullptr);
360
493
  }
361
494
 
362
495
  void ResidualQuantizer::refine_beam(
@@ -445,4 +578,181 @@ void ResidualQuantizer::refine_beam(
445
578
  }
446
579
  }
447
580
 
581
+ /*******************************************************************
582
+ * Functions using the dot products between codebook entries
583
+ *******************************************************************/
584
+
585
+ void ResidualQuantizer::compute_codebook_tables() {
586
+ codebook_cross_products.resize(total_codebook_size * total_codebook_size);
587
+ cent_norms.resize(total_codebook_size);
588
+ // strictly speaking we could use ssyrk
589
+ {
590
+ FINTEGER ni = total_codebook_size;
591
+ FINTEGER di = d;
592
+ float zero = 0, one = 1;
593
+ sgemm_("Transposed",
594
+ "Not transposed",
595
+ &ni,
596
+ &ni,
597
+ &di,
598
+ &one,
599
+ codebooks.data(),
600
+ &di,
601
+ codebooks.data(),
602
+ &di,
603
+ &zero,
604
+ codebook_cross_products.data(),
605
+ &ni);
606
+ }
607
+ for (size_t i = 0; i < total_codebook_size; i++) {
608
+ cent_norms[i] = codebook_cross_products[i + i * total_codebook_size];
609
+ }
610
+ }
611
+
612
+ void beam_search_encode_step_tab(
613
+ size_t K,
614
+ size_t n,
615
+ size_t beam_size, // input sizes
616
+ const float* codebook_cross_norms, // size K * ldc
617
+ size_t ldc, // >= K
618
+ const uint64_t* codebook_offsets, // m
619
+ const float* query_cp, // size n * ldqc
620
+ size_t ldqc, // >= K
621
+ const float* cent_norms_i, // size K
622
+ size_t m,
623
+ const int32_t* codes, // n * beam_size * m
624
+ const float* distances, // n * beam_size
625
+ size_t new_beam_size,
626
+ int32_t* new_codes, // n * new_beam_size * (m + 1)
627
+ float* new_distances) // n * new_beam_size
628
+ {
629
+ FAISS_THROW_IF_NOT(ldc >= K);
630
+
631
+ #pragma omp parallel for if (n > 100)
632
+ for (int64_t i = 0; i < n; i++) {
633
+ std::vector<float> cent_distances(beam_size * K);
634
+ std::vector<float> cd_common(K);
635
+
636
+ const int32_t* codes_i = codes + i * m * beam_size;
637
+ const float* query_cp_i = query_cp + i * ldqc;
638
+ const float* distances_i = distances + i * beam_size;
639
+
640
+ for (size_t k = 0; k < K; k++) {
641
+ cd_common[k] = cent_norms_i[k] - 2 * query_cp_i[k];
642
+ }
643
+
644
+ for (size_t b = 0; b < beam_size; b++) {
645
+ std::vector<float> dp(K);
646
+
647
+ for (size_t m1 = 0; m1 < m; m1++) {
648
+ size_t c = codes_i[b * m + m1];
649
+ const float* cb =
650
+ &codebook_cross_norms[(codebook_offsets[m1] + c) * ldc];
651
+ fvec_add(K, cb, dp.data(), dp.data());
652
+ }
653
+
654
+ for (size_t k = 0; k < K; k++) {
655
+ cent_distances[b * K + k] =
656
+ distances_i[b] + cd_common[k] + 2 * dp[k];
657
+ }
658
+ }
659
+
660
+ using C = CMax<float, int>;
661
+ int32_t* new_codes_i = new_codes + i * (m + 1) * new_beam_size;
662
+ float* new_distances_i = new_distances + i * new_beam_size;
663
+
664
+ const float* cent_distances_i = cent_distances.data();
665
+
666
+ // then we have to select the best results
667
+ for (int i = 0; i < new_beam_size; i++) {
668
+ new_distances_i[i] = C::neutral();
669
+ }
670
+ std::vector<int> perm(new_beam_size, -1);
671
+ heap_addn<C>(
672
+ new_beam_size,
673
+ new_distances_i,
674
+ perm.data(),
675
+ cent_distances_i,
676
+ nullptr,
677
+ beam_size * K);
678
+ heap_reorder<C>(new_beam_size, new_distances_i, perm.data());
679
+
680
+ for (int j = 0; j < new_beam_size; j++) {
681
+ int js = perm[j] / K;
682
+ int ls = perm[j] % K;
683
+ if (m > 0) {
684
+ memcpy(new_codes_i, codes_i + js * m, sizeof(*codes) * m);
685
+ }
686
+ new_codes_i[m] = ls;
687
+ new_codes_i += m + 1;
688
+ }
689
+ }
690
+ }
691
+
692
+ void ResidualQuantizer::refine_beam_LUT(
693
+ size_t n,
694
+ const float* query_norms, // size n
695
+ const float* query_cp, //
696
+ int out_beam_size,
697
+ int32_t* out_codes,
698
+ float* out_distances) const {
699
+ int beam_size = 1;
700
+
701
+ std::vector<int32_t> codes;
702
+ std::vector<float> distances(query_norms, query_norms + n);
703
+ double t0 = getmillisecs();
704
+
705
+ for (int m = 0; m < M; m++) {
706
+ int K = 1 << nbits[m];
707
+
708
+ int new_beam_size = std::min(beam_size * K, out_beam_size);
709
+ std::vector<int32_t> new_codes(n * new_beam_size * (m + 1));
710
+ std::vector<float> new_distances(n * new_beam_size);
711
+
712
+ beam_search_encode_step_tab(
713
+ K,
714
+ n,
715
+ beam_size,
716
+ codebook_cross_products.data() + codebook_offsets[m],
717
+ total_codebook_size,
718
+ codebook_offsets.data(),
719
+ query_cp + codebook_offsets[m],
720
+ total_codebook_size,
721
+ cent_norms.data() + codebook_offsets[m],
722
+ m,
723
+ codes.data(),
724
+ distances.data(),
725
+ new_beam_size,
726
+ new_codes.data(),
727
+ new_distances.data());
728
+
729
+ codes.swap(new_codes);
730
+ distances.swap(new_distances);
731
+ beam_size = new_beam_size;
732
+
733
+ if (verbose) {
734
+ float sum_distances = 0;
735
+ for (int j = 0; j < distances.size(); j++) {
736
+ sum_distances += distances[j];
737
+ }
738
+ printf("[%.3f s] encode stage %d, %d bits, "
739
+ "total error %g, beam_size %d\n",
740
+ (getmillisecs() - t0) / 1000,
741
+ m,
742
+ int(nbits[m]),
743
+ sum_distances,
744
+ beam_size);
745
+ }
746
+ }
747
+
748
+ if (out_codes) {
749
+ memcpy(out_codes, codes.data(), codes.size() * sizeof(codes[0]));
750
+ }
751
+ if (out_distances) {
752
+ memcpy(out_distances,
753
+ distances.data(),
754
+ distances.size() * sizeof(distances[0]));
755
+ }
756
+ }
757
+
448
758
  } // namespace faiss
@@ -25,18 +25,32 @@ namespace faiss {
25
25
  struct ResidualQuantizer : AdditiveQuantizer {
26
26
  /// initialization
27
27
  enum train_type_t {
28
- Train_default, ///< regular k-means
29
- Train_progressive_dim, ///< progressive dim clustering
28
+ Train_default = 0, ///< regular k-means
29
+ Train_progressive_dim = 1, ///< progressive dim clustering
30
+ Train_default_Train_top_beam = 1024,
31
+ Train_progressive_dim_Train_top_beam = 1025,
32
+ Train_default_Skip_codebook_tables = 2048,
33
+ Train_progressive_dim_Skip_codebook_tables = 2049,
34
+ Train_default_Train_top_beam_Skip_codebook_tables = 3072,
35
+ Train_progressive_dim_Train_top_beam_Skip_codebook_tables = 3073,
30
36
  };
31
37
 
38
+ train_type_t train_type;
39
+
32
40
  // set this bit on train_type if beam is to be trained only on the
33
41
  // first element of the beam (faster but less accurate)
34
42
  static const int Train_top_beam = 1024;
35
- train_type_t train_type;
43
+
44
+ // set this bit to not automatically compute the codebook tables
45
+ // after training
46
+ static const int Skip_codebook_tables = 2048;
36
47
 
37
48
  /// beam size used for training and for encoding
38
49
  int max_beam_size;
39
50
 
51
+ /// use LUT for beam search
52
+ int use_beam_LUT;
53
+
40
54
  /// distance matrixes with beam search can get large, so use this
41
55
  /// to batch computations at encoding time.
42
56
  size_t max_mem_distances;
@@ -47,12 +61,16 @@ struct ResidualQuantizer : AdditiveQuantizer {
47
61
  /// if non-NULL, use this index for assignment
48
62
  ProgressiveDimIndexFactory* assign_index_factory;
49
63
 
50
- ResidualQuantizer(size_t d, const std::vector<size_t>& nbits);
64
+ ResidualQuantizer(
65
+ size_t d,
66
+ const std::vector<size_t>& nbits,
67
+ Search_type_t search_type = ST_decompress);
51
68
 
52
69
  ResidualQuantizer(
53
- size_t d, /* dimensionality of the input vectors */
54
- size_t M, /* number of subquantizers */
55
- size_t nbits); /* number of bit per subvector index */
70
+ size_t d, /* dimensionality of the input vectors */
71
+ size_t M, /* number of subquantizers */
72
+ size_t nbits, /* number of bit per subvector index */
73
+ Search_type_t search_type = ST_decompress);
56
74
 
57
75
  ResidualQuantizer();
58
76
 
@@ -85,12 +103,32 @@ struct ResidualQuantizer : AdditiveQuantizer {
85
103
  float* new_residuals = nullptr,
86
104
  float* new_distances = nullptr) const;
87
105
 
106
+ void refine_beam_LUT(
107
+ size_t n,
108
+ const float* query_norms,
109
+ const float* query_cp,
110
+ int new_beam_size,
111
+ int32_t* new_codes,
112
+ float* new_distances = nullptr) const;
113
+
88
114
  /** Beam search can consume a lot of memory. This function estimates the
89
115
  * amount of mem used by refine_beam to adjust the batch size
90
116
  *
91
117
  * @param beam_size if != -1, override the beam size
92
118
  */
93
119
  size_t memory_per_point(int beam_size = -1) const;
120
+
121
+ /** Cross products used in codebook tables
122
+ *
123
+ * These are used to keep track of norms of centroids.
124
+ */
125
+ void compute_codebook_tables();
126
+
127
+ /// dot products of all codebook vectors with each other
128
+ /// size total_codebook_size * total_codebook_size
129
+ std::vector<float> codebook_cross_products;
130
+ /// norms of all vectors
131
+ std::vector<float> cent_norms;
94
132
  };
95
133
 
96
134
  /** Encode a residual by sampling from a centroid table.
@@ -127,4 +165,24 @@ void beam_search_encode_step(
127
165
  float* new_distances,
128
166
  Index* assign_index = nullptr);
129
167
 
168
+ /** Encode a set of vectors using their dot products with the codebooks
169
+ *
170
+ */
171
+ void beam_search_encode_step_tab(
172
+ size_t K,
173
+ size_t n,
174
+ size_t beam_size, // input sizes
175
+ const float* codebook_cross_norms, // size K * ldc
176
+ size_t ldc, // >= K
177
+ const uint64_t* codebook_offsets, // m
178
+ const float* query_cp, // size n * ldqc
179
+ size_t ldqc, // >= K
180
+ const float* cent_norms_i, // size K
181
+ size_t m,
182
+ const int32_t* codes, // n * beam_size * m
183
+ const float* distances, // n * beam_size
184
+ size_t new_beam_size,
185
+ int32_t* new_codes, // n * new_beam_size * (m + 1)
186
+ float* new_distances); // n * new_beam_size
187
+
130
188
  }; // namespace faiss
@@ -1335,12 +1335,9 @@ namespace {
1335
1335
  template <class DCClass>
1336
1336
  struct IVFSQScannerIP : InvertedListScanner {
1337
1337
  DCClass dc;
1338
- bool store_pairs, by_residual;
1338
+ bool by_residual;
1339
1339
 
1340
- size_t code_size;
1341
-
1342
- idx_t list_no; /// current list (set to 0 for Flat index
1343
- float accu0; /// added to all distances
1340
+ float accu0; /// added to all distances
1344
1341
 
1345
1342
  IVFSQScannerIP(
1346
1343
  int d,
@@ -1348,12 +1345,10 @@ struct IVFSQScannerIP : InvertedListScanner {
1348
1345
  size_t code_size,
1349
1346
  bool store_pairs,
1350
1347
  bool by_residual)
1351
- : dc(d, trained),
1352
- store_pairs(store_pairs),
1353
- by_residual(by_residual),
1354
- code_size(code_size),
1355
- list_no(0),
1356
- accu0(0) {}
1348
+ : dc(d, trained), by_residual(by_residual), accu0(0) {
1349
+ this->store_pairs = store_pairs;
1350
+ this->code_size = code_size;
1351
+ }
1357
1352
 
1358
1353
  void set_query(const float* query) override {
1359
1354
  dc.set_query(query);
@@ -1411,10 +1406,8 @@ template <class DCClass>
1411
1406
  struct IVFSQScannerL2 : InvertedListScanner {
1412
1407
  DCClass dc;
1413
1408
 
1414
- bool store_pairs, by_residual;
1415
- size_t code_size;
1409
+ bool by_residual;
1416
1410
  const Index* quantizer;
1417
- idx_t list_no; /// current inverted list
1418
1411
  const float* x; /// current query
1419
1412
 
1420
1413
  std::vector<float> tmp;
@@ -1427,13 +1420,13 @@ struct IVFSQScannerL2 : InvertedListScanner {
1427
1420
  bool store_pairs,
1428
1421
  bool by_residual)
1429
1422
  : dc(d, trained),
1430
- store_pairs(store_pairs),
1431
1423
  by_residual(by_residual),
1432
- code_size(code_size),
1433
1424
  quantizer(quantizer),
1434
- list_no(0),
1435
1425
  x(nullptr),
1436
- tmp(d) {}
1426
+ tmp(d) {
1427
+ this->store_pairs = store_pairs;
1428
+ this->code_size = code_size;
1429
+ }
1437
1430
 
1438
1431
  void set_query(const float* query) override {
1439
1432
  x = query;
@@ -1443,8 +1436,8 @@ struct IVFSQScannerL2 : InvertedListScanner {
1443
1436
  }
1444
1437
 
1445
1438
  void set_list(idx_t list_no, float /*coarse_dis*/) override {
1439
+ this->list_no = list_no;
1446
1440
  if (by_residual) {
1447
- this->list_no = list_no;
1448
1441
  // shift of x_in wrt centroid
1449
1442
  quantizer->compute_residual(x, tmp.data(), list_no);
1450
1443
  dc.set_query(tmp.data());