faiss 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/faiss/index.cpp +25 -6
  4. data/ext/faiss/index_binary.cpp +17 -4
  5. data/ext/faiss/kmeans.cpp +6 -6
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +2 -3
  8. data/vendor/faiss/faiss/AutoTune.h +1 -1
  9. data/vendor/faiss/faiss/Clustering.cpp +2 -2
  10. data/vendor/faiss/faiss/Clustering.h +2 -2
  11. data/vendor/faiss/faiss/IVFlib.cpp +1 -2
  12. data/vendor/faiss/faiss/IVFlib.h +1 -1
  13. data/vendor/faiss/faiss/Index.h +10 -10
  14. data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
  15. data/vendor/faiss/faiss/Index2Layer.h +2 -2
  16. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
  18. data/vendor/faiss/faiss/IndexBinary.h +7 -7
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +3 -1
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
  25. data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
  26. data/vendor/faiss/faiss/IndexFastScan.h +107 -7
  27. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -1
  29. data/vendor/faiss/faiss/IndexHNSW.h +1 -1
  30. data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
  31. data/vendor/faiss/faiss/IndexIDMap.h +6 -6
  32. data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
  33. data/vendor/faiss/faiss/IndexIVF.h +5 -5
  34. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
  35. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
  36. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
  37. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
  38. data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
  39. data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
  40. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +366 -0
  41. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
  42. data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
  43. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  44. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
  45. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
  46. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +13 -6
  47. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +1 -0
  48. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +650 -0
  49. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +216 -0
  50. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
  51. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  52. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
  53. data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
  54. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
  55. data/vendor/faiss/faiss/IndexPQ.h +1 -1
  56. data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
  57. data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
  58. data/vendor/faiss/faiss/IndexRaBitQ.cpp +13 -10
  59. data/vendor/faiss/faiss/IndexRaBitQ.h +7 -2
  60. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +586 -0
  61. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +149 -0
  62. data/vendor/faiss/faiss/IndexShards.cpp +1 -1
  63. data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
  64. data/vendor/faiss/faiss/MetricType.h +1 -1
  65. data/vendor/faiss/faiss/VectorTransform.h +2 -2
  66. data/vendor/faiss/faiss/clone_index.cpp +3 -1
  67. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  68. data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
  69. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
  70. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
  71. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +10 -6
  72. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
  73. data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
  74. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
  75. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
  76. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
  77. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
  78. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
  79. data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
  80. data/vendor/faiss/faiss/impl/DistanceComputer.h +3 -3
  81. data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
  82. data/vendor/faiss/faiss/impl/HNSW.cpp +1 -1
  83. data/vendor/faiss/faiss/impl/HNSW.h +4 -4
  84. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
  85. data/vendor/faiss/faiss/impl/IDSelector.h +1 -1
  86. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
  87. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
  88. data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
  89. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
  90. data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
  91. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  92. data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
  93. data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
  94. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
  95. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
  96. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  97. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
  98. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  99. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +246 -0
  100. data/vendor/faiss/faiss/impl/RaBitQUtils.h +153 -0
  101. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +54 -158
  102. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +2 -1
  103. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  104. data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
  105. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1 -1
  106. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +1 -1
  107. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
  108. data/vendor/faiss/faiss/impl/index_read.cpp +87 -3
  109. data/vendor/faiss/faiss/impl/index_write.cpp +73 -3
  110. data/vendor/faiss/faiss/impl/io.cpp +2 -2
  111. data/vendor/faiss/faiss/impl/io.h +4 -4
  112. data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
  113. data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
  114. data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
  115. data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
  116. data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
  117. data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
  118. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
  119. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
  120. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
  121. data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
  122. data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
  123. data/vendor/faiss/faiss/index_factory.cpp +43 -1
  124. data/vendor/faiss/faiss/index_factory.h +1 -1
  125. data/vendor/faiss/faiss/index_io.h +1 -1
  126. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +205 -0
  127. data/vendor/faiss/faiss/invlists/InvertedLists.h +62 -0
  128. data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
  129. data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
  130. data/vendor/faiss/faiss/utils/Heap.h +3 -3
  131. data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
  132. data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
  133. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
  134. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
  135. data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
  136. data/vendor/faiss/faiss/utils/distances.h +2 -2
  137. data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
  138. data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
  139. data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
  140. data/vendor/faiss/faiss/utils/hamming.h +1 -1
  141. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
  142. data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
  143. data/vendor/faiss/faiss/utils/partitioning.h +2 -2
  144. data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
  145. data/vendor/faiss/faiss/utils/random.cpp +1 -1
  146. data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
  147. data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
  148. data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
  149. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
  150. data/vendor/faiss/faiss/utils/utils.cpp +5 -2
  151. data/vendor/faiss/faiss/utils/utils.h +2 -2
  152. metadata +12 -1
data/vendor/faiss/faiss/index_factory.cpp

@@ -27,10 +27,12 @@
  #include <faiss/IndexIVFAdditiveQuantizer.h>
  #include <faiss/IndexIVFAdditiveQuantizerFastScan.h>
  #include <faiss/IndexIVFFlat.h>
+ #include <faiss/IndexIVFFlatPanorama.h>
  #include <faiss/IndexIVFPQ.h>
  #include <faiss/IndexIVFPQFastScan.h>
  #include <faiss/IndexIVFPQR.h>
  #include <faiss/IndexIVFRaBitQ.h>
+ #include <faiss/IndexIVFRaBitQFastScan.h>
  #include <faiss/IndexIVFSpectralHash.h>
  #include <faiss/IndexLSH.h>
  #include <faiss/IndexLattice.h>
@@ -39,6 +41,7 @@
  #include <faiss/IndexPQFastScan.h>
  #include <faiss/IndexPreTransform.h>
  #include <faiss/IndexRaBitQ.h>
+ #include <faiss/IndexRaBitQFastScan.h>
  #include <faiss/IndexRefine.h>
  #include <faiss/IndexRowwiseMinMax.h>
  #include <faiss/IndexScalarQuantizer.h>
@@ -49,6 +52,9 @@
  #include <faiss/IndexBinaryHNSW.h>
  #include <faiss/IndexBinaryHash.h>
  #include <faiss/IndexBinaryIVF.h>
+ #include <faiss/IndexIDMap.h>
+ #include <algorithm>
+ #include <cctype>
  #include <string>

  namespace faiss {
@@ -326,6 +332,10 @@ IndexIVF* parse_IndexIVF(
      if (match("FlatDedup")) {
          return new IndexIVFFlatDedup(get_q(), d, nlist, mt, own_il);
      }
+     if (match("FlatPanorama([0-9]+)?")) {
+         int nlevels = mres_to_int(sm[1], 8); // default to 8 levels
+         return new IndexIVFFlatPanorama(get_q(), d, nlist, nlevels, mt, own_il);
+     }
      if (match(sq_pattern)) {
          return new IndexIVFScalarQuantizer(
                  get_q(),
@@ -450,6 +460,10 @@ IndexIVF* parse_IndexIVF(
      if (match(rabitq_pattern)) {
          return new IndexIVFRaBitQ(get_q(), d, nlist, mt, own_il);
      }
+     if (match("RaBitQfs(_[0-9]+)?")) {
+         int bbs = mres_to_int(sm[1], 32, 1);
+         return new IndexIVFRaBitQFastScan(get_q(), d, nlist, mt, bbs, own_il);
+     }
      return nullptr;
  }

@@ -676,6 +690,12 @@ Index* parse_other_indexes(
          return new IndexRaBitQ(d, metric);
      }

+     // IndexRaBitQFastScan
+     if (match("RaBitQfs(_[0-9]+)?")) {
+         int bbs = mres_to_int(sm[1], 32, 1);
+         return new IndexRaBitQFastScan(d, metric, bbs);
+     }
+
      return nullptr;
  }

@@ -934,6 +954,28 @@ IndexBinary* index_binary_factory(
          bool own_invlists) {
      IndexBinary* index = nullptr;

+     std::smatch sm;
+     std::string desc_str(description);
+
+     // Handle IDMap2 and IDMap wrappers (prefix or suffix)
+     if (re_match(desc_str, "(.+),IDMap2", sm) ||
+         re_match(desc_str, "IDMap2,(.+)", sm)) {
+         IndexBinary* sub_index =
+                 index_binary_factory(d, sm[1].str().c_str(), own_invlists);
+         IndexBinaryIDMap2* idmap2 = new IndexBinaryIDMap2(sub_index);
+         idmap2->own_fields = true;
+         return idmap2;
+     }
+
+     if (re_match(desc_str, "(.+),IDMap", sm) ||
+         re_match(desc_str, "IDMap,(.+)", sm)) {
+         IndexBinary* sub_index =
+                 index_binary_factory(d, sm[1].str().c_str(), own_invlists);
+         IndexBinaryIDMap* idmap = new IndexBinaryIDMap(sub_index);
+         idmap->own_fields = true;
+         return idmap;
+     }
+
      int ncentroids = -1;
      int M, nhash, b;

@@ -959,7 +1001,7 @@ IndexBinary* index_binary_factory(
      } else if (sscanf(description, "BHash%d", &b) == 1) {
          index = new IndexBinaryHash(d, b);

-     } else if (std::string(description) == "BFlat") {
+     } else if (desc_str == "BFlat") {
          index = new IndexBinaryFlat(d);

      } else {
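The hunks above extend the factory grammar: parse_IndexIVF now accepts "FlatPanorama[n_levels]" and "RaBitQfs[_bbs]", parse_other_indexes accepts a standalone "RaBitQfs[_bbs]", and index_binary_factory understands IDMap / IDMap2 wrappers. Below is a minimal usage sketch, assuming the descriptor spellings implied by the regexes above; the dimension and nlist values are purely illustrative, not part of the diff.

    #include <faiss/MetricType.h>
    #include <faiss/index_factory.h>

    int main() {
        int d = 128; // illustrative dimensionality (multiple of 8, so it also works for binary indexes)

        // IVF-Flat with Panorama level-oriented storage; the numeric suffix is the
        // number of levels and defaults to 8 when omitted (mres_to_int(sm[1], 8)).
        faiss::Index* ivf_pano =
                faiss::index_factory(d, "IVF1024,FlatPanorama4", faiss::METRIC_L2);

        // RaBitQ FastScan, flat and IVF variants; the optional _<bbs> suffix picks
        // the fast-scan block size (default 32 per mres_to_int(sm[1], 32, 1)).
        faiss::Index* rabitq_fs =
                faiss::index_factory(d, "RaBitQfs", faiss::METRIC_L2);
        faiss::Index* ivf_rabitq_fs =
                faiss::index_factory(d, "IVF1024,RaBitQfs_64", faiss::METRIC_L2);

        // Binary factory with the new IDMap2 wrapper (prefix or suffix form).
        faiss::IndexBinary* bin = faiss::index_binary_factory(d, "BFlat,IDMap2");

        delete bin;
        delete ivf_rabitq_fs;
        delete rabitq_fs;
        delete ivf_pano;
        return 0;
    }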
data/vendor/faiss/faiss/index_factory.h

@@ -12,7 +12,7 @@

  namespace faiss {

- /** Build and index with the sequence of processing steps described in
+ /** Build an index with the sequence of processing steps described in
   * the string. */
  Index* index_factory(
          int d,
data/vendor/faiss/faiss/index_io.h

@@ -16,7 +16,7 @@
   * object that abstracts the medium.
   *
   * The read functions return objects that should be deallocated with
-  * delete. All references within these objectes are owned by the
+  * delete. All references within these objects are owned by the
   * object.
   */

data/vendor/faiss/faiss/invlists/InvertedLists.cpp

@@ -346,6 +346,211 @@ void ArrayInvertedLists::permute_invlists(const idx_t* map) {

  ArrayInvertedLists::~ArrayInvertedLists() {}

+ /***********************************************
+  * ArrayInvertedListsPanorama implementation
+  **********************************************/
+
+ ArrayInvertedListsPanorama::ArrayInvertedListsPanorama(
+         size_t nlist,
+         size_t code_size,
+         size_t n_levels)
+         : ArrayInvertedLists(nlist, code_size),
+           n_levels(n_levels),
+           level_width(
+                   (((code_size / sizeof(float)) + n_levels - 1) / n_levels) *
+                   sizeof(float)) {
+     FAISS_THROW_IF_NOT(n_levels > 0);
+     FAISS_THROW_IF_NOT(code_size % sizeof(float) == 0);
+     FAISS_THROW_IF_NOT_MSG(
+             !use_iterator,
+             "IndexIVFFlatPanorama does not support iterators, use vanilla IndexIVFFlat instead");
+     FAISS_ASSERT(level_width % sizeof(float) == 0);
+
+     cum_sums.resize(nlist);
+ }
+
+ const float* ArrayInvertedListsPanorama::get_cum_sums(size_t list_no) const {
+     assert(list_no < nlist);
+     return cum_sums[list_no].data();
+ }
+
+ size_t ArrayInvertedListsPanorama::add_entries(
+         size_t list_no,
+         size_t n_entry,
+         const idx_t* ids_in,
+         const uint8_t* code) {
+     assert(list_no < nlist);
+     size_t o = ids[list_no].size();
+
+     ids[list_no].resize(o + n_entry);
+     memcpy(&ids[list_no][o], ids_in, sizeof(ids_in[0]) * n_entry);
+
+     size_t new_size = o + n_entry;
+     size_t num_batches = (new_size + kBatchSize - 1) / kBatchSize;
+     codes[list_no].resize(num_batches * kBatchSize * code_size);
+     cum_sums[list_no].resize(num_batches * kBatchSize * (n_levels + 1));
+
+     copy_codes_to_level_layout(list_no, o, n_entry, code);
+     compute_cumulative_sums(list_no, o, n_entry, code);
+
+     return o;
+ }
+
+ void ArrayInvertedListsPanorama::update_entries(
+         size_t list_no,
+         size_t offset,
+         size_t n_entry,
+         const idx_t* ids_in,
+         const uint8_t* code) {
+     assert(list_no < nlist);
+     assert(n_entry + offset <= ids[list_no].size());
+
+     memcpy(&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
+     copy_codes_to_level_layout(list_no, offset, n_entry, code);
+     compute_cumulative_sums(list_no, offset, n_entry, code);
+ }
+
+ void ArrayInvertedListsPanorama::resize(size_t list_no, size_t new_size) {
+     ids[list_no].resize(new_size);
+
+     size_t num_batches = (new_size + kBatchSize - 1) / kBatchSize;
+     codes[list_no].resize(num_batches * kBatchSize * code_size);
+     cum_sums[list_no].resize(num_batches * kBatchSize * (n_levels + 1));
+ }
+
+ const uint8_t* ArrayInvertedListsPanorama::get_single_code(
+         size_t list_no,
+         size_t offset) const {
+     assert(list_no < nlist);
+     assert(offset < ids[list_no].size());
+
+     uint8_t* recons_buffer = new uint8_t[code_size];
+
+     const uint8_t* codes_base = codes[list_no].data();
+
+     size_t batch_no = offset / kBatchSize;
+     size_t pos_in_batch = offset % kBatchSize;
+     size_t batch_offset = batch_no * kBatchSize * code_size;
+
+     for (size_t level = 0; level < n_levels; level++) {
+         size_t level_offset = level * level_width * kBatchSize;
+         const uint8_t* src = codes_base + batch_offset + level_offset +
+                 pos_in_batch * level_width;
+         uint8_t* dest = recons_buffer + level * level_width;
+         size_t copy_size =
+                 std::min(level_width, code_size - level * level_width);
+         memcpy(dest, src, copy_size);
+     }
+
+     return recons_buffer;
+ }
+
+ void ArrayInvertedListsPanorama::release_codes(
+         size_t list_no,
+         const uint8_t* codes) const {
+     // Only delete if it's heap-allocated (from get_single_code).
+     // If it's from get_codes (raw storage), it will be codes[list_no].data()
+     if (codes != this->codes[list_no].data()) {
+         delete[] codes;
+     }
+ }
+
+ InvertedListsIterator* ArrayInvertedListsPanorama::get_iterator(
+         size_t /* list_no */,
+         void* /* inverted_list_context */) const {
+     FAISS_THROW_MSG(
+             "IndexIVFFlatPanorama does not support iterators, use vanilla IndexIVFFlat instead");
+     return nullptr;
+ }
+
+ void ArrayInvertedListsPanorama::compute_cumulative_sums(
+         size_t list_no,
+         size_t offset,
+         size_t n_entry,
+         const uint8_t* code) {
+     // Cast to float* is safe here as we guarantee codes are always float
+     // vectors for `IndexIVFFlatPanorama` (verified by the constructor).
+     const float* vectors = reinterpret_cast<const float*>(code);
+     const size_t d = code_size / sizeof(float);
+
+     std::vector<float> suffix_sums(d + 1);
+
+     for (size_t entry_idx = 0; entry_idx < n_entry; entry_idx++) {
+         size_t current_pos = offset + entry_idx;
+         size_t batch_no = current_pos / kBatchSize;
+         size_t pos_in_batch = current_pos % kBatchSize;
+
+         const float* vector = vectors + entry_idx * d;
+
+         // Compute suffix sums of squared values.
+         suffix_sums[d] = 0.0f;
+         for (int j = d - 1; j >= 0; j--) {
+             float squared_val = vector[j] * vector[j];
+             suffix_sums[j] = suffix_sums[j + 1] + squared_val;
+         }
+
+         // Store cumulative sums in batch-oriented layout.
+         size_t cumsum_batch_offset = batch_no * kBatchSize * (n_levels + 1);
+         float* cumsum_base = cum_sums[list_no].data();
+
+         const size_t level_width_floats = level_width / sizeof(float);
+         for (size_t level = 0; level < n_levels; level++) {
+             size_t start_idx = level * level_width_floats;
+             size_t cumsum_offset =
+                     cumsum_batch_offset + level * kBatchSize + pos_in_batch;
+             if (start_idx < d) {
+                 cumsum_base[cumsum_offset] = sqrt(suffix_sums[start_idx]);
+             } else {
+                 cumsum_base[cumsum_offset] = 0.0f;
+             }
+         }
+
+         // Last level sum is always 0.
+         size_t cumsum_offset =
+                 cumsum_batch_offset + n_levels * kBatchSize + pos_in_batch;
+         cumsum_base[cumsum_offset] = 0.0f;
+     }
+ }
+
+ // Helper method to copy codes into level-oriented batch layout at a given
+ // offset in the list.
+ void ArrayInvertedListsPanorama::copy_codes_to_level_layout(
+         size_t list_no,
+         size_t offset,
+         size_t n_entry,
+         const uint8_t* code) {
+     uint8_t* codes_base = codes[list_no].data();
+     size_t current_pos = offset;
+     for (size_t entry_idx = 0; entry_idx < n_entry;) {
+         // Determine which batch we're in and position within that batch.
+         size_t batch_no = current_pos / kBatchSize;
+         size_t pos_in_batch = current_pos % kBatchSize;
+         size_t entries_in_this_batch =
+                 std::min(n_entry - entry_idx, kBatchSize - pos_in_batch);
+
+         // Copy entries into level-oriented layout for this batch.
+         size_t batch_offset = batch_no * kBatchSize * code_size;
+         for (size_t level = 0; level < n_levels; level++) {
+             size_t level_offset = level * level_width * kBatchSize;
+             size_t start_byte = level * level_width;
+             size_t copy_size =
+                     std::min(level_width, code_size - level * level_width);
+
+             for (size_t i = 0; i < entries_in_this_batch; i++) {
+                 const uint8_t* src =
+                         code + (entry_idx + i) * code_size + start_byte;
+                 uint8_t* dest = codes_base + batch_offset + level_offset +
+                         (pos_in_batch + i) * level_width;
+
+                 memcpy(dest, src, copy_size);
+             }
+         }
+
+         entry_idx += entries_in_this_batch;
+         current_pos += entries_in_this_batch;
+     }
+ }
+
  /*****************************************************************
   * Meta-inverted list implementations
   *****************************************************************/
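The add path above writes codes in a level-oriented, batch-of-128 layout: within each batch, the bytes of level 0 for all vectors come first, then level 1, and so on, with level_width rounded up to a whole number of floats. A standalone sketch of that addressing, mirroring get_single_code and copy_codes_to_level_layout above (the helper name and parameters are hypothetical, not part of the faiss API):

    #include <cstddef>

    // Byte offset, inside one list's code array, of the start of `level` for the
    // vector stored at position `pos`. batch_size corresponds to kBatchSize (128).
    size_t panorama_code_offset(
            size_t pos,
            size_t level,
            size_t code_size,   // bytes per full vector
            size_t level_width, // bytes per level
            size_t batch_size) {
        size_t batch_no = pos / batch_size;
        size_t pos_in_batch = pos % batch_size;
        size_t batch_offset = batch_no * batch_size * code_size; // batches before this one
        size_t level_offset = level * level_width * batch_size;  // earlier levels of this batch
        return batch_offset + level_offset + pos_in_batch * level_width;
    }

get_single_code walks this formula once per level and copies min(level_width, code_size - level * level_width) bytes back into a flat reconstruction buffer.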
data/vendor/faiss/faiss/invlists/InvertedLists.h

@@ -276,6 +276,68 @@ struct ArrayInvertedLists : InvertedLists {
      ~ArrayInvertedLists() override;
  };

+ /// Level-oriented storage as defined in the IVFFlat section of Panorama
+ /// (https://www.arxiv.org/pdf/2510.00566).
+ struct ArrayInvertedListsPanorama : ArrayInvertedLists {
+     static constexpr size_t kBatchSize = 128;
+     std::vector<MaybeOwnedVector<float>> cum_sums;
+     const size_t n_levels;
+     const size_t level_width; // in code units
+
+     ArrayInvertedListsPanorama(size_t nlist, size_t code_size, size_t n_levels);
+
+     const float* get_cum_sums(size_t list_no) const;
+
+     size_t add_entries(
+             size_t list_no,
+             size_t n_entry,
+             const idx_t* ids,
+             const uint8_t* code) override;
+
+     void update_entries(
+             size_t list_no,
+             size_t offset,
+             size_t n_entry,
+             const idx_t* ids,
+             const uint8_t* code) override;
+
+     void resize(size_t list_no, size_t new_size) override;
+
+     /// Panorama's layout make it impractical to support iterators as defined
+     /// by Faiss (i.e. `InvertedListsIterator` API). The iterator would require
+     /// to allocate and reassemble the vector at each call.
+     /// Hence, we override this method to throw an error, this effectively
+     /// disables the `iterate_codes` and `iterate_codes_range` methods.
+     InvertedListsIterator* get_iterator(
+             size_t list_no,
+             void* inverted_list_context = nullptr) const override;
+
+     /// Reconstructs a single code from level-oriented storage to flat format.
+     const uint8_t* get_single_code(size_t list_no, size_t offset)
+             const override;
+
+     /// Frees codes returned by `get_single_code`.
+     void release_codes(size_t list_no, const uint8_t* codes) const override;
+
+    private:
+     /// Helper method to copy codes into level-oriented batch layout at a given
+     /// offset in the list.
+     void copy_codes_to_level_layout(
+             size_t list_no,
+             size_t offset,
+             size_t n_entry,
+             const uint8_t* code);
+
+     /// Helper method to compute the cumulative sums of the codes.
+     /// The cumsums also follow the level-oriented batch layout to minimize the
+     /// number of random memory accesses.
+     void compute_cumulative_sums(
+             size_t list_no,
+             size_t offset,
+             size_t n_entry,
+             const uint8_t* code);
+ };
+
  /*****************************************************************
   * Meta-inverted lists
   *
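Alongside the codes, cum_sums stores, for every vector and level, the square root of the suffix sum of squared components starting at that level (with a trailing 0 at index n_levels), in the same batch-major order. A hedged reader sketch, assuming the layout written by compute_cumulative_sums in the .cpp hunk above (the helper name is hypothetical):

    #include <cstddef>

    // Returns sqrt(sum of squared components from the start of `level` to the end
    // of the vector) for the vector at position `pos` in a list, given the pointer
    // returned by ArrayInvertedListsPanorama::get_cum_sums(list_no).
    float panorama_suffix_norm(
            const float* cum_sums_list,
            size_t pos,
            size_t level,
            size_t n_levels,
            size_t batch_size /* kBatchSize == 128 */) {
        size_t batch_no = pos / batch_size;
        size_t pos_in_batch = pos % batch_size;
        size_t batch_offset = batch_no * batch_size * (n_levels + 1);
        return cum_sums_list[batch_offset + level * batch_size + pos_in_batch];
    }

These per-level suffix norms are the quantities a Panorama-style scan can use to bound the contribution of dimensions it has not yet visited, per the paper cited in the comment above.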
data/vendor/faiss/faiss/utils/AlignedTable.h

@@ -25,7 +25,7 @@ inline bool is_aligned_pointer(const void* x) {
  }

  // class that manages suitably aligned arrays for SIMD
- // T should be a POV type. The default alignment is 32 for AVX
+ // T should be a POD type. The default alignment is 32 for AVX
  template <class T, int A = 32>
  struct AlignedTableTightAlloc {
      T* ptr;
data/vendor/faiss/faiss/utils/Heap.cpp

@@ -139,7 +139,7 @@ void HeapArray<C>::per_line_extrema(T* out_val, TI* out_ids) const {
      }
  }

- // explicit instanciations
+ // explicit instantiations

  template struct HeapArray<CMin<float, int64_t>>;
  template struct HeapArray<CMax<float, int64_t>>;
@@ -238,7 +238,7 @@ void merge_knn_results(
      }
  }

- // explicit instanciations
+ // explicit instantiations
  #define INSTANTIATE(C, distance_t) \
      template void merge_knn_results<int64_t, C<distance_t, int>>( \
              size_t, \
data/vendor/faiss/faiss/utils/Heap.h

@@ -150,7 +150,7 @@ inline void heap_replace_top(
      bh_ids[i] = id;
  }

- /* Partial instanciation for heaps with TI = int64_t */
+ /* Partial instantiation for heaps with TI = int64_t */

  template <typename T>
  inline void minheap_pop(size_t k, T* bh_val, int64_t* bh_ids) {
@@ -393,7 +393,7 @@ inline void heap_addn(
      }
  }

- /* Partial instanciation for heaps with TI = int64_t */
+ /* Partial instantiation for heaps with TI = int64_t */

  template <typename T>
  inline void minheap_addn(
@@ -489,7 +489,7 @@ struct HeapArray {
          return val + key * k;
      }

-     /// Correspponding identifiers
+     /// Corresponding identifiers
      TI* get_ids(size_t key) {
          return ids + key * k;
      }
data/vendor/faiss/faiss/utils/NeuralNet.cpp

@@ -71,7 +71,7 @@ Tensor2DTemplate<T> Tensor2DTemplate<T>::column(size_t j) const {
      return out;
  }

- // explicit template instanciation
+ // explicit template instantiation
  template struct Tensor2DTemplate<float>;
  template struct Tensor2DTemplate<int32_t>;

data/vendor/faiss/faiss/utils/NeuralNet.h

@@ -75,7 +75,7 @@ struct Embedding {
  };

  /// Feed forward layer that expands to a hidden dimension, applies a ReLU non
- /// linearity and maps back to the orignal dimension
+ /// linearity and maps back to the original dimension
  struct FFN {
      Linear linear1, linear2;

@@ -103,7 +103,7 @@ struct QINCoStep {
          return residual_blocks[i];
      }

-     /** encode a set of vectors x with intial estimate xhat. Optionally return
+     /** encode a set of vectors x with initial estimate xhat. Optionally return
       * the delta to be added to xhat to form the new xhat */
      nn::Int32Tensor2D encode(
              const nn::Tensor2D& xhat,
@@ -141,7 +141,7 @@ struct QINCo : NeuralNetCodec {

      nn::Int32Tensor2D encode(const nn::Tensor2D& x) const override;

-     virtual ~QINCo() {}
+     virtual ~QINCo() override {}
  };

  } // namespace faiss
data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h

@@ -50,8 +50,8 @@
  //   for j in range(0, NBUCKETS):
  //     idx = beam * n + i * NBUCKETS + j
  //     if distances[idx] < local_min_distances[j]:
- //       local_min_distances[i] = distances[idx]
- //       local_min_indices[i] = indices[idx]
+ //       local_min_distances[j] = distances[idx]
+ //       local_min_indices[j] = indices[idx]
  //
  // for j in range(0, NBUCKETS):
  //   heap.push(local_min_distances[j], local_min_indices[j])

data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h

@@ -106,7 +106,7 @@ struct HeapWithBuckets<CMax<float, int>, NBUCKETS, N> {
                  distance_candidate,
                  _CMP_LE_OS);

- // // blend seems to be slower that min
+ // // blend seems to be slower than min
  // const __m256 min_distances_new = _mm256_blendv_ps(
  //         distance_candidate,
  //         min_distances_i[j][p],
@@ -120,7 +120,7 @@
                          min_indices_i[j][p]),
                  comparison));

- // // blend seems to be slower that min
+ // // blend seems to be slower than min
  // const __m256 max_distances_new = _mm256_blendv_ps(
  //         min_distances_i[j][p],
  //         distance_candidate,
data/vendor/faiss/faiss/utils/approx_topk/mode.h

@@ -21,7 +21,7 @@
  /// It seems that only the limited number of combinations are
  /// meaningful, because of the limited supply of SIMD registers.
  /// Also, certain combinations, such as B32_D1 and B16_D1, were concluded
- /// to be not very precise in benchmarks, so ones were not introduced.
+ /// to be not very precise in benchmarks, so they were not introduced.
  ///
  /// TODO: Consider d-ary SIMD heap.

data/vendor/faiss/faiss/utils/distances.h

@@ -324,7 +324,7 @@ void knn_inner_product(
   * vector y, for the L2 distance
   * @param x      query vectors, size nx * d
   * @param y      database vectors, size ny * d
-  * @param res    result heap strcture, which also provides k. Sorted on output
+  * @param res    result heap structure, which also provides k. Sorted on output
   * @param y_norm2 (optional) norms for the y vectors (nullptr or size ny)
   * @param sel    search in this subset of vectors
   */
@@ -389,7 +389,7 @@ void knn_inner_products_by_idx(
   * @param x      query vectors, size nx * d
   * @param y      database vectors, size (max(ids) + 1) * d
   * @param subset subset of database vectors to consider, size (nx, nsubset)
-  * @param res    rIDesult structure
+  * @param res    result structure
   * @param ld_subset stride for the subset array. -1: use nsubset, 0: all queries
   *        process the same subset
   */
data/vendor/faiss/faiss/utils/extra_distances-inl.h

@@ -5,6 +5,8 @@
   * LICENSE file in the root directory of this source tree.
   */

+ #pragma once
+
  /** In this file are the implementations of extra metrics beyond L2
   * and inner product */

@@ -188,7 +190,7 @@ inline float VectorDistance<METRIC_GOWER>::operator()(

  /***************************************************************************
   * Dispatching function that takes a metric type and a consumer object
-  * the consumer object should contain a retun type T and a operation template
+  * the consumer object should contain a return type T and a operation template
   * function f() that is called to perform the operation. The first argument
   * of the function is the VectorDistance object. The rest are passed in as is.
   **************************************************************************/
data/vendor/faiss/faiss/utils/hamming-inl.h

@@ -5,6 +5,8 @@
   * LICENSE file in the root directory of this source tree.
   */

+ #pragma once
+
  namespace faiss {

  // BitstringWriter and BitstringReader functions
data/vendor/faiss/faiss/utils/hamming.cpp

@@ -257,12 +257,13 @@ void hammings_knn_mc(

      std::vector<HCounterState<HammingComputer>> cs;
      for (size_t i = 0; i < na; ++i) {
-         cs.push_back(HCounterState<HammingComputer>(
-                 all_counters.data() + i * nBuckets,
-                 all_ids_per_dis.get() + i * nBuckets * k,
-                 a + i * bytes_per_code,
-                 8 * bytes_per_code,
-                 k));
+         cs.push_back(
+                 HCounterState<HammingComputer>(
+                         all_counters.data() + i * nBuckets,
+                         all_ids_per_dis.get() + i * nBuckets * k,
+                         a + i * bytes_per_code,
+                         8 * bytes_per_code,
+                         k));
      }

      const size_t block_size = hamming_batch_size;
data/vendor/faiss/faiss/utils/hamming.h

@@ -14,7 +14,7 @@
   * fvecs2bitvecs).
   *
   * User-defined type hamdis_t is used for distances because at this time
-  * it is still uncler clear how we will need to balance
+  * it is still unclear clear how we will need to balance
   * - flexibility in vector size (may need 16- or even 8-bit vectors)
   * - memory usage
   * - cache-misses when dealing with large volumes of data (fewer bits is better)
data/vendor/faiss/faiss/utils/hamming_distance/common.h

@@ -30,8 +30,7 @@ inline int popcount64(uint64_t x) {
  // This table was moved from .cpp to .h file, because
  // otherwise it was causing compilation errors while trying to
  // compile swig modules on Windows.
- // todo for C++17: switch to 'inline constexpr'
- static constexpr uint8_t hamdis_tab_ham_bytes[256] = {
+ inline constexpr uint8_t hamdis_tab_ham_bytes[256] = {
          0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
          2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
          2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
data/vendor/faiss/faiss/utils/partitioning.cpp

@@ -140,7 +140,7 @@ typename C::T partition_fuzzy_median3(
      using T = typename C::T;

      // here we use bissection with a median of 3 to find the threshold and
-     // compress the arrays afterwards. So it's a n*log(n) algoirithm rather than
+     // compress the arrays afterwards. So it's a n*log(n) algorithm rather than
      // qselect's O(n) but it avoids shuffling around the array.

      FAISS_THROW_IF_NOT(n >= 3);
@@ -350,7 +350,7 @@ int simd_compress_array(
          }
      }

-     // handle remaining, only striclty lt ones.
+     // handle remaining, only strictly lt ones.
      for (; i0 + 15 < n; i0 += 16) {
          simd16uint16 v(vals + i0);
          simd16uint16 max2 = max_func<C>(v, thr16);
@@ -506,7 +506,7 @@ uint16_t simd_partition_fuzzy_with_bounds(

      uint64_t t2 = get_cy();

-     partition_stats.bissect_cycles += t1 - t0;
+     partition_stats.bisect_cycles += t1 - t0;
      partition_stats.compress_cycles += t2 - t1;

      return thresh;
@@ -662,7 +662,7 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
          }
      }

-     IFV printf("end bissection: thresh=%d q=%ld n_eq=%ld\n", thresh, q, n_eq);
+     IFV printf("end bisection: thresh=%d q=%ld n_eq=%ld\n", thresh, q, n_eq);

      if (!C::is_max) {
          if (n_eq == 0) {
@@ -762,7 +762,7 @@ typename C::T partition_fuzzy(
              vals, ids, n, q_min, q_max, q_out);
      }

- // explicit template instanciations
+ // explicit template instantiations

  template float partition_fuzzy<CMin<float, int64_t>>(
          float* vals,