faiss 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/faiss/index.cpp +25 -6
  4. data/ext/faiss/index_binary.cpp +17 -4
  5. data/ext/faiss/kmeans.cpp +6 -6
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +2 -3
  8. data/vendor/faiss/faiss/AutoTune.h +1 -1
  9. data/vendor/faiss/faiss/Clustering.cpp +2 -2
  10. data/vendor/faiss/faiss/Clustering.h +2 -2
  11. data/vendor/faiss/faiss/IVFlib.cpp +1 -2
  12. data/vendor/faiss/faiss/IVFlib.h +1 -1
  13. data/vendor/faiss/faiss/Index.h +10 -10
  14. data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
  15. data/vendor/faiss/faiss/Index2Layer.h +2 -2
  16. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
  18. data/vendor/faiss/faiss/IndexBinary.h +7 -7
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +3 -1
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
  25. data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
  26. data/vendor/faiss/faiss/IndexFastScan.h +107 -7
  27. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -1
  29. data/vendor/faiss/faiss/IndexHNSW.h +1 -1
  30. data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
  31. data/vendor/faiss/faiss/IndexIDMap.h +6 -6
  32. data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
  33. data/vendor/faiss/faiss/IndexIVF.h +5 -5
  34. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
  35. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
  36. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
  37. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
  38. data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
  39. data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
  40. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +366 -0
  41. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
  42. data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
  43. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  44. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
  45. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
  46. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +13 -6
  47. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +1 -0
  48. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +650 -0
  49. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +216 -0
  50. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
  51. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  52. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
  53. data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
  54. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
  55. data/vendor/faiss/faiss/IndexPQ.h +1 -1
  56. data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
  57. data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
  58. data/vendor/faiss/faiss/IndexRaBitQ.cpp +13 -10
  59. data/vendor/faiss/faiss/IndexRaBitQ.h +7 -2
  60. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +586 -0
  61. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +149 -0
  62. data/vendor/faiss/faiss/IndexShards.cpp +1 -1
  63. data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
  64. data/vendor/faiss/faiss/MetricType.h +1 -1
  65. data/vendor/faiss/faiss/VectorTransform.h +2 -2
  66. data/vendor/faiss/faiss/clone_index.cpp +3 -1
  67. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  68. data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
  69. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
  70. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
  71. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +10 -6
  72. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
  73. data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
  74. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
  75. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
  76. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
  77. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
  78. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
  79. data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
  80. data/vendor/faiss/faiss/impl/DistanceComputer.h +3 -3
  81. data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
  82. data/vendor/faiss/faiss/impl/HNSW.cpp +1 -1
  83. data/vendor/faiss/faiss/impl/HNSW.h +4 -4
  84. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
  85. data/vendor/faiss/faiss/impl/IDSelector.h +1 -1
  86. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
  87. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
  88. data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
  89. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
  90. data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
  91. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  92. data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
  93. data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
  94. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
  95. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
  96. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  97. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
  98. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  99. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +246 -0
  100. data/vendor/faiss/faiss/impl/RaBitQUtils.h +153 -0
  101. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +54 -158
  102. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +2 -1
  103. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  104. data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
  105. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1 -1
  106. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +1 -1
  107. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
  108. data/vendor/faiss/faiss/impl/index_read.cpp +87 -3
  109. data/vendor/faiss/faiss/impl/index_write.cpp +73 -3
  110. data/vendor/faiss/faiss/impl/io.cpp +2 -2
  111. data/vendor/faiss/faiss/impl/io.h +4 -4
  112. data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
  113. data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
  114. data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
  115. data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
  116. data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
  117. data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
  118. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
  119. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
  120. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
  121. data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
  122. data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
  123. data/vendor/faiss/faiss/index_factory.cpp +43 -1
  124. data/vendor/faiss/faiss/index_factory.h +1 -1
  125. data/vendor/faiss/faiss/index_io.h +1 -1
  126. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +205 -0
  127. data/vendor/faiss/faiss/invlists/InvertedLists.h +62 -0
  128. data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
  129. data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
  130. data/vendor/faiss/faiss/utils/Heap.h +3 -3
  131. data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
  132. data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
  133. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
  134. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
  135. data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
  136. data/vendor/faiss/faiss/utils/distances.h +2 -2
  137. data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
  138. data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
  139. data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
  140. data/vendor/faiss/faiss/utils/hamming.h +1 -1
  141. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
  142. data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
  143. data/vendor/faiss/faiss/utils/partitioning.h +2 -2
  144. data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
  145. data/vendor/faiss/faiss/utils/random.cpp +1 -1
  146. data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
  147. data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
  148. data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
  149. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
  150. data/vendor/faiss/faiss/utils/utils.cpp +5 -2
  151. data/vendor/faiss/faiss/utils/utils.h +2 -2
  152. metadata +12 -1
@@ -8,31 +8,20 @@
8
8
  #include <faiss/impl/RaBitQuantizer.h>
9
9
 
10
10
  #include <faiss/impl/FaissAssert.h>
11
+ #include <faiss/impl/RaBitQUtils.h>
11
12
  #include <faiss/utils/distances.h>
12
13
  #include <faiss/utils/rabitq_simd.h>
13
14
  #include <algorithm>
14
15
  #include <cmath>
15
16
  #include <cstring>
16
- #include <limits>
17
17
  #include <memory>
18
18
  #include <vector>
19
19
 
20
20
  namespace faiss {
21
21
 
22
- struct FactorsData {
23
- // ||or - c||^2 - ((metric==IP) ? ||or||^2 : 0)
24
- float or_minus_c_l2sqr = 0;
25
- float dp_multiplier = 0;
26
- };
27
-
28
- struct QueryFactorsData {
29
- float c1 = 0;
30
- float c2 = 0;
31
- float c34 = 0;
32
-
33
- float qr_to_c_L2sqr = 0;
34
- float qr_norm_L2sqr = 0;
35
- };
22
+ // Import shared utilities from RaBitQUtils
23
+ using rabitq_utils::FactorsData;
24
+ using rabitq_utils::QueryFactorsData;
36
25
 
37
26
  static size_t get_code_size(const size_t d) {
38
27
  return (d + 7) / 8 + sizeof(FactorsData);
@@ -65,19 +54,9 @@ void RaBitQuantizer::compute_codes_core(
65
54
  return;
66
55
  }
67
56
 
68
- // compute some helper constants
69
- const float inv_d_sqrt = (d == 0) ? 1.0f : (1.0f / std::sqrt((float)d));
70
-
71
57
  // compute codes
72
58
  #pragma omp parallel for if (n > 1000)
73
59
  for (int64_t i = 0; i < n; i++) {
74
- // ||or - c||^2
75
- float norm_L2sqr = 0;
76
- // ||or||^2, which is equal to ||P(or)||^2 and ||P^(-1)(or)||^2
77
- float or_L2sqr = 0;
78
- // dot product
79
- float dp_oO = 0;
80
-
81
60
  // the code
82
61
  uint8_t* code = codes + i * code_size;
83
62
  FactorsData* fac = reinterpret_cast<FactorsData*>(code + (d + 7) / 8);
@@ -87,46 +66,25 @@ void RaBitQuantizer::compute_codes_core(
87
66
  memset(code, 0, code_size);
88
67
  }
89
68
 
90
- for (size_t j = 0; j < d; j++) {
91
- const float or_minus_c = x[i * d + j] -
92
- ((centroid_in == nullptr) ? 0 : centroid_in[j]);
93
- norm_L2sqr += or_minus_c * or_minus_c;
94
- or_L2sqr += x[i * d + j] * x[i * d + j];
69
+ const float* x_row = x + i * d;
95
70
 
96
- const bool xb = (or_minus_c > 0);
71
+ // Use shared utilities for computing factors
72
+ *fac = rabitq_utils::compute_vector_factors(
73
+ x_row, d, centroid_in, metric_type);
97
74
 
98
- dp_oO += xb ? or_minus_c : (-or_minus_c);
75
+ // Pack bits into standard RaBitQ format
76
+ for (size_t j = 0; j < d; j++) {
77
+ const float x_val = x_row[j];
78
+ const float centroid_val =
79
+ (centroid_in == nullptr) ? 0.0f : centroid_in[j];
80
+ const float or_minus_c = x_val - centroid_val;
81
+ const bool xb = (or_minus_c > 0.0f);
99
82
 
100
83
  // store the output data
101
- if (code != nullptr) {
102
- if (xb) {
103
- // enable a particular bit
104
- code[j / 8] |= (1 << (j % 8));
105
- }
84
+ if (code != nullptr && xb) {
85
+ rabitq_utils::set_bit_standard(code, j);
106
86
  }
107
87
  }
108
-
109
- // compute factors
110
-
111
- // compute the inverse norm
112
- const float inv_norm_L2 =
113
- (std::abs(norm_L2sqr) < std::numeric_limits<float>::epsilon())
114
- ? 1.0f
115
- : (1.0f / std::sqrt(norm_L2sqr));
116
- dp_oO *= inv_norm_L2;
117
- dp_oO *= inv_d_sqrt;
118
-
119
- const float inv_dp_oO =
120
- (std::abs(dp_oO) < std::numeric_limits<float>::epsilon())
121
- ? 1.0f
122
- : (1.0f / dp_oO);
123
-
124
- fac->or_minus_c_l2sqr = norm_L2sqr;
125
- if (metric_type == MetricType::METRIC_INNER_PRODUCT) {
126
- fac->or_minus_c_l2sqr -= or_L2sqr;
127
- }
128
-
129
- fac->dp_multiplier = inv_dp_oO * std::sqrt(norm_L2sqr);
130
88
  }
131
89
  }
132
90
 
@@ -310,6 +268,7 @@ struct RaBitDistanceComputerQ : RaBitDistanceComputer {
310
268
 
311
269
  // the number of bits for SQ quantization of the query (qb > 0)
312
270
  uint8_t qb = 8;
271
+ bool centered = false;
313
272
  // the smallest value divisible by 8 that is not smaller than dim
314
273
  size_t popcount_aligned_dim = 0;
315
274
 
@@ -329,57 +288,35 @@ float RaBitDistanceComputerQ::distance_to_code(const uint8_t* code) {
329
288
  metric_type == MetricType::METRIC_INNER_PRODUCT));
330
289
 
331
290
  // split the code into parts
291
+ size_t size = (d + 7) / 8;
332
292
  const uint8_t* binary_data = code;
333
- const FactorsData* fac =
334
- reinterpret_cast<const FactorsData*>(code + (d + 7) / 8);
293
+ const FactorsData* fac = reinterpret_cast<const FactorsData*>(code + size);
335
294
 
336
- // // this is the baseline code
337
- // //
338
- // // compute <q,o> using integers
339
- // size_t dot_qo = 0;
340
- // for (size_t i = 0; i < d; i++) {
341
- // // extract i-th bit
342
- // const uint8_t masker = (1 << (i % 8));
343
- // const uint8_t bit = ((binary_data[i / 8] & masker) == masker) ? 1 :
344
- // 0;
345
- //
346
- // // accumulate dp
347
- // dot_qo += bit * rotated_qq[i];
348
- // }
349
-
350
- // this is the scheme for popcount
351
- const size_t di_8b = (d + 7) / 8;
352
- const size_t di_64b = (di_8b / 8) * 8;
353
-
354
- // Use the optimized popcount function from rabitq_simd.h
355
- float dot_qo =
356
- rabitq_dp_popcnt(rearranged_rotated_qq.data(), binary_data, d, qb);
295
+ // this is ||or - c||^2 - (IP ? ||or||^2 : 0)
296
+ float final_dot = 0;
297
+ if (centered) {
298
+ int64_t int_dot = ((1 << qb) - 1) * d;
299
+ int_dot -= 2 *
300
+ rabitq::bitwise_xor_dot_product(
301
+ rearranged_rotated_qq.data(), binary_data, size, qb);
302
+ final_dot += int_dot * query_fac.int_dot_scale;
303
+ } else {
304
+ // See RaBitDistanceComputerNotQ::distance_to_code() for baseline code.
305
+ auto dot_qo = rabitq::bitwise_and_dot_product(
306
+ rearranged_rotated_qq.data(), binary_data, size, qb);
357
307
 
358
- // It was a willful decision (after the discussion) to not to pre-cache
359
- // the sum of all bits, just in order to reduce the overhead per vector.
360
- uint64_t sum_q = 0;
361
- {
308
+ // It was a willful decision (after the discussion) not to pre-cache
309
+ // the sum of all bits, just in order to reduce the overhead per vector.
362
310
  // process 64-bit popcounts
363
- for (size_t i = 0; i < di_64b; i += 8) {
364
- const auto yv = *(const uint64_t*)(binary_data + i);
365
- sum_q += __builtin_popcountll(yv);
366
- }
367
-
368
- // process leftovers
369
- for (size_t i = di_64b; i < di_8b; i++) {
370
- const auto yv = *(binary_data + i);
371
- sum_q += __builtin_popcount(yv);
372
- }
311
+ auto sum_q = rabitq::popcount(binary_data, size);
312
+ // dot-product itself
313
+ final_dot += query_fac.c1 * dot_qo;
314
+ // normalizer coefficients
315
+ final_dot += query_fac.c2 * sum_q;
316
+ // normalizer coefficients
317
+ final_dot -= query_fac.c34;
373
318
  }
374
319
 
375
- float final_dot = 0;
376
- // dot-product itself
377
- final_dot += query_fac.c1 * dot_qo;
378
- // normalizer coefficients
379
- final_dot += query_fac.c2 * sum_q;
380
- // normalizer coefficients
381
- final_dot -= query_fac.c34;
382
-
383
320
  // this is ||or - c||^2 - (IP ? ||or||^2 : 0)
384
321
  const float or_c_l2sqr = fac->or_minus_c_l2sqr;
385
322
 
@@ -402,57 +339,23 @@ float RaBitDistanceComputerQ::distance_to_code(const uint8_t* code) {
402
339
  }
403
340
  }
404
341
 
342
+ // Use shared constant from RaBitQUtils
343
+ using rabitq_utils::Z_MAX_BY_QB;
344
+
405
345
  void RaBitDistanceComputerQ::set_query(const float* x) {
406
346
  FAISS_ASSERT(x != nullptr);
407
347
  FAISS_ASSERT(
408
348
  (metric_type == MetricType::METRIC_L2 ||
409
349
  metric_type == MetricType::METRIC_INNER_PRODUCT));
350
+ FAISS_THROW_IF_NOT(qb <= 8);
351
+ FAISS_THROW_IF_NOT(qb > 0);
410
352
 
411
- // compute the distance from the query to the centroid
412
- if (centroid != nullptr) {
413
- query_fac.qr_to_c_L2sqr = fvec_L2sqr(x, centroid, d);
414
- } else {
415
- query_fac.qr_to_c_L2sqr = fvec_norm_L2sqr(x, d);
416
- }
417
-
418
- // allocate space
419
- rotated_qq.resize(d);
420
-
421
- // rotate the query
422
- std::vector<float> rotated_q(d);
423
- for (size_t i = 0; i < d; i++) {
424
- rotated_q[i] = x[i] - ((centroid == nullptr) ? 0 : centroid[i]);
425
- }
426
-
427
- // compute some numbers
428
- const float inv_d = (d == 0) ? 1.0f : (1.0f / std::sqrt((float)d));
429
-
430
- // quantize the query. compute min and max
431
- float v_min = std::numeric_limits<float>::max();
432
- float v_max = std::numeric_limits<float>::lowest();
433
- for (size_t i = 0; i < d; i++) {
434
- const float v_q = rotated_q[i];
435
- v_min = std::min(v_min, v_q);
436
- v_max = std::max(v_max, v_q);
437
- }
438
-
439
- const float pow_2_qb = 1 << qb;
440
-
441
- const float delta = (v_max - v_min) / (pow_2_qb - 1);
442
- const float inv_delta = 1.0f / delta;
443
-
444
- size_t sum_qq = 0;
445
- for (int32_t i = 0; i < d; i++) {
446
- const float v_q = rotated_q[i];
447
-
448
- // a default non-randomized SQ
449
- const int v_qq = std::round((v_q - v_min) * inv_delta);
450
-
451
- rotated_qq[i] = std::min(255, std::max(0, v_qq));
452
- sum_qq += v_qq;
453
- }
353
+ // Use shared utilities for core query factor computation
354
+ std::vector<float> rotated_q;
355
+ query_fac = rabitq_utils::compute_query_factors(
356
+ x, d, centroid, qb, centered, metric_type, rotated_q, rotated_qq);
454
357
 
455
- // rearrange the query vector
358
+ // Rearrange the query vector for SIMD operations (RaBitQuantizer-specific)
456
359
  popcount_aligned_dim = ((d + 7) / 8) * 8;
457
360
  size_t offset = (d + 7) / 8;
458
361
 
@@ -466,20 +369,12 @@ void RaBitDistanceComputerQ::set_query(const float* x) {
466
369
  bit ? (1 << (idim % 8)) : 0;
467
370
  }
468
371
  }
469
-
470
- query_fac.c1 = 2 * delta * inv_d;
471
- query_fac.c2 = 2 * v_min * inv_d;
472
- query_fac.c34 = inv_d * (delta * sum_qq + d * v_min);
473
-
474
- if (metric_type == MetricType::METRIC_INNER_PRODUCT) {
475
- // precompute if needed
476
- query_fac.qr_norm_L2sqr = fvec_norm_L2sqr(x, d);
477
- }
478
372
  }
479
373
 
480
374
  FlatCodesDistanceComputer* RaBitQuantizer::get_distance_computer(
481
375
  uint8_t qb,
482
- const float* centroid_in) const {
376
+ const float* centroid_in,
377
+ bool centered) const {
483
378
  if (qb == 0) {
484
379
  auto dc = std::make_unique<RaBitDistanceComputerNotQ>();
485
380
  dc->metric_type = metric_type;
@@ -493,6 +388,7 @@ FlatCodesDistanceComputer* RaBitQuantizer::get_distance_computer(
493
388
  dc->d = d;
494
389
  dc->centroid = centroid_in;
495
390
  dc->qb = qb;
391
+ dc->centered = centered;
496
392
 
497
393
  return dc.release();
498
394
  }
@@ -72,7 +72,8 @@ struct RaBitQuantizer : Quantizer {
72
72
  // specify qb > 0 to have SQ qb-bits query
73
73
  FlatCodesDistanceComputer* get_distance_computer(
74
74
  uint8_t qb,
75
- const float* centroid_in = nullptr) const;
75
+ const float* centroid_in = nullptr,
76
+ bool centered = false) const;
76
77
  };
77
78
 
78
79
  } // namespace faiss
@@ -49,7 +49,7 @@ struct ResidualQuantizer : AdditiveQuantizer {
49
49
  * first element of the beam (faster but less accurate) */
50
50
  static const int Train_top_beam = 1024;
51
51
 
52
- /** set this bit to *not* autmatically compute the codebook tables
52
+ /** set this bit to *not* automatically compute the codebook tables
53
53
  * after training */
54
54
  static const int Skip_codebook_tables = 2048;
55
55
 
@@ -26,11 +26,11 @@ namespace faiss {
26
26
  * The classes below are intended to be used as template arguments
27
27
  * they handle results for batches of queries (size nq).
28
28
  * They can be called in two ways:
29
- * - by instanciating a SingleResultHandler that tracks results for a single
29
+ * - by instantiating a SingleResultHandler that tracks results for a single
30
30
  * query
31
31
  * - with begin_multiple/add_results/end_multiple calls where a whole block of
32
32
  * results is submitted
33
- * All classes are templated on C which to define wheter the min or the max of
33
+ * All classes are templated on C which to define whether the min or the max of
34
34
  * results is to be kept, and on sel, so that the codepaths for with / without
35
35
  * selector can be separated at compile time.
36
36
  *****************************************************************/
@@ -306,7 +306,7 @@ struct HeapBlockResultHandler : TopkBlockResultHandler<C, use_sel> {
306
306
  *
307
307
  * A reservoir is a result array of size capacity > n (number of requested
308
308
  * results) all results below a threshold are stored in an arbitrary order.
309
- *When the capacity is reached, a new threshold is chosen by partitionning
309
+ *When the capacity is reached, a new threshold is chosen by partitioning
310
310
  *the distance array.
311
311
  *****************************************************************/
312
312
 
@@ -572,7 +572,7 @@ struct RangeSearchBlockResultHandler : BlockResultHandler<C, use_sel> {
572
572
  RangeSearchPartialResult* pres;
573
573
  // there is one RangeSearchPartialResult structure per j0
574
574
  // (= block of columns of the large distance matrix)
575
- // it is a bit tricky to find the poper PartialResult structure
575
+ // it is a bit tricky to find the proper PartialResult structure
576
576
  // because the inner loop is on db not on queries.
577
577
 
578
578
  if (pr < j0s.size() && j0 == j0s[pr]) {
@@ -321,7 +321,7 @@ struct Codec6bit {
321
321
  static FAISS_ALWAYS_INLINE __m256
322
322
  decode_8_components(const uint8_t* code, int i) {
323
323
  // // Faster code for Intel CPUs or AMD Zen3+, just keeping it here
324
- // // for the reference, maybe, it becomes used oned day.
324
+ // // for the reference, maybe, it becomes used one day.
325
325
  // const uint16_t* data16 = (const uint16_t*)(code + (i >> 2) * 3);
326
326
  // const uint32_t* data32 = (const uint32_t*)data16;
327
327
  // const uint64_t val = *data32 + ((uint64_t)data16[2] << 32);
@@ -40,7 +40,7 @@ struct ScalarQuantizer : Quantizer {
40
40
  QuantizerType qtype = QT_8bit;
41
41
 
42
42
  /** The uniform encoder can estimate the range of representable
43
- * values of the unform encoder using different statistics. Here
43
+ * values of the uniform encoder using different statistics. Here
44
44
  * rs = rangestat_arg */
45
45
 
46
46
  // rangestat_arg.
@@ -5,6 +5,8 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
+ #pragma once
9
+
8
10
  #include <faiss/impl/FaissAssert.h>
9
11
  #include <exception>
10
12
  #include <iostream>
@@ -75,10 +77,11 @@ void ThreadedIndex<IndexT>::addIndex(IndexT* index) {
75
77
  }
76
78
  }
77
79
 
78
- indices_.emplace_back(std::make_pair(
79
- index,
80
- std::unique_ptr<WorkerThread>(
81
- isThreaded_ ? new WorkerThread : nullptr)));
80
+ indices_.emplace_back(
81
+ std::make_pair(
82
+ index,
83
+ std::unique_ptr<WorkerThread>(
84
+ isThreaded_ ? new WorkerThread : nullptr)));
82
85
 
83
86
  onAfterAddIndex(index);
84
87
  }
@@ -29,11 +29,13 @@
29
29
  #include <faiss/IndexIVFAdditiveQuantizer.h>
30
30
  #include <faiss/IndexIVFAdditiveQuantizerFastScan.h>
31
31
  #include <faiss/IndexIVFFlat.h>
32
+ #include <faiss/IndexIVFFlatPanorama.h>
32
33
  #include <faiss/IndexIVFIndependentQuantizer.h>
33
34
  #include <faiss/IndexIVFPQ.h>
34
35
  #include <faiss/IndexIVFPQFastScan.h>
35
36
  #include <faiss/IndexIVFPQR.h>
36
37
  #include <faiss/IndexIVFRaBitQ.h>
38
+ #include <faiss/IndexIVFRaBitQFastScan.h>
37
39
  #include <faiss/IndexIVFSpectralHash.h>
38
40
  #include <faiss/IndexLSH.h>
39
41
  #include <faiss/IndexLattice.h>
@@ -43,6 +45,7 @@
43
45
  #include <faiss/IndexPQFastScan.h>
44
46
  #include <faiss/IndexPreTransform.h>
45
47
  #include <faiss/IndexRaBitQ.h>
48
+ #include <faiss/IndexRaBitQFastScan.h>
46
49
  #include <faiss/IndexRefine.h>
47
50
  #include <faiss/IndexRowwiseMinMax.h>
48
51
  #include <faiss/IndexScalarQuantizer.h>
@@ -68,9 +71,10 @@ namespace faiss {
68
71
  **************************************************************/
69
72
 
70
73
  // This is a baseline functionality for reading mmapped and zerocopied vector.
71
- // * if `beforeknown_size` is defined, then a size of the vector won't be read.
74
+ // * if `beforeknown_size` is defined, then a size of the vector won't be
75
+ // read.
72
76
  // * if `size_multiplier` is defined, then a size will be multiplied by it.
73
- // * returns true is the case was handled; ownerwise, false
77
+ // * returns true if the case was handled; otherwise, false
74
78
  template <typename VectorT>
75
79
  bool read_vector_base(
76
80
  VectorT& target,
@@ -181,7 +185,7 @@ void read_vector(VectorT& target, IOReader* f) {
181
185
  // a replacement for READXBVECTOR
182
186
  template <typename VectorT>
183
187
  void read_xb_vector(VectorT& target, IOReader* f) {
184
- // size is not known beforehand, nultiply the size 4x
188
+ // size is not known beforehand, multiply the size 4x
185
189
  if (read_vector_base<VectorT>(target, f, std::nullopt, 4)) {
186
190
  return;
187
191
  }
@@ -325,6 +329,34 @@ InvertedLists* read_InvertedLists(IOReader* f, int io_flags) {
325
329
  "read_InvertedLists:"
326
330
  " WARN! inverted lists not stored with IVF object\n");
327
331
  return nullptr;
332
+ } else if (h == fourcc("ilpn") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
333
+ size_t nlist, code_size, n_levels;
334
+ READ1(nlist);
335
+ READ1(code_size);
336
+ READ1(n_levels);
337
+ auto ailp = new ArrayInvertedListsPanorama(nlist, code_size, n_levels);
338
+ std::vector<size_t> sizes(nlist);
339
+ read_ArrayInvertedLists_sizes(f, sizes);
340
+ for (size_t i = 0; i < nlist; i++) {
341
+ ailp->ids[i].resize(sizes[i]);
342
+ size_t num_elems =
343
+ ((sizes[i] + ArrayInvertedListsPanorama::kBatchSize - 1) /
344
+ ArrayInvertedListsPanorama::kBatchSize) *
345
+ ArrayInvertedListsPanorama::kBatchSize;
346
+ ailp->codes[i].resize(num_elems * code_size);
347
+ ailp->cum_sums[i].resize(num_elems * (n_levels + 1));
348
+ }
349
+ for (size_t i = 0; i < nlist; i++) {
350
+ size_t n = sizes[i];
351
+ if (n > 0) {
352
+ read_vector_with_known_size(
353
+ ailp->codes[i], f, ailp->codes[i].size());
354
+ read_vector_with_known_size(ailp->ids[i], f, n);
355
+ read_vector_with_known_size(
356
+ ailp->cum_sums[i], f, ailp->cum_sums[i].size());
357
+ }
358
+ }
359
+ return ailp;
328
360
  } else if (h == fourcc("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
329
361
  auto ails = new ArrayInvertedLists(0, 0);
330
362
  READ1(ails->nlist);
@@ -927,6 +959,13 @@ Index* read_index(IOReader* f, int io_flags) {
927
959
  }
928
960
  read_InvertedLists(ivfl, f, io_flags);
929
961
  idx = ivfl;
962
+ } else if (h == fourcc("IwPn")) {
963
+ IndexIVFFlatPanorama* ivfp = new IndexIVFFlatPanorama();
964
+ read_ivf_header(ivfp, f);
965
+ ivfp->code_size = ivfp->d * sizeof(float);
966
+ READ1(ivfp->n_levels);
967
+ read_InvertedLists(ivfp, f, io_flags);
968
+ idx = ivfp;
930
969
  } else if (h == fourcc("IwFl")) {
931
970
  IndexIVFFlat* ivfl = new IndexIVFFlat();
932
971
  read_ivf_header(ivfl, f);
@@ -1224,6 +1263,27 @@ Index* read_index(IOReader* f, int io_flags) {
1224
1263
  imm->own_fields = true;
1225
1264
 
1226
1265
  idx = imm;
1266
+ } else if (h == fourcc("Irfs")) {
1267
+ IndexRaBitQFastScan* idxqfs = new IndexRaBitQFastScan();
1268
+ read_index_header(idxqfs, f);
1269
+ read_RaBitQuantizer(&idxqfs->rabitq, f);
1270
+ READVECTOR(idxqfs->center);
1271
+ READ1(idxqfs->qb);
1272
+ READVECTOR(idxqfs->factors_storage);
1273
+ READ1(idxqfs->bbs);
1274
+ READ1(idxqfs->ntotal2);
1275
+ READ1(idxqfs->M2);
1276
+ READ1(idxqfs->code_size);
1277
+
1278
+ // Need to initialize the FastScan base class fields
1279
+ const size_t M_fastscan = (idxqfs->d + 3) / 4;
1280
+ constexpr size_t nbits_fastscan = 4;
1281
+ idxqfs->M = M_fastscan;
1282
+ idxqfs->nbits = nbits_fastscan;
1283
+ idxqfs->ksub = (1 << nbits_fastscan);
1284
+
1285
+ READVECTOR(idxqfs->codes);
1286
+ idx = idxqfs;
1227
1287
  } else if (h == fourcc("Ixrq")) {
1228
1288
  IndexRaBitQ* idxq = new IndexRaBitQ();
1229
1289
  read_index_header(idxq, f);
@@ -1242,6 +1302,30 @@ Index* read_index(IOReader* f, int io_flags) {
1242
1302
  READ1(ivrq->qb);
1243
1303
  read_InvertedLists(ivrq, f, io_flags);
1244
1304
  idx = ivrq;
1305
+ } else if (h == fourcc("Iwrf")) {
1306
+ IndexIVFRaBitQFastScan* ivrqfs = new IndexIVFRaBitQFastScan();
1307
+ read_ivf_header(ivrqfs, f);
1308
+ read_RaBitQuantizer(&ivrqfs->rabitq, f);
1309
+ READ1(ivrqfs->by_residual);
1310
+ READ1(ivrqfs->code_size);
1311
+ READ1(ivrqfs->bbs);
1312
+ READ1(ivrqfs->qbs2);
1313
+ READ1(ivrqfs->M2);
1314
+ READ1(ivrqfs->implem);
1315
+ READ1(ivrqfs->qb);
1316
+ READ1(ivrqfs->centered);
1317
+ READVECTOR(ivrqfs->factors_storage);
1318
+
1319
+ // Initialize FastScan base class fields
1320
+ const size_t M_fastscan = (ivrqfs->d + 3) / 4;
1321
+ constexpr size_t nbits_fastscan = 4;
1322
+ ivrqfs->M = M_fastscan;
1323
+ ivrqfs->nbits = nbits_fastscan;
1324
+ ivrqfs->ksub = (1 << nbits_fastscan);
1325
+
1326
+ read_InvertedLists(ivrqfs, f, io_flags);
1327
+ ivrqfs->init_code_packer();
1328
+ idx = ivrqfs;
1245
1329
  } else {
1246
1330
  FAISS_THROW_FMT(
1247
1331
  "Index type 0x%08x (\"%s\") not recognized",