faiss 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/ext/faiss/index.cpp +36 -10
  4. data/ext/faiss/index_binary.cpp +19 -6
  5. data/ext/faiss/kmeans.cpp +6 -6
  6. data/ext/faiss/numo.hpp +273 -123
  7. data/lib/faiss/version.rb +1 -1
  8. data/vendor/faiss/faiss/AutoTune.cpp +2 -3
  9. data/vendor/faiss/faiss/AutoTune.h +1 -1
  10. data/vendor/faiss/faiss/Clustering.cpp +2 -2
  11. data/vendor/faiss/faiss/Clustering.h +2 -2
  12. data/vendor/faiss/faiss/IVFlib.cpp +1 -2
  13. data/vendor/faiss/faiss/IVFlib.h +1 -1
  14. data/vendor/faiss/faiss/Index.h +10 -10
  15. data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
  16. data/vendor/faiss/faiss/Index2Layer.h +2 -2
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
  19. data/vendor/faiss/faiss/IndexBinary.h +7 -7
  20. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +3 -1
  22. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  23. data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
  24. data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
  26. data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
  27. data/vendor/faiss/faiss/IndexFastScan.h +107 -7
  28. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  29. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -1
  30. data/vendor/faiss/faiss/IndexHNSW.h +1 -1
  31. data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
  32. data/vendor/faiss/faiss/IndexIDMap.h +6 -6
  33. data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
  34. data/vendor/faiss/faiss/IndexIVF.h +5 -5
  35. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
  36. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
  37. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
  38. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
  39. data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
  40. data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
  41. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +366 -0
  42. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
  43. data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
  44. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  45. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
  46. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
  47. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +13 -6
  48. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +1 -0
  49. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +650 -0
  50. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +216 -0
  51. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  53. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
  54. data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
  55. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
  56. data/vendor/faiss/faiss/IndexPQ.h +1 -1
  57. data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
  58. data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
  59. data/vendor/faiss/faiss/IndexRaBitQ.cpp +13 -10
  60. data/vendor/faiss/faiss/IndexRaBitQ.h +7 -2
  61. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +586 -0
  62. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +149 -0
  63. data/vendor/faiss/faiss/IndexShards.cpp +1 -1
  64. data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
  65. data/vendor/faiss/faiss/MetricType.h +1 -1
  66. data/vendor/faiss/faiss/VectorTransform.h +2 -2
  67. data/vendor/faiss/faiss/clone_index.cpp +3 -1
  68. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  69. data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
  70. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
  71. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
  72. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +10 -6
  73. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
  74. data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
  75. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
  76. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
  77. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
  78. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
  79. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
  80. data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
  81. data/vendor/faiss/faiss/impl/DistanceComputer.h +3 -3
  82. data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
  83. data/vendor/faiss/faiss/impl/HNSW.cpp +1 -1
  84. data/vendor/faiss/faiss/impl/HNSW.h +4 -4
  85. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
  86. data/vendor/faiss/faiss/impl/IDSelector.h +1 -1
  87. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
  88. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
  89. data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
  90. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
  91. data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
  92. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  93. data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
  94. data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
  95. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
  96. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
  97. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  98. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
  99. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  100. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +246 -0
  101. data/vendor/faiss/faiss/impl/RaBitQUtils.h +153 -0
  102. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +54 -158
  103. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +2 -1
  104. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  105. data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
  106. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1 -1
  107. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +1 -1
  108. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
  109. data/vendor/faiss/faiss/impl/index_read.cpp +87 -3
  110. data/vendor/faiss/faiss/impl/index_write.cpp +73 -3
  111. data/vendor/faiss/faiss/impl/io.cpp +2 -2
  112. data/vendor/faiss/faiss/impl/io.h +4 -4
  113. data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
  114. data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
  115. data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
  116. data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
  117. data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
  118. data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
  119. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
  120. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
  121. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
  122. data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
  123. data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
  124. data/vendor/faiss/faiss/index_factory.cpp +43 -1
  125. data/vendor/faiss/faiss/index_factory.h +1 -1
  126. data/vendor/faiss/faiss/index_io.h +1 -1
  127. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +205 -0
  128. data/vendor/faiss/faiss/invlists/InvertedLists.h +62 -0
  129. data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
  130. data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
  131. data/vendor/faiss/faiss/utils/Heap.h +3 -3
  132. data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
  133. data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
  134. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
  135. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
  136. data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
  137. data/vendor/faiss/faiss/utils/distances.h +2 -2
  138. data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
  139. data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
  140. data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
  141. data/vendor/faiss/faiss/utils/hamming.h +1 -1
  142. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
  143. data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
  144. data/vendor/faiss/faiss/utils/partitioning.h +2 -2
  145. data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
  146. data/vendor/faiss/faiss/utils/random.cpp +1 -1
  147. data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
  148. data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
  149. data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
  150. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
  151. data/vendor/faiss/faiss/utils/utils.cpp +5 -2
  152. data/vendor/faiss/faiss/utils/utils.h +2 -2
  153. metadata +14 -3
@@ -0,0 +1,149 @@
+ /*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+ #pragma once
+
+ #include <vector>
+
+ #include <faiss/IndexFastScan.h>
+ #include <faiss/IndexRaBitQ.h>
+ #include <faiss/impl/RaBitQUtils.h>
+ #include <faiss/impl/RaBitQuantizer.h>
+ #include <faiss/impl/simd_result_handlers.h>
+ #include <faiss/utils/Heap.h>
+ #include <faiss/utils/simdlib.h>
+
+ namespace faiss {
+
+ // Import shared utilities from RaBitQUtils
+ using rabitq_utils::FactorsData;
+ using rabitq_utils::QueryFactorsData;
+
+ /** Fast-scan version of RaBitQ index that processes 32 database vectors at a
+ * time using SIMD operations. Similar to IndexPQFastScan but adapted for
+ * RaBitQ's bit-level quantization with factors.
+ *
+ * The key differences from IndexRaBitQ:
+ * - Processes vectors in batches of 32
+ * - Uses 4-bit groupings for SIMD optimization (4 dimensions per 4-bit unit)
+ * - Separates factors from quantized bits for efficient processing
+ * - Leverages existing PQ4 FastScan infrastructure where possible
+ */
+ struct IndexRaBitQFastScan : IndexFastScan {
+ /// RaBitQ quantizer for encoding/decoding
+ RaBitQuantizer rabitq;
+
+ /// Center of all points (same as IndexRaBitQ)
+ std::vector<float> center;
+
+ /// Extracted factors storage for batch processing
+ /// Size: ntotal, stores factors separately from packed codes
+ std::vector<FactorsData> factors_storage;
+
+ /// Default number of bits to quantize a query with
+ uint8_t qb = 8;
+
+ // quantize the query with a zero-centered scalar quantizer.
+ bool centered = false;
+
+ IndexRaBitQFastScan();
+
+ explicit IndexRaBitQFastScan(
+ idx_t d,
+ MetricType metric = METRIC_L2,
+ int bbs = 32);
+
+ /// build from an existing IndexRaBitQ
+ explicit IndexRaBitQFastScan(const IndexRaBitQ& orig, int bbs = 32);
+
+ void train(idx_t n, const float* x) override;
+
+ void add(idx_t n, const float* x) override;
+
+ void compute_codes(uint8_t* codes, idx_t n, const float* x) const override;
+
+ void compute_float_LUT(
+ float* lut,
+ idx_t n,
+ const float* x,
+ const FastScanDistancePostProcessing& context) const override;
+
+ void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
+
+ void search(
+ idx_t n,
+ const float* x,
+ idx_t k,
+ float* distances,
+ idx_t* labels,
+ const SearchParameters* params = nullptr) const override;
+
+ /// Override to create RaBitQ-specific handlers
+ void* make_knn_handler(
+ bool is_max,
+ int /*impl*/,
+ idx_t n,
+ idx_t k,
+ size_t /*ntotal*/,
+ float* distances,
+ idx_t* labels,
+ const IDSelector* sel,
+ const FastScanDistancePostProcessing& context) const override;
+ };
+
+ /** SIMD result handler for RaBitQ FastScan that applies distance corrections
+ * and maintains heaps directly during SIMD operations.
+ *
+ * This handler processes batches of 32 distance computations from SIMD kernels,
+ * applies RaBitQ-specific adjustments (factors and normalizers), and
+ * immediately updates result heaps without intermediate storage. This
+ * eliminates the need for post-processing and provides significant memory and
+ * performance benefits.
+ *
+ * Key optimizations:
+ * - Direct heap integration (no intermediate result storage)
+ * - Batch-level computation of normalizers and query factors
+ * - Preserves exact mathematical equivalence to original RaBitQ distances
+ * @tparam C Comparator type (CMin/CMax) for heap operations
+ * @tparam with_id_map Whether to use id mapping (similar to HeapHandler)
+ */
+ template <class C, bool with_id_map = false>
+ struct RaBitQHeapHandler
+ : simd_result_handlers::ResultHandlerCompare<C, with_id_map> {
+ using RHC = simd_result_handlers::ResultHandlerCompare<C, with_id_map>;
+ using RHC::normalizers;
+
+ const IndexRaBitQFastScan* rabitq_index;
+ float* heap_distances; // [nq * k]
+ int64_t* heap_labels; // [nq * k]
+ const size_t nq, k;
+ const FastScanDistancePostProcessing&
+ context; // Processing context with query offset
+
+ // Use float-based comparator for heap operations
+ using Cfloat = typename std::conditional<
+ C::is_max,
+ CMax<float, int64_t>,
+ CMin<float, int64_t>>::type;
+
+ RaBitQHeapHandler(
+ const IndexRaBitQFastScan* index,
+ size_t nq_val,
+ size_t k_val,
+ float* distances,
+ int64_t* labels,
+ const IDSelector* sel_in,
+ const FastScanDistancePostProcessing& context);
+
+ void handle(size_t q, size_t b, simd16uint16 d0, simd16uint16 d1) final;
+
+ void begin(const float* norms);
+
+ void end();
+ };
+
+ } // namespace faiss
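
For orientation, the snippet below sketches how the new index might be driven end to end. It relies only on the declarations in this header (the (d, metric, bbs) constructor and the train/add/search overrides); the random data setup is illustrative, and the sketch is untested against the vendored sources.

    #include <faiss/IndexRaBitQFastScan.h>

    #include <random>
    #include <vector>

    int main() {
        const faiss::idx_t d = 64, nb = 1000;
        std::vector<float> xb(nb * d);
        std::mt19937 rng(123);
        std::uniform_real_distribution<float> dist(0.0f, 1.0f);
        for (auto& v : xb) {
            v = dist(rng); // illustrative random database vectors
        }

        // bbs = 32: database vectors are scanned in SIMD batches of 32
        faiss::IndexRaBitQFastScan index(d, faiss::METRIC_L2, /*bbs=*/32);
        index.train(nb, xb.data()); // estimates the center of the dataset
        index.add(nb, xb.data());   // packs bit codes, extracts per-vector factors

        const faiss::idx_t k = 5;
        std::vector<float> distances(k);
        std::vector<faiss::idx_t> labels(k);
        index.search(1, xb.data(), k, distances.data(), labels.data());
        return 0;
    }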
@@ -264,7 +264,7 @@ void IndexShardsTemplate<IndexT>::search(
  }
  }

- // explicit instanciations
+ // explicit instantiations
  template struct IndexShardsTemplate<Index>;
  template struct IndexShardsTemplate<IndexBinary>;

@@ -77,7 +77,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x) : n(n), d(d) {
  if (d > 1024) {
  do_comment(
  "indexing this many dimensions is hard, "
- "please consider dimensionality reducution (with PCAMatrix)\n");
+ "please consider dimensionality reduction (with PCAMatrix)\n");
  }

  hash_value = hash_bytes((const uint8_t*)x, n * d * sizeof(*x));
@@ -125,7 +125,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x) : n(n), d(d) {
  }
  }

- // invalid vecor stats
+ // invalid vector stats
  if (n_valid == n) {
  do_comment("no NaN or Infs in data\n");
  } else {
@@ -229,7 +229,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x) : n(n), d(d) {
  } else {
  do_comment(
  "%zd dimensions are too large "
- "wrt. their variance, may loose precision "
+ "wrt. their variance, may lose precision "
  "in IndexFlatL2 (use CenteringTransform)\n",
  n_dangerous_range);
  }
@@ -35,7 +35,7 @@ enum MetricType {

  /// sum_i(min(a_i, b_i)) / sum_i(max(a_i, b_i)) where a_i, b_i > 0
  METRIC_Jaccard,
- /// Squared Eucliden distance, ignoring NaNs
+ /// Squared Euclidean distance, ignoring NaNs
  METRIC_NaNEuclidean,
  /// Gower's distance - numeric dimensions are in [0,1] and categorical
  /// dimensions are negative integers
@@ -37,7 +37,7 @@ struct VectorTransform {
  * nothing by default.
  *
  * @param n nb of training vectors
- * @param x training vecors, size n * d
+ * @param x training vectors, size n * d
  */
  virtual void train(idx_t n, const float* x);

@@ -249,7 +249,7 @@ struct OPQMatrix : LinearTransform {
  void train(idx_t n, const float* x) override;
  };

- /** remap dimensions for intput vectors, possibly inserting 0s
+ /** remap dimensions for input vectors, possibly inserting 0s
  * strictly speaking this is also a linear transform but we don't want
  * to compute it with matrix multiplies */
  struct RemapDimensionsTransform : VectorTransform {
@@ -23,6 +23,7 @@
  #include <faiss/IndexIVF.h>
  #include <faiss/IndexIVFAdditiveQuantizerFastScan.h>
  #include <faiss/IndexIVFFlat.h>
+ #include <faiss/IndexIVFFlatPanorama.h>
  #include <faiss/IndexIVFPQ.h>
  #include <faiss/IndexIVFPQFastScan.h>
  #include <faiss/IndexIVFPQR.h>
@@ -97,6 +98,7 @@ IndexIVF* Cloner::clone_IndexIVF(const IndexIVF* ivf) {

  TRYCLONE(IndexIVFFlatDedup, ivf)
  TRYCLONE(IndexIVFFlat, ivf)
+ TRYCLONE(IndexIVFFlatPanorama, ivf)

  TRYCLONE(IndexIVFSpectralHash, ivf)

@@ -152,7 +154,7 @@ IndexNSG* clone_IndexNSG(const IndexNSG* insg) {
  TRYCLONE(IndexNSGPQ, insg)
  TRYCLONE(IndexNSGSQ, insg)
  TRYCLONE(IndexNSG, insg) {
- FAISS_THROW_MSG("clone not supported for this type of IndexNNDescent");
+ FAISS_THROW_MSG("clone not supported for this type of IndexNSG");
  }
  }

@@ -239,7 +239,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
  config.device = device;
  GpuIndexCagra* res =
  new GpuIndexCagra(provider, icg->d, icg->metric_type, config);
- res->copyFromEx(icg, icg->get_numeric_type());
+ res->copyFrom_ex(icg, icg->get_numeric_type());
  return res;
  }
  #endif
@@ -77,13 +77,13 @@ class GpuIndex : public faiss::Index {
  /// as needed
  /// Handles paged adds if the add set is too large; calls addInternal_
  void add(idx_t, const float* x) override;
- void addEx(idx_t, const void* x, NumericType numeric_type) override;
+ void add_ex(idx_t, const void* x, NumericType numeric_type) override;

  /// `x` and `ids` can be resident on the CPU or any GPU; copies are
  /// performed as needed
  /// Handles paged adds if the add set is too large; calls addInternal_
  void add_with_ids(idx_t n, const float* x, const idx_t* ids) override;
- void add_with_idsEx(
+ void add_with_ids_ex(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -103,7 +103,7 @@ class GpuIndex : public faiss::Index {
  float* distances,
  idx_t* labels,
  const SearchParameters* params = nullptr) const override;
- void searchEx(
+ void search_ex(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -139,7 +139,7 @@ class GpuIndex : public faiss::Index {
  protected:
  /// Copy what we need from the CPU equivalent
  void copyFrom(const faiss::Index* index);
- void copyFromEx(const faiss::Index* index, NumericType numeric_type) {
+ void copyFrom_ex(const faiss::Index* index, NumericType numeric_type) {
  if (numeric_type == NumericType::Float32) {
  copyFrom(index);
  } else {
@@ -149,7 +149,7 @@ class GpuIndex : public faiss::Index {

  /// Copy what we have to the CPU equivalent
  void copyTo(faiss::Index* index) const;
- void copyToEx(faiss::Index* index, NumericType numeric_type) {
+ void copyTo_ex(faiss::Index* index, NumericType numeric_type) {
  if (numeric_type == NumericType::Float32) {
  copyTo(index);
  } else {
@@ -165,7 +165,7 @@ class GpuIndex : public faiss::Index {
  /// All data is guaranteed to be resident on our device
  virtual void addImpl_(idx_t n, const float* x, const idx_t* ids) = 0;

- virtual void addImplEx_(
+ virtual void addImpl_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -187,7 +187,7 @@ class GpuIndex : public faiss::Index {
  idx_t* labels,
  const SearchParameters* params) const = 0;

- virtual void searchImplEx_(
+ virtual void searchImpl_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -212,7 +212,7 @@ class GpuIndex : public faiss::Index {
  /// Handles paged adds if the add set is too large, passes to
  /// addImpl_ to actually perform the add for the current page
  void addPaged_(idx_t n, const float* x, const idx_t* ids);
- void addPagedEx_(
+ void addPaged_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -220,7 +220,7 @@ class GpuIndex : public faiss::Index {

  /// Calls addImpl_ for a single page of GPU-resident data
  void addPage_(idx_t n, const float* x, const idx_t* ids);
- void addPageEx_(
+ void addPage_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -235,7 +235,7 @@ class GpuIndex : public faiss::Index {
  idx_t* outIndicesData,
  const SearchParameters* params) const;

- void searchNonPagedEx_(
+ void searchNonPaged_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -253,7 +253,7 @@ class GpuIndex : public faiss::Index {
  float* outDistancesData,
  idx_t* outIndicesData,
  const SearchParameters* params) const;
- void searchFromCpuPagedEx_(
+ void searchFromCpuPaged_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -111,7 +111,7 @@ struct GpuIndexBinaryCagra : public IndexBinary {
  const SearchParameters* search_params) const;

  protected:
- /// Manages streans, cuBLAS handles and scratch memory for devices
+ /// Manages streams, cuBLAS handles and scratch memory for devices
  std::shared_ptr<GpuResources> resources_;

  /// Configuration options
@@ -86,7 +86,7 @@ class GpuIndexBinaryFlat : public IndexBinary {
  idx_t* outIndicesData) const;

  protected:
- /// Manages streans, cuBLAS handles and scratch memory for devices
+ /// Manages streams, cuBLAS handles and scratch memory for devices
  std::shared_ptr<GpuResources> resources_;

  /// Configuration options
@@ -135,7 +135,7 @@ struct IVFPQSearchCagraConfig {
  ///
  /// The use of low-precision types reduces the amount of shared memory
  /// required at search time, so fast shared memory kernels can be used even
- /// for datasets with large dimansionality. Note that the recall is slightly
+ /// for datasets with large dimensionality. Note that the recall is slightly
  /// degraded when low-precision type is selected.

  cudaDataType_t lut_dtype = CUDA_R_32F;
@@ -166,6 +166,10 @@ struct IVFPQSearchCagraConfig {
  /// negative effects on the search performance if tweaked incorrectly.

  double preferred_shmem_carveout = 1.0;
+
+ /// Set the internal batch size to improve GPU utilization at the cost of
+ /// larger memory footprint.
+ uint32_t max_internal_batch_size = 4096;
  };

  struct GpuIndexCagraConfig : public GpuIndexConfig {
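
The new max_internal_batch_size field is a search-time trade-off: larger internal batches improve GPU utilization but grow the memory footprint. A hypothetical one-liner against the struct as declared above (how the config is wired into a search is omitted here):

    faiss::gpu::IVFPQSearchCagraConfig ivf_pq_search;
    // Double the default internal batch size (4096) to trade memory
    // for GPU utilization on large query batches.
    ivf_pq_search.max_internal_batch_size = 8192;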
@@ -258,7 +262,7 @@ struct GpuIndexCagra : public GpuIndex {
  /// the base dataset. Use this function when you want to add vectors with
  /// ids. Ref: https://github.com/facebookresearch/faiss/issues/4107
  void add(idx_t n, const float* x) override;
- void addEx(idx_t n, const void* x, NumericType numeric_type) override;
+ void add_ex(idx_t n, const void* x, NumericType numeric_type) override;

  /// Trains CAGRA based on the given vector data.
  /// NB: The use of the train function here is to build the CAGRA graph on
@@ -266,12 +270,12 @@ struct GpuIndexCagra : public GpuIndex {
  /// of vectors (without IDs) to the index. There is no external quantizer to
  /// be trained here.
  void train(idx_t n, const float* x) override;
- void trainEx(idx_t n, const void* x, NumericType numeric_type) override;
+ void train_ex(idx_t n, const void* x, NumericType numeric_type) override;

  /// Initialize ourselves from the given CPU index; will overwrite
  /// all data in ourselves
  void copyFrom(const faiss::IndexHNSWCagra* index);
- void copyFromEx(
+ void copyFrom_ex(
  const faiss::IndexHNSWCagra* index,
  NumericType numeric_type);

@@ -289,7 +293,7 @@ struct GpuIndexCagra : public GpuIndex {
  bool addImplRequiresIDs_() const override;

  void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
- void addImplEx_(
+ void addImpl_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -303,7 +307,7 @@ struct GpuIndexCagra : public GpuIndex {
  float* distances,
  idx_t* labels,
  const SearchParameters* search_params) const override;
- void searchImplEx_(
+ void searchImpl_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -5,6 +5,8 @@
  * LICENSE file in the root directory of this source tree.
  */

+ #pragma once
+
  #include <faiss/impl/FaissAssert.h>

  namespace faiss {
@@ -1,3 +1,10 @@
+ /*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
  #include <faiss/gpu/GpuIcmEncoder.h>
  #include <faiss/gpu/StandardGpuResources.h>
  #include <faiss/gpu/test/TestUtils.h>
@@ -493,7 +493,7 @@ TEST(TestGpuIndexIVFFlat, Float32_negative) {
  // Construct a positive test set
  auto queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);

- // Put all vecs on positive size
+ // Put all vecs on positive side
  for (auto& f : queryVecs) {
  f = std::abs(f);
  }
@@ -404,7 +404,7 @@ void AdditiveQuantizer::compute_LUT(
  namespace {

  /* compute inner products of one query with all centroids, given a look-up
- * table of all inner producst with codebook entries */
+ * table of all inner products with codebook entries */
  void compute_inner_prod_with_LUT(
  const AdditiveQuantizer& aq,
  const float* LUT,
@@ -212,7 +212,7 @@ struct AdditiveQuantizer : Quantizer {
  idx_t* labels,
  const float* centroid_norms) const;

- virtual ~AdditiveQuantizer();
+ virtual ~AdditiveQuantizer() override;
  };

  } // namespace faiss
@@ -36,7 +36,7 @@ RangeSearchResult::RangeSearchResult(size_t nq, bool alloc_lims) : nq(nq) {
  /// for each query
  void RangeSearchResult::do_allocation() {
  // works only if all the partial results are aggregated
- // simulatenously
+ // simultaneously
  FAISS_THROW_IF_NOT(labels == nullptr && distances == nullptr);
  size_t ofs = 0;
  for (int i = 0; i < nq; i++) {
@@ -86,7 +86,7 @@ void BufferList::append_buffer() {
  wp = 0;
  }

- /// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to
+ /// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
  /// tables dest_ids, dest_dis
  void BufferList::copy_range(
  size_t ofs,
@@ -80,7 +80,7 @@ struct BufferList {
  /// add one result, possibly appending a new buffer if needed
  void add(idx_t id, float dis);

- /// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to
+ /// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
  /// tables dest_ids, dest_dis
  void copy_range(size_t ofs, size_t n, idx_t* dest_ids, float* dest_dis);
  };
@@ -38,14 +38,14 @@ struct CodePacker {
  // code_size
  ) const = 0;

- // pack all code in a block
+ // pack all codes in a block
  virtual void pack_all(
  const uint8_t* flat_codes, // codes to write to the block, size
  // (nvec * code_size)
  uint8_t* block // block to write to (size block_size)
  ) const;

- // unpack all code in a block
+ // unpack all codes in a block
  virtual void unpack_all(
  const uint8_t* block, // block to read from (size block_size)
  uint8_t* flat_codes // where to write the resulting codes size (nvec
@@ -60,7 +60,7 @@ struct DistanceComputer {
  };

  /* Wrap the distance computer into one that negates the
- distances. This makes supporting INNER_PRODUCE search easier */
+ distances. This makes supporting INNER_PRODUCT search easier */

  struct NegativeDistanceComputer : DistanceComputer {
  /// owned by this
@@ -100,7 +100,7 @@ struct NegativeDistanceComputer : DistanceComputer {
  return -basedis->symmetric_dis(i, j);
  }

- virtual ~NegativeDistanceComputer() {
+ virtual ~NegativeDistanceComputer() override {
  delete basedis;
  }
  };
@@ -125,7 +125,7 @@ struct FlatCodesDistanceComputer : DistanceComputer {
  /// compute distance of current query to an encoded vector
  virtual float distance_to_code(const uint8_t* code) = 0;

- virtual ~FlatCodesDistanceComputer() {}
+ virtual ~FlatCodesDistanceComputer() override {}
  };

  } // namespace faiss
@@ -0,0 +1,53 @@
+ /*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+ #pragma once
+
+ #include <cstddef>
+
+ namespace faiss {
+
+ // Forward declarations
+ struct NormTableScaler;
+
+ namespace rabitq_utils {
+ struct QueryFactorsData;
+ }
+
+ /**
+ * Simple context object that holds processors for FastScan operations.
+ * */
+ struct FastScanDistancePostProcessing {
+ /// Norm scaling processor for Additive Quantizers (nullptr if not needed)
+ const NormTableScaler* norm_scaler = nullptr;
+
+ /// Query factors data pointer for RaBitQ (nullptr if not needed)
+ /// This pointer should point to the beginning of the relevant
+ /// QueryFactorsData subset for this context.
+ rabitq_utils::QueryFactorsData* query_factors = nullptr;
+
+ /// The nprobe value used when allocating query_factors storage.
+ /// This is needed because the allocation size (n * nprobe) may use a
+ /// different nprobe than index->nprobe if search params override it.
+ /// Set to 0 to use index->nprobe as fallback.
+ size_t nprobe = 0;
+
+ /// Default constructor - no processing
+ FastScanDistancePostProcessing() = default;
+
+ /// Check if norm scaling is enabled
+ bool has_norm_scaling() const {
+ return norm_scaler != nullptr;
+ }
+
+ /// Check if query factors processing is enabled
+ bool has_query_processing() const {
+ return query_factors != nullptr;
+ }
+ };
+
+ } // namespace faiss
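
As a rough illustration of the intended call pattern, a caller might fill the context like this; only the fields and helpers declared above are used, and the scaler/factor buffers are assumed to be owned elsewhere:

    #include <faiss/impl/FastScanDistancePostProcessing.h>

    faiss::FastScanDistancePostProcessing make_context(
            const faiss::NormTableScaler* scaler,
            faiss::rabitq_utils::QueryFactorsData* query_factors,
            size_t nprobe) {
        faiss::FastScanDistancePostProcessing ctx;
        ctx.norm_scaler = scaler;          // nullptr unless an additive quantizer scales norms
        ctx.query_factors = query_factors; // nullptr unless RaBitQ factors apply
        ctx.nprobe = nprobe;               // 0 falls back to index->nprobe
        // Downstream code gates extra work on ctx.has_norm_scaling()
        // and ctx.has_query_processing().
        return ctx;
    }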
@@ -60,7 +60,7 @@ HNSW::HNSW(int M) : rng(12345) {

  int HNSW::random_level() {
  double f = rng.rand_float();
- // could be a bit faster with bissection
+ // could be a bit faster with bisection
  for (int level = 0; level < assign_probas.size(); level++) {
  if (f < assign_probas[level]) {
  return level;
@@ -31,7 +31,7 @@ namespace faiss {
  * Yu. A. Malkov, D. A. Yashunin, arXiv 2017
  *
  * This implementation is heavily influenced by the NMSlib
- * implementation by Yury Malkov and Leonid Boystov
+ * implementation by Yury Malkov and Leonid Boytsov
  * (https://github.com/searchivarius/nmslib)
  *
  * The HNSW object stores only the neighbor link structure, see
@@ -61,7 +61,7 @@ struct HNSW {

  typedef std::pair<float, storage_idx_t> Node;

- /** Heap structure that allows fast
+ /** Heap structure that allows fast access and updates.
  */
  struct MinimaxHeap {
  int n;
@@ -87,7 +87,7 @@ struct HNSW {
  int count_below(float thresh);
  };

- /// to sort pairs of (id, distance) from nearest to fathest or the reverse
+ /// to sort pairs of (id, distance) from nearest to farthest or the reverse
  struct NodeDistCloser {
  float d;
  int id;
@@ -160,7 +160,7 @@ struct HNSW {
  /// nb of neighbors for this level
  int nb_neighbors(int layer_no) const;

- /// cumumlative nb up to (and excluding) this level
+ /// cumulative nb up to (and excluding) this level
  int cum_nb_neighbors(int layer_no) const;

  /// range of entries in the neighbors table of vertex no at layer_no
@@ -31,7 +31,7 @@ void IDSelectorRange::find_sorted_ids_bounds(
  *jmin_out = *jmax_out = 0;
  return;
  }
- // bissection to find imin
+ // bisection to find imin
  if (ids[0] >= imin) {
  *jmin_out = 0;
  } else {
@@ -46,7 +46,7 @@ void IDSelectorRange::find_sorted_ids_bounds(
  }
  *jmin_out = j1;
  }
- // bissection to find imax
+ // bisection to find imax
  if (*jmin_out == list_size || ids[*jmin_out] >= imax) {
  *jmax_out = *jmin_out;
  } else {