faiss 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +39 -29
  5. data/vendor/faiss/faiss/Clustering.cpp +4 -2
  6. data/vendor/faiss/faiss/IVFlib.cpp +14 -7
  7. data/vendor/faiss/faiss/Index.h +72 -3
  8. data/vendor/faiss/faiss/Index2Layer.cpp +2 -4
  9. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +0 -1
  10. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +1 -0
  11. data/vendor/faiss/faiss/IndexBinary.h +46 -3
  12. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +118 -4
  13. data/vendor/faiss/faiss/IndexBinaryHNSW.h +41 -0
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +18 -7
  16. data/vendor/faiss/faiss/IndexBinaryIVF.h +5 -1
  17. data/vendor/faiss/faiss/IndexFlat.cpp +6 -4
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +65 -24
  19. data/vendor/faiss/faiss/IndexHNSW.h +10 -1
  20. data/vendor/faiss/faiss/IndexIDMap.cpp +96 -18
  21. data/vendor/faiss/faiss/IndexIDMap.h +20 -0
  22. data/vendor/faiss/faiss/IndexIVF.cpp +28 -10
  23. data/vendor/faiss/faiss/IndexIVF.h +16 -1
  24. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -16
  25. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +18 -6
  26. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +33 -21
  27. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +16 -6
  28. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +24 -15
  29. data/vendor/faiss/faiss/IndexIVFFastScan.h +4 -2
  30. data/vendor/faiss/faiss/IndexIVFFlat.cpp +59 -43
  31. data/vendor/faiss/faiss/IndexIVFFlat.h +10 -2
  32. data/vendor/faiss/faiss/IndexIVFPQ.cpp +16 -3
  33. data/vendor/faiss/faiss/IndexIVFPQ.h +8 -1
  34. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +14 -6
  35. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +2 -1
  36. data/vendor/faiss/faiss/IndexIVFPQR.cpp +14 -4
  37. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  38. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +28 -3
  39. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +8 -1
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +9 -2
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
  42. data/vendor/faiss/faiss/IndexLattice.cpp +8 -4
  43. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -7
  44. data/vendor/faiss/faiss/IndexNSG.cpp +3 -3
  45. data/vendor/faiss/faiss/IndexPQ.cpp +0 -1
  46. data/vendor/faiss/faiss/IndexPQ.h +1 -0
  47. data/vendor/faiss/faiss/IndexPQFastScan.cpp +0 -2
  48. data/vendor/faiss/faiss/IndexPreTransform.cpp +4 -2
  49. data/vendor/faiss/faiss/IndexRefine.cpp +11 -6
  50. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +16 -4
  51. data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -3
  52. data/vendor/faiss/faiss/IndexShards.cpp +7 -6
  53. data/vendor/faiss/faiss/MatrixStats.cpp +16 -8
  54. data/vendor/faiss/faiss/MetaIndexes.cpp +12 -6
  55. data/vendor/faiss/faiss/MetricType.h +5 -3
  56. data/vendor/faiss/faiss/clone_index.cpp +2 -4
  57. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +6 -0
  58. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +9 -4
  59. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +32 -10
  60. data/vendor/faiss/faiss/gpu/GpuIndex.h +88 -0
  61. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +125 -0
  62. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +39 -4
  63. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +3 -3
  64. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -1
  65. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +3 -2
  66. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +41 -0
  67. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +6 -3
  68. data/vendor/faiss/faiss/impl/HNSW.cpp +34 -19
  69. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -1
  70. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +2 -3
  71. data/vendor/faiss/faiss/impl/NNDescent.cpp +17 -9
  72. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +42 -21
  73. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +6 -24
  74. data/vendor/faiss/faiss/impl/ResultHandler.h +56 -47
  75. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +28 -15
  76. data/vendor/faiss/faiss/impl/index_read.cpp +36 -11
  77. data/vendor/faiss/faiss/impl/index_write.cpp +19 -6
  78. data/vendor/faiss/faiss/impl/io.cpp +9 -5
  79. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +18 -11
  80. data/vendor/faiss/faiss/impl/mapped_io.cpp +4 -7
  81. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +0 -1
  82. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +0 -1
  83. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +6 -6
  84. data/vendor/faiss/faiss/impl/zerocopy_io.cpp +1 -1
  85. data/vendor/faiss/faiss/impl/zerocopy_io.h +2 -2
  86. data/vendor/faiss/faiss/index_factory.cpp +49 -33
  87. data/vendor/faiss/faiss/index_factory.h +8 -2
  88. data/vendor/faiss/faiss/index_io.h +0 -3
  89. data/vendor/faiss/faiss/invlists/DirectMap.cpp +2 -1
  90. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +12 -6
  91. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +8 -4
  92. data/vendor/faiss/faiss/utils/Heap.cpp +15 -8
  93. data/vendor/faiss/faiss/utils/Heap.h +23 -12
  94. data/vendor/faiss/faiss/utils/distances.cpp +42 -21
  95. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  96. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +1 -1
  97. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -3
  98. data/vendor/faiss/faiss/utils/extra_distances-inl.h +27 -4
  99. data/vendor/faiss/faiss/utils/extra_distances.cpp +8 -4
  100. data/vendor/faiss/faiss/utils/hamming.cpp +20 -10
  101. data/vendor/faiss/faiss/utils/partitioning.cpp +8 -4
  102. data/vendor/faiss/faiss/utils/quantize_lut.cpp +17 -9
  103. data/vendor/faiss/faiss/utils/rabitq_simd.h +539 -0
  104. data/vendor/faiss/faiss/utils/random.cpp +14 -7
  105. data/vendor/faiss/faiss/utils/utils.cpp +0 -3
  106. metadata +5 -2
@@ -41,21 +41,25 @@ void GpuParameterSpace::initialize(const Index* index) {
41
41
  return;
42
42
  }
43
43
  if (DC(IndexReplicas)) {
44
- if (ix->count() == 0)
44
+ if (ix->count() == 0) {
45
45
  return;
46
+ }
46
47
  index = ix->at(0);
47
48
  }
48
49
  if (DC(IndexShards)) {
49
- if (ix->count() == 0)
50
+ if (ix->count() == 0) {
50
51
  return;
52
+ }
51
53
  index = ix->at(0);
52
54
  }
53
55
  if (DC(GpuIndexIVF)) {
54
56
  ParameterRange& pr = add_range("nprobe");
55
57
  for (int i = 0; i < 12; i++) {
56
58
  size_t nprobe = 1 << i;
57
- if (nprobe >= ix->getNumLists() || nprobe > getMaxKSelection())
59
+ if (nprobe >= ix->getNumLists() ||
60
+ nprobe > getMaxKSelection(false)) {
58
61
  break;
62
+ }
59
63
  pr.values.push_back(nprobe);
60
64
  }
61
65
 
@@ -79,8 +83,9 @@ void GpuParameterSpace::set_index_parameter(
79
83
  const std::string& name,
80
84
  double val) const {
81
85
  if (DC(IndexReplicas)) {
82
- for (int i = 0; i < ix->count(); i++)
86
+ for (int i = 0; i < ix->count(); i++) {
83
87
  set_index_parameter(ix->at(i), name, val);
88
+ }
84
89
  return;
85
90
  }
86
91
  if (name == "nprobe") {
@@ -15,6 +15,7 @@
15
15
  #include <faiss/IndexBinaryFlat.h>
16
16
  #include <faiss/IndexFlat.h>
17
17
  #if defined USE_NVIDIA_CUVS
18
+ #include <faiss/IndexBinaryHNSW.h>
18
19
  #include <faiss/IndexHNSW.h>
19
20
  #endif
20
21
  #include <faiss/IndexIVF.h>
@@ -28,14 +29,13 @@
28
29
  #include <faiss/gpu/GpuIndex.h>
29
30
  #include <faiss/gpu/GpuIndexBinaryFlat.h>
30
31
  #if defined USE_NVIDIA_CUVS
32
+ #include <faiss/gpu/GpuIndexBinaryCagra.h>
31
33
  #include <faiss/gpu/GpuIndexCagra.h>
32
34
  #endif
33
35
  #include <faiss/gpu/GpuIndexFlat.h>
34
36
  #include <faiss/gpu/GpuIndexIVFFlat.h>
35
37
  #include <faiss/gpu/GpuIndexIVFPQ.h>
36
38
  #include <faiss/gpu/GpuIndexIVFScalarQuantizer.h>
37
- #include <faiss/gpu/utils/DeviceUtils.h>
38
- #include <faiss/impl/FaissAssert.h>
39
39
  #include <faiss/index_io.h>
40
40
 
41
41
  namespace faiss {
@@ -95,6 +95,9 @@ Index* ToCPUCloner::clone_Index(const Index* index) {
95
95
  #if defined USE_NVIDIA_CUVS
96
96
  else if (auto icg = dynamic_cast<const GpuIndexCagra*>(index)) {
97
97
  IndexHNSWCagra* res = new IndexHNSWCagra();
98
+ if (icg->get_numeric_type() != faiss::NumericType::Float32) {
99
+ res->base_level_only = true;
100
+ }
98
101
  icg->copyTo(res);
99
102
  return res;
100
103
  }
@@ -236,7 +239,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
236
239
  config.device = device;
237
240
  GpuIndexCagra* res =
238
241
  new GpuIndexCagra(provider, icg->d, icg->metric_type, config);
239
- res->copyFrom(icg);
242
+ res->copyFromEx(icg, icg->get_numeric_type());
240
243
  return res;
241
244
  }
242
245
  #endif
@@ -290,14 +293,16 @@ void ToGpuClonerMultiple::copy_ivf_shard(
290
293
  idx_t i0 = i * index_ivf->ntotal / n;
291
294
  idx_t i1 = (i + 1) * index_ivf->ntotal / n;
292
295
 
293
- if (verbose)
296
+ if (verbose) {
294
297
  printf("IndexShards shard %ld indices %ld:%ld\n", i, i0, i1);
298
+ }
295
299
  index_ivf->copy_subset_to(
296
300
  *idx2, InvertedLists::SUBSET_TYPE_ID_RANGE, i0, i1);
297
301
  FAISS_ASSERT(idx2->ntotal == i1 - i0);
298
302
  } else if (shard_type == 1) {
299
- if (verbose)
303
+ if (verbose) {
300
304
  printf("IndexShards shard %ld select modulo %ld = %ld\n", i, n, i);
305
+ }
301
306
  index_ivf->copy_subset_to(
302
307
  *idx2, InvertedLists::SUBSET_TYPE_ID_MOD, n, i);
303
308
  } else if (shard_type == 4) {
@@ -527,7 +532,15 @@ faiss::IndexBinary* index_binary_gpu_to_cpu(
527
532
  IndexBinaryFlat* ret = new IndexBinaryFlat();
528
533
  ii->copyTo(ret);
529
534
  return ret;
530
- } else {
535
+ }
536
+ #if defined USE_NVIDIA_CUVS
537
+ else if (auto ii = dynamic_cast<const GpuIndexBinaryCagra*>(gpu_index)) {
538
+ IndexBinaryHNSWCagra* ret = new IndexBinaryHNSWCagra();
539
+ ii->copyTo(ret);
540
+ return ret;
541
+ }
542
+ #endif
543
+ else {
531
544
  FAISS_THROW_MSG("cannot clone this type of index");
532
545
  }
533
546
  }
@@ -540,11 +553,20 @@ faiss::IndexBinary* index_binary_cpu_to_gpu(
540
553
  if (auto ii = dynamic_cast<const IndexBinaryFlat*>(index)) {
541
554
  GpuIndexBinaryFlatConfig config;
542
555
  config.device = device;
543
- if (options) {
544
- config.use_cuvs = options->use_cuvs;
545
- }
546
556
  return new GpuIndexBinaryFlat(provider, ii, config);
547
- } else {
557
+ }
558
+ #if defined USE_NVIDIA_CUVS
559
+ else if (
560
+ auto ii = dynamic_cast<const faiss::IndexBinaryHNSWCagra*>(index)) {
561
+ GpuIndexCagraConfig config;
562
+ config.device = device;
563
+ GpuIndexBinaryCagra* res =
564
+ new GpuIndexBinaryCagra(provider, ii->d, config);
565
+ res->copyFrom(ii);
566
+ return res;
567
+ }
568
+ #endif
569
+ else {
548
570
  FAISS_THROW_MSG("cannot clone this type of index");
549
571
  }
550
572
  }
@@ -77,11 +77,17 @@ class GpuIndex : public faiss::Index {
77
77
  /// as needed
78
78
  /// Handles paged adds if the add set is too large; calls addInternal_
79
79
  void add(idx_t, const float* x) override;
80
+ void addEx(idx_t, const void* x, NumericType numeric_type) override;
80
81
 
81
82
  /// `x` and `ids` can be resident on the CPU or any GPU; copies are
82
83
  /// performed as needed
83
84
  /// Handles paged adds if the add set is too large; calls addInternal_
84
85
  void add_with_ids(idx_t n, const float* x, const idx_t* ids) override;
86
+ void add_with_idsEx(
87
+ idx_t n,
88
+ const void* x,
89
+ NumericType numeric_type,
90
+ const idx_t* xids) override;
85
91
 
86
92
  /// `x` and `labels` can be resident on the CPU or any GPU; copies are
87
93
  /// performed as needed
@@ -97,6 +103,14 @@ class GpuIndex : public faiss::Index {
97
103
  float* distances,
98
104
  idx_t* labels,
99
105
  const SearchParameters* params = nullptr) const override;
106
+ void searchEx(
107
+ idx_t n,
108
+ const void* x,
109
+ NumericType numeric_type,
110
+ idx_t k,
111
+ float* distances,
112
+ idx_t* labels,
113
+ const SearchParameters* params = nullptr) const override;
100
114
 
101
115
  /// `x`, `distances` and `labels` and `recons` can be resident on the CPU or
102
116
  /// any GPU; copies are performed as needed
@@ -125,9 +139,23 @@ class GpuIndex : public faiss::Index {
125
139
  protected:
126
140
  /// Copy what we need from the CPU equivalent
127
141
  void copyFrom(const faiss::Index* index);
142
+ void copyFromEx(const faiss::Index* index, NumericType numeric_type) {
143
+ if (numeric_type == NumericType::Float32) {
144
+ copyFrom(index);
145
+ } else {
146
+ FAISS_THROW_MSG("GpuIndex::copyFrom: unsupported numeric type");
147
+ }
148
+ }
128
149
 
129
150
  /// Copy what we have to the CPU equivalent
130
151
  void copyTo(faiss::Index* index) const;
152
+ void copyToEx(faiss::Index* index, NumericType numeric_type) {
153
+ if (numeric_type == NumericType::Float32) {
154
+ copyTo(index);
155
+ } else {
156
+ FAISS_THROW_MSG("GpuIndex::copyTo: unsupported numeric type");
157
+ }
158
+ }
131
159
 
132
160
  /// Does addImpl_ require IDs? If so, and no IDs are provided, we will
133
161
  /// generate them sequentially based on the order in which the IDs are added
@@ -137,6 +165,18 @@ class GpuIndex : public faiss::Index {
137
165
  /// All data is guaranteed to be resident on our device
138
166
  virtual void addImpl_(idx_t n, const float* x, const idx_t* ids) = 0;
139
167
 
168
+ virtual void addImplEx_(
169
+ idx_t n,
170
+ const void* x,
171
+ NumericType numeric_type,
172
+ const idx_t* ids) {
173
+ if (numeric_type == NumericType::Float32) {
174
+ addImpl_(n, static_cast<const float*>(x), ids);
175
+ } else {
176
+ FAISS_THROW_MSG("GpuIndex::addImpl_: unsupported numeric type");
177
+ }
178
+ };
179
+
140
180
  /// Overridden to actually perform the search
141
181
  /// All data is guaranteed to be resident on our device
142
182
  virtual void searchImpl_(
@@ -147,13 +187,44 @@ class GpuIndex : public faiss::Index {
147
187
  idx_t* labels,
148
188
  const SearchParameters* params) const = 0;
149
189
 
190
+ virtual void searchImplEx_(
191
+ idx_t n,
192
+ const void* x,
193
+ NumericType numeric_type,
194
+ int k,
195
+ float* distances,
196
+ idx_t* labels,
197
+ const SearchParameters* params) const {
198
+ if (numeric_type == NumericType::Float32) {
199
+ searchImpl_(
200
+ n,
201
+ static_cast<const float*>(x),
202
+ k,
203
+ distances,
204
+ labels,
205
+ params);
206
+ } else {
207
+ FAISS_THROW_MSG("GpuIndex::searchImpl_: unsupported numeric type");
208
+ }
209
+ }
210
+
150
211
  private:
151
212
  /// Handles paged adds if the add set is too large, passes to
152
213
  /// addImpl_ to actually perform the add for the current page
153
214
  void addPaged_(idx_t n, const float* x, const idx_t* ids);
215
+ void addPagedEx_(
216
+ idx_t n,
217
+ const void* x,
218
+ NumericType numeric_type,
219
+ const idx_t* ids);
154
220
 
155
221
  /// Calls addImpl_ for a single page of GPU-resident data
156
222
  void addPage_(idx_t n, const float* x, const idx_t* ids);
223
+ void addPageEx_(
224
+ idx_t n,
225
+ const void* x,
226
+ NumericType numeric_type,
227
+ const idx_t* ids);
157
228
 
158
229
  /// Calls searchImpl_ for a single page of GPU-resident data
159
230
  void searchNonPaged_(
@@ -164,6 +235,15 @@ class GpuIndex : public faiss::Index {
164
235
  idx_t* outIndicesData,
165
236
  const SearchParameters* params) const;
166
237
 
238
+ void searchNonPagedEx_(
239
+ idx_t n,
240
+ const void* x,
241
+ NumericType numeric_type,
242
+ int k,
243
+ float* outDistancesData,
244
+ idx_t* outIndicesData,
245
+ const SearchParameters* params) const;
246
+
167
247
  /// Calls searchImpl_ for a single page of GPU-resident data,
168
248
  /// handling paging of the data and copies from the CPU
169
249
  void searchFromCpuPaged_(
@@ -173,6 +253,14 @@ class GpuIndex : public faiss::Index {
173
253
  float* outDistancesData,
174
254
  idx_t* outIndicesData,
175
255
  const SearchParameters* params) const;
256
+ void searchFromCpuPagedEx_(
257
+ idx_t n,
258
+ const void* x,
259
+ NumericType numeric_type,
260
+ int k,
261
+ float* outDistancesData,
262
+ idx_t* outIndicesData,
263
+ const SearchParameters* params) const;
176
264
 
177
265
  protected:
178
266
  /// Manages streams, cuBLAS handles and scratch memory for devices
@@ -0,0 +1,125 @@
1
+ // @lint-ignore-every LICENSELINT
2
+ /**
3
+ * Copyright (c) Facebook, Inc. and its affiliates.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+ /*
9
+ * Copyright (c) 2025, NVIDIA CORPORATION.
10
+ *
11
+ * Licensed under the Apache License, Version 2.0 (the "License");
12
+ * you may not use this file except in compliance with the License.
13
+ * You may obtain a copy of the License at
14
+ *
15
+ * http://www.apache.org/licenses/LICENSE-2.0
16
+ *
17
+ * Unless required by applicable law or agreed to in writing, software
18
+ * distributed under the License is distributed on an "AS IS" BASIS,
19
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20
+ * See the License for the specific language governing permissions and
21
+ * limitations under the License.
22
+ */
23
+
24
+ #pragma once
25
+
26
+ #include <faiss/IndexBinary.h>
27
+ #include <faiss/IndexBinaryHNSW.h>
28
+ #include <faiss/gpu/GpuIndexCagra.h>
29
+
30
+ #include <memory>
31
+
32
+ namespace faiss {
33
+ namespace gpu {
34
+
35
+ class BinaryCuvsCagra;
36
+
37
+ struct GpuIndexBinaryCagra : public IndexBinary {
38
+ public:
39
+ GpuIndexBinaryCagra(
40
+ GpuResourcesProvider* provider,
41
+ int dims,
42
+ GpuIndexCagraConfig config = GpuIndexCagraConfig());
43
+
44
+ ~GpuIndexBinaryCagra() override;
45
+
46
+ int getDevice() const;
47
+
48
+ /// Returns a reference to our GpuResources object that manages memory,
49
+ /// stream and handle resources on the GPU
50
+ std::shared_ptr<GpuResources> getResources();
51
+
52
+ /// Trains CAGRA based on the given vector data and adds them along with ids.
53
+ /// NB: The use of the add function here is to build the CAGRA graph on
54
+ /// the base dataset. Use this function when you want to add vectors with
55
+ /// ids. Ref: https://github.com/facebookresearch/faiss/issues/4107
56
+ void add(idx_t n, const uint8_t* x) override;
57
+
58
+ /// Trains CAGRA based on the given vector data.
59
+ /// NB: The use of the train function here is to build the CAGRA graph on
60
+ /// the base dataset and is currently the only function to add the full set
61
+ /// of vectors (without IDs) to the index. There is no external quantizer to
62
+ /// be trained here.
63
+ void train(idx_t n, const uint8_t* x) override;
64
+
65
+ /// Initialize ourselves from the given CPU index; will overwrite
66
+ /// all data in ourselves
67
+ void copyFrom(const faiss::IndexBinaryHNSWCagra* index);
68
+
69
+ /// Copy ourselves to the given CPU index; will overwrite all data
70
+ /// in the index instance
71
+ void copyTo(faiss::IndexBinaryHNSWCagra* index) const;
72
+
73
+ void reset() override;
74
+
75
+ std::vector<idx_t> get_knngraph() const;
76
+
77
+ void search(
78
+ idx_t n,
79
+ const uint8_t* x,
80
+ // faiss::IndexBinary has idx_t for k
81
+ idx_t k,
82
+ int* distances,
83
+ faiss::idx_t* labels,
84
+ const faiss::SearchParameters* params = nullptr) const override;
85
+
86
+ protected:
87
+ /// Called from search when the input data is on the CPU;
88
+ /// potentially allows for pinned memory usage
89
+ void searchFromCpuPaged_(
90
+ idx_t n,
91
+ const uint8_t* x,
92
+ int k,
93
+ int* outDistancesData,
94
+ idx_t* outIndicesData,
95
+ const SearchParameters* search_params) const;
96
+
97
+ void searchNonPaged_(
98
+ idx_t n,
99
+ const uint8_t* x,
100
+ int k,
101
+ int* outDistancesData,
102
+ idx_t* outIndicesData,
103
+ const SearchParameters* search_params) const;
104
+
105
+ void searchImpl_(
106
+ idx_t n,
107
+ const uint8_t* x,
108
+ int k,
109
+ int* distances,
110
+ idx_t* labels,
111
+ const SearchParameters* search_params) const;
112
+
113
+ protected:
114
+ /// Manages streams, cuBLAS handles and scratch memory for devices
115
+ std::shared_ptr<GpuResources> resources_;
116
+
117
+ /// Configuration options
118
+ const GpuIndexCagraConfig cagraConfig_;
119
+
120
+ /// Instance that we own; contains the cuVS index
121
+ std::shared_ptr<BinaryCuvsCagra> index_;
122
+ };
123
+
124
+ } // namespace gpu
125
+ } // namespace faiss
@@ -6,7 +6,7 @@
6
6
  * LICENSE file in the root directory of this source tree.
7
7
  */
8
8
  /*
9
- * Copyright (c) 2024, NVIDIA CORPORATION.
9
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
10
10
  *
11
11
  * Licensed under the Apache License, Version 2.0 (the "License");
12
12
  * you may not use this file except in compliance with the License.
@@ -27,6 +27,9 @@
27
27
  #include <faiss/gpu/GpuIndex.h>
28
28
  #include <faiss/gpu/GpuIndexIVFPQ.h>
29
29
 
30
+ #include <variant>
31
+ #include "faiss/Index.h"
32
+
30
33
  namespace faiss {
31
34
  struct IndexHNSWCagra;
32
35
  }
@@ -34,13 +37,16 @@ struct IndexHNSWCagra;
34
37
  namespace faiss {
35
38
  namespace gpu {
36
39
 
40
+ template <typename data_t>
37
41
  class CuvsCagra;
38
42
 
39
43
  enum class graph_build_algo {
40
44
  /// Use IVF-PQ to build all-neighbors knn graph
41
45
  IVF_PQ,
42
46
  /// Use NN-Descent to build all-neighbors knn graph
43
- NN_DESCENT
47
+ NN_DESCENT,
48
+ /// Use iterative search to build knn graph
49
+ ITERATIVE_SEARCH
44
50
  };
45
51
 
46
52
  /// A type for specifying how PQ codebooks are created.
@@ -116,7 +122,6 @@ struct IVFPQBuildCagraConfig {
116
122
  /// the algorithm always allocates the minimum amount of memory required to
117
123
  /// store the given number of records. Set this flag to `true` if you prefer
118
124
  /// to use as little GPU memory for the database as possible.
119
-
120
125
  bool conservative_memory_allocation = false;
121
126
  };
122
127
 
@@ -177,6 +182,9 @@ struct GpuIndexCagraConfig : public GpuIndexConfig {
177
182
  std::shared_ptr<IVFPQSearchCagraConfig> ivf_pq_search_params{nullptr};
178
183
  float refine_rate = 2.0f;
179
184
  bool store_dataset = true;
185
+
186
+ /// Whether to use MST optimization to guarantee graph connectivity.
187
+ bool guarantee_connectivity = false;
180
188
  };
181
189
 
182
190
  enum class search_algo {
@@ -250,6 +258,7 @@ struct GpuIndexCagra : public GpuIndex {
250
258
  /// the base dataset. Use this function when you want to add vectors with
251
259
  /// ids. Ref: https://github.com/facebookresearch/faiss/issues/4107
252
260
  void add(idx_t n, const float* x) override;
261
+ void addEx(idx_t n, const void* x, NumericType numeric_type) override;
253
262
 
254
263
  /// Trains CAGRA based on the given vector data.
255
264
  /// NB: The use of the train function here is to build the CAGRA graph on
@@ -257,10 +266,14 @@ struct GpuIndexCagra : public GpuIndex {
257
266
  /// of vectors (without IDs) to the index. There is no external quantizer to
258
267
  /// be trained here.
259
268
  void train(idx_t n, const float* x) override;
269
+ void trainEx(idx_t n, const void* x, NumericType numeric_type) override;
260
270
 
261
271
  /// Initialize ourselves from the given CPU index; will overwrite
262
272
  /// all data in ourselves
263
273
  void copyFrom(const faiss::IndexHNSWCagra* index);
274
+ void copyFromEx(
275
+ const faiss::IndexHNSWCagra* index,
276
+ NumericType numeric_type);
264
277
 
265
278
  /// Copy ourselves to the given CPU index; will overwrite all data
266
279
  /// in the index instance
@@ -270,10 +283,17 @@ struct GpuIndexCagra : public GpuIndex {
270
283
 
271
284
  std::vector<idx_t> get_knngraph() const;
272
285
 
286
+ faiss::NumericType get_numeric_type() const;
287
+
273
288
  protected:
274
289
  bool addImplRequiresIDs_() const override;
275
290
 
276
291
  void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
292
+ void addImplEx_(
293
+ idx_t n,
294
+ const void* x,
295
+ NumericType numeric_type,
296
+ const idx_t* ids) override;
277
297
 
278
298
  /// Called from GpuIndex for search
279
299
  void searchImpl_(
@@ -283,12 +303,27 @@ struct GpuIndexCagra : public GpuIndex {
283
303
  float* distances,
284
304
  idx_t* labels,
285
305
  const SearchParameters* search_params) const override;
306
+ void searchImplEx_(
307
+ idx_t n,
308
+ const void* x,
309
+ NumericType numeric_type,
310
+ int k,
311
+ float* distances,
312
+ idx_t* labels,
313
+ const SearchParameters* search_params) const override;
286
314
 
287
315
  /// Our configuration options
288
316
  const GpuIndexCagraConfig cagraConfig_;
289
317
 
318
+ faiss::NumericType numeric_type_;
319
+
290
320
  /// Instance that we own; contains the inverted lists
291
- std::shared_ptr<CuvsCagra> index_;
321
+ std::variant<
322
+ std::monostate,
323
+ std::shared_ptr<CuvsCagra<float>>,
324
+ std::shared_ptr<CuvsCagra<half>>,
325
+ std::shared_ptr<CuvsCagra<int8_t>>>
326
+ index_;
292
327
  };
293
328
 
294
329
  } // namespace gpu
@@ -17,13 +17,13 @@ namespace gpu {
17
17
  /// Returns the maximum k-selection value supported based on the CUDA SDK that
18
18
  /// we were compiled with. .cu files can use DeviceDefs.cuh, but this is for
19
19
  /// non-CUDA files
20
- int getMaxKSelection();
20
+ int getMaxKSelection(bool use_cuvs = false);
21
21
 
22
22
  // Validate the k parameter for search
23
- void validateKSelect(int k);
23
+ void validateKSelect(int k, bool use_cuvs = false);
24
24
 
25
25
  // Validate the nprobe parameter for search
26
- void validateNProbe(size_t nprobe);
26
+ void validateNProbe(size_t nprobe, bool use_cuvs = false);
27
27
 
28
28
  } // namespace gpu
29
29
  } // namespace faiss
@@ -79,7 +79,7 @@ void testGpuIndexBinaryFlat(int kOverride = -1) {
79
79
 
80
80
  int k = kOverride > 0
81
81
  ? kOverride
82
- : faiss::gpu::randVal(1, faiss::gpu::getMaxKSelection());
82
+ : faiss::gpu::randVal(1, faiss::gpu::getMaxKSelection(false));
83
83
  int numVecs = faiss::gpu::randVal(k + 1, 20000);
84
84
  int numQuery = faiss::gpu::randVal(1, 1000);
85
85
 
@@ -56,7 +56,8 @@ void testFlat(const TestFlatOptions& opt) {
56
56
  int k = opt.useFloat16
57
57
  ? std::min(faiss::gpu::randVal(1, 50), numVecs)
58
58
  : std::min(
59
- faiss::gpu::randVal(1, faiss::gpu::getMaxKSelection()),
59
+ faiss::gpu::randVal(
60
+ 1, faiss::gpu::getMaxKSelection(opt.use_cuvs)),
60
61
  numVecs);
61
62
  if (opt.kOverride > 0) {
62
63
  k = opt.kOverride;
@@ -164,7 +165,7 @@ TEST(TestGpuIndexFlat, L2_Float32) {
164
165
 
165
166
  // At least one test for the k > 1024 select
166
167
  TEST(TestGpuIndexFlat, L2_k_2048) {
167
- if (faiss::gpu::getMaxKSelection() >= 2048) {
168
+ if (faiss::gpu::getMaxKSelection(false) >= 2048) {
168
169
  TestFlatOptions opt;
169
170
  opt.metric = faiss::MetricType::METRIC_L2;
170
171
  opt.useFloat16 = false;
@@ -0,0 +1,41 @@
1
+ // @lint-ignore-every LICENSELINT
2
+ /**
3
+ * Copyright (c) Facebook, Inc. and its affiliates.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+ /*
9
+ * Copyright (c) 2025, NVIDIA CORPORATION.
10
+ *
11
+ * Licensed under the Apache License, Version 2.0 (the "License");
12
+ * you may not use this file except in compliance with the License.
13
+ * You may obtain a copy of the License at
14
+ *
15
+ * http://www.apache.org/licenses/LICENSE-2.0
16
+ *
17
+ * Unless required by applicable law or agreed to in writing, software
18
+ * distributed under the License is distributed on an "AS IS" BASIS,
19
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20
+ * See the License for the specific language governing permissions and
21
+ * limitations under the License.
22
+ */
23
+
24
+ #include <cuvs/core/bitset.hpp>
25
+ #include <faiss/gpu/GpuResources.h>
26
+ #include <faiss/impl/IDSelector.h>
27
+
28
+ #pragma GCC visibility push(default)
29
+ namespace faiss::gpu {
30
+ /// Convert a Faiss IDSelector to a cuvs::core::bitset_view
31
+ /// @param res The GpuResources object to use for the conversion
32
+ /// @param selector The Faiss IDSelector to convert
33
+ /// @param bitset The cuvs::core::bitset_view to store the result
34
+ /// @param num_threads Number of threads to use for the conversion. If 0, the
35
+ /// number of threads is set to the number of available threads.
36
+ void convert_to_bitset(
37
+ faiss::gpu::GpuResources* res,
38
+ const faiss::IDSelector& selector,
39
+ cuvs::core::bitset_view<uint32_t, uint32_t> bitset,
40
+ int num_threads = 0);
41
+ } // namespace faiss::gpu
@@ -168,23 +168,26 @@ void RangeSearchPartialResult::merge(
168
168
  std::vector<RangeSearchPartialResult*>& partial_results,
169
169
  bool do_delete) {
170
170
  int npres = partial_results.size();
171
- if (npres == 0)
171
+ if (npres == 0) {
172
172
  return;
173
+ }
173
174
  RangeSearchResult* result = partial_results[0]->res;
174
175
  size_t nx = result->nq;
175
176
 
176
177
  // count
177
178
  for (const RangeSearchPartialResult* pres : partial_results) {
178
- if (!pres)
179
+ if (!pres) {
179
180
  continue;
181
+ }
180
182
  for (const RangeQueryResult& qres : pres->queries) {
181
183
  result->lims[qres.qno] += qres.nres;
182
184
  }
183
185
  }
184
186
  result->do_allocation();
185
187
  for (int j = 0; j < npres; j++) {
186
- if (!partial_results[j])
188
+ if (!partial_results[j]) {
187
189
  continue;
190
+ }
188
191
  partial_results[j]->copy_result(true);
189
192
  if (do_delete) {
190
193
  delete partial_results[j];