faiss 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +39 -29
- data/vendor/faiss/faiss/Clustering.cpp +4 -2
- data/vendor/faiss/faiss/IVFlib.cpp +14 -7
- data/vendor/faiss/faiss/Index.h +72 -3
- data/vendor/faiss/faiss/Index2Layer.cpp +2 -4
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +0 -1
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +1 -0
- data/vendor/faiss/faiss/IndexBinary.h +46 -3
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +118 -4
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +41 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +0 -1
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +18 -7
- data/vendor/faiss/faiss/IndexBinaryIVF.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +6 -4
- data/vendor/faiss/faiss/IndexHNSW.cpp +65 -24
- data/vendor/faiss/faiss/IndexHNSW.h +10 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +96 -18
- data/vendor/faiss/faiss/IndexIDMap.h +20 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +28 -10
- data/vendor/faiss/faiss/IndexIVF.h +16 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +18 -6
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +33 -21
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +16 -6
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +24 -15
- data/vendor/faiss/faiss/IndexIVFFastScan.h +4 -2
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +59 -43
- data/vendor/faiss/faiss/IndexIVFFlat.h +10 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +16 -3
- data/vendor/faiss/faiss/IndexIVFPQ.h +8 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +14 -6
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +14 -4
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +28 -3
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +8 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +9 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
- data/vendor/faiss/faiss/IndexLattice.cpp +8 -4
- data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -7
- data/vendor/faiss/faiss/IndexNSG.cpp +3 -3
- data/vendor/faiss/faiss/IndexPQ.cpp +0 -1
- data/vendor/faiss/faiss/IndexPQ.h +1 -0
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +0 -2
- data/vendor/faiss/faiss/IndexPreTransform.cpp +4 -2
- data/vendor/faiss/faiss/IndexRefine.cpp +11 -6
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +16 -4
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -3
- data/vendor/faiss/faiss/IndexShards.cpp +7 -6
- data/vendor/faiss/faiss/MatrixStats.cpp +16 -8
- data/vendor/faiss/faiss/MetaIndexes.cpp +12 -6
- data/vendor/faiss/faiss/MetricType.h +5 -3
- data/vendor/faiss/faiss/clone_index.cpp +2 -4
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +6 -0
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +9 -4
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +32 -10
- data/vendor/faiss/faiss/gpu/GpuIndex.h +88 -0
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +125 -0
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +39 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +3 -3
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +3 -2
- data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +41 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +6 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +34 -19
- data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +2 -3
- data/vendor/faiss/faiss/impl/NNDescent.cpp +17 -9
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +42 -21
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +6 -24
- data/vendor/faiss/faiss/impl/ResultHandler.h +56 -47
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +28 -15
- data/vendor/faiss/faiss/impl/index_read.cpp +36 -11
- data/vendor/faiss/faiss/impl/index_write.cpp +19 -6
- data/vendor/faiss/faiss/impl/io.cpp +9 -5
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +18 -11
- data/vendor/faiss/faiss/impl/mapped_io.cpp +4 -7
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +0 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +0 -1
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +6 -6
- data/vendor/faiss/faiss/impl/zerocopy_io.cpp +1 -1
- data/vendor/faiss/faiss/impl/zerocopy_io.h +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +49 -33
- data/vendor/faiss/faiss/index_factory.h +8 -2
- data/vendor/faiss/faiss/index_io.h +0 -3
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +2 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +12 -6
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +8 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +15 -8
- data/vendor/faiss/faiss/utils/Heap.h +23 -12
- data/vendor/faiss/faiss/utils/distances.cpp +42 -21
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -3
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +27 -4
- data/vendor/faiss/faiss/utils/extra_distances.cpp +8 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +20 -10
- data/vendor/faiss/faiss/utils/partitioning.cpp +8 -4
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +17 -9
- data/vendor/faiss/faiss/utils/rabitq_simd.h +539 -0
- data/vendor/faiss/faiss/utils/random.cpp +14 -7
- data/vendor/faiss/faiss/utils/utils.cpp +0 -3
- metadata +5 -2
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp

```diff
@@ -41,21 +41,25 @@ void GpuParameterSpace::initialize(const Index* index) {
         return;
     }
     if (DC(IndexReplicas)) {
-        if (ix->count() == 0)
+        if (ix->count() == 0) {
             return;
+        }
         index = ix->at(0);
     }
     if (DC(IndexShards)) {
-        if (ix->count() == 0)
+        if (ix->count() == 0) {
             return;
+        }
         index = ix->at(0);
     }
     if (DC(GpuIndexIVF)) {
         ParameterRange& pr = add_range("nprobe");
         for (int i = 0; i < 12; i++) {
             size_t nprobe = 1 << i;
-            if (nprobe >= ix->getNumLists() ||
+            if (nprobe >= ix->getNumLists() ||
+                nprobe > getMaxKSelection(false)) {
                 break;
+            }
             pr.values.push_back(nprobe);
         }
 
@@ -79,8 +83,9 @@ void GpuParameterSpace::set_index_parameter(
         const std::string& name,
         double val) const {
     if (DC(IndexReplicas)) {
-        for (int i = 0; i < ix->count(); i++)
+        for (int i = 0; i < ix->count(); i++) {
             set_index_parameter(ix->at(i), name, val);
+        }
         return;
     }
     if (name == "nprobe") {
```
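The nprobe candidates generated by `GpuParameterSpace::initialize` are now additionally capped by `getMaxKSelection(false)`. For orientation, a minimal sketch of how these auto-tune entry points are typically driven (assuming a faiss GPU build and an already trained GPU IVF index behind `index`):

```cpp
#include <faiss/gpu/GpuAutoTune.h>

// Sketch: enumerate the tunable parameters of a GPU index and set nprobe.
// `index` is assumed to be a trained faiss::Index* backed by a GpuIndexIVF.
void tune_gpu_nprobe(faiss::Index* index) {
    faiss::gpu::GpuParameterSpace params;
    params.initialize(index); // builds ranges such as "nprobe", capped as above
    params.set_index_parameter(index, "nprobe", 32);
}
```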
data/vendor/faiss/faiss/gpu/GpuCloner.cpp

```diff
@@ -15,6 +15,7 @@
 #include <faiss/IndexBinaryFlat.h>
 #include <faiss/IndexFlat.h>
 #if defined USE_NVIDIA_CUVS
+#include <faiss/IndexBinaryHNSW.h>
 #include <faiss/IndexHNSW.h>
 #endif
 #include <faiss/IndexIVF.h>
@@ -28,14 +29,13 @@
 #include <faiss/gpu/GpuIndex.h>
 #include <faiss/gpu/GpuIndexBinaryFlat.h>
 #if defined USE_NVIDIA_CUVS
+#include <faiss/gpu/GpuIndexBinaryCagra.h>
 #include <faiss/gpu/GpuIndexCagra.h>
 #endif
 #include <faiss/gpu/GpuIndexFlat.h>
 #include <faiss/gpu/GpuIndexIVFFlat.h>
 #include <faiss/gpu/GpuIndexIVFPQ.h>
 #include <faiss/gpu/GpuIndexIVFScalarQuantizer.h>
-#include <faiss/gpu/utils/DeviceUtils.h>
-#include <faiss/impl/FaissAssert.h>
 #include <faiss/index_io.h>
 
 namespace faiss {
@@ -95,6 +95,9 @@ Index* ToCPUCloner::clone_Index(const Index* index) {
 #if defined USE_NVIDIA_CUVS
     else if (auto icg = dynamic_cast<const GpuIndexCagra*>(index)) {
         IndexHNSWCagra* res = new IndexHNSWCagra();
+        if (icg->get_numeric_type() != faiss::NumericType::Float32) {
+            res->base_level_only = true;
+        }
         icg->copyTo(res);
         return res;
     }
@@ -236,7 +239,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.device = device;
         GpuIndexCagra* res =
                 new GpuIndexCagra(provider, icg->d, icg->metric_type, config);
-        res->copyFrom(icg);
+        res->copyFromEx(icg, icg->get_numeric_type());
         return res;
     }
 #endif
@@ -290,14 +293,16 @@ void ToGpuClonerMultiple::copy_ivf_shard(
         idx_t i0 = i * index_ivf->ntotal / n;
         idx_t i1 = (i + 1) * index_ivf->ntotal / n;
 
-        if (verbose)
+        if (verbose) {
             printf("IndexShards shard %ld indices %ld:%ld\n", i, i0, i1);
+        }
         index_ivf->copy_subset_to(
                 *idx2, InvertedLists::SUBSET_TYPE_ID_RANGE, i0, i1);
         FAISS_ASSERT(idx2->ntotal == i1 - i0);
     } else if (shard_type == 1) {
-        if (verbose)
+        if (verbose) {
             printf("IndexShards shard %ld select modulo %ld = %ld\n", i, n, i);
+        }
         index_ivf->copy_subset_to(
                 *idx2, InvertedLists::SUBSET_TYPE_ID_MOD, n, i);
     } else if (shard_type == 4) {
@@ -527,7 +532,15 @@ faiss::IndexBinary* index_binary_gpu_to_cpu(
         IndexBinaryFlat* ret = new IndexBinaryFlat();
         ii->copyTo(ret);
         return ret;
-    } else {
+    }
+#if defined USE_NVIDIA_CUVS
+    else if (auto ii = dynamic_cast<const GpuIndexBinaryCagra*>(gpu_index)) {
+        IndexBinaryHNSWCagra* ret = new IndexBinaryHNSWCagra();
+        ii->copyTo(ret);
+        return ret;
+    }
+#endif
+    else {
         FAISS_THROW_MSG("cannot clone this type of index");
     }
 }
@@ -540,11 +553,20 @@ faiss::IndexBinary* index_binary_cpu_to_gpu(
     if (auto ii = dynamic_cast<const IndexBinaryFlat*>(index)) {
         GpuIndexBinaryFlatConfig config;
         config.device = device;
-        if (options) {
-            config.use_cuvs = options->use_cuvs;
-        }
         return new GpuIndexBinaryFlat(provider, ii, config);
-    } else {
+    }
+#if defined USE_NVIDIA_CUVS
+    else if (
+            auto ii = dynamic_cast<const faiss::IndexBinaryHNSWCagra*>(index)) {
+        GpuIndexCagraConfig config;
+        config.device = device;
+        GpuIndexBinaryCagra* res =
+                new GpuIndexBinaryCagra(provider, ii->d, config);
+        res->copyFrom(ii);
+        return res;
+    }
+#endif
+    else {
         FAISS_THROW_MSG("cannot clone this type of index");
     }
 }
```
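With the cloner changes above, binary CAGRA indexes can round-trip between CPU and GPU. A hedged sketch under a cuVS-enabled build (`USE_NVIDIA_CUVS`); the `(provider, device, index, options)` signature of `index_binary_cpu_to_gpu` is assumed from GpuCloner.h rather than shown in this diff:

```cpp
#include <faiss/IndexBinaryHNSW.h>
#include <faiss/gpu/GpuCloner.h>
#include <faiss/gpu/StandardGpuResources.h>

// Sketch: move a CPU IndexBinaryHNSWCagra to the GPU and clone it back.
// Requires faiss built with USE_NVIDIA_CUVS; otherwise both calls throw
// "cannot clone this type of index".
void roundtrip_binary_cagra(const faiss::IndexBinaryHNSWCagra* cpu_index) {
    faiss::gpu::StandardGpuResources res;

    // CPU -> GPU: now dispatches to GpuIndexBinaryCagra for this type.
    faiss::IndexBinary* gpu_index = faiss::gpu::index_binary_cpu_to_gpu(
            &res, /*device=*/0, cpu_index, /*options=*/nullptr);

    // GPU -> CPU: produces a fresh IndexBinaryHNSWCagra.
    faiss::IndexBinary* back = faiss::gpu::index_binary_gpu_to_cpu(gpu_index);

    delete back;
    delete gpu_index;
}
```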
data/vendor/faiss/faiss/gpu/GpuIndex.h

```diff
@@ -77,11 +77,17 @@ class GpuIndex : public faiss::Index {
     /// as needed
     /// Handles paged adds if the add set is too large; calls addInternal_
     void add(idx_t, const float* x) override;
+    void addEx(idx_t, const void* x, NumericType numeric_type) override;
 
     /// `x` and `ids` can be resident on the CPU or any GPU; copies are
     /// performed as needed
     /// Handles paged adds if the add set is too large; calls addInternal_
     void add_with_ids(idx_t n, const float* x, const idx_t* ids) override;
+    void add_with_idsEx(
+            idx_t n,
+            const void* x,
+            NumericType numeric_type,
+            const idx_t* xids) override;
 
     /// `x` and `labels` can be resident on the CPU or any GPU; copies are
     /// performed as needed
@@ -97,6 +103,14 @@ class GpuIndex : public faiss::Index {
             float* distances,
             idx_t* labels,
             const SearchParameters* params = nullptr) const override;
+    void searchEx(
+            idx_t n,
+            const void* x,
+            NumericType numeric_type,
+            idx_t k,
+            float* distances,
+            idx_t* labels,
+            const SearchParameters* params = nullptr) const override;
 
     /// `x`, `distances` and `labels` and `recons` can be resident on the CPU or
     /// any GPU; copies are performed as needed
@@ -125,9 +139,23 @@ class GpuIndex : public faiss::Index {
    protected:
     /// Copy what we need from the CPU equivalent
     void copyFrom(const faiss::Index* index);
+    void copyFromEx(const faiss::Index* index, NumericType numeric_type) {
+        if (numeric_type == NumericType::Float32) {
+            copyFrom(index);
+        } else {
+            FAISS_THROW_MSG("GpuIndex::copyFrom: unsupported numeric type");
+        }
+    }
 
     /// Copy what we have to the CPU equivalent
     void copyTo(faiss::Index* index) const;
+    void copyToEx(faiss::Index* index, NumericType numeric_type) {
+        if (numeric_type == NumericType::Float32) {
+            copyTo(index);
+        } else {
+            FAISS_THROW_MSG("GpuIndex::copyTo: unsupported numeric type");
+        }
+    }
 
     /// Does addImpl_ require IDs? If so, and no IDs are provided, we will
     /// generate them sequentially based on the order in which the IDs are added
@@ -137,6 +165,18 @@ class GpuIndex : public faiss::Index {
     /// All data is guaranteed to be resident on our device
     virtual void addImpl_(idx_t n, const float* x, const idx_t* ids) = 0;
 
+    virtual void addImplEx_(
+            idx_t n,
+            const void* x,
+            NumericType numeric_type,
+            const idx_t* ids) {
+        if (numeric_type == NumericType::Float32) {
+            addImpl_(n, static_cast<const float*>(x), ids);
+        } else {
+            FAISS_THROW_MSG("GpuIndex::addImpl_: unsupported numeric type");
+        }
+    };
+
     /// Overridden to actually perform the search
     /// All data is guaranteed to be resident on our device
     virtual void searchImpl_(
@@ -147,13 +187,44 @@ class GpuIndex : public faiss::Index {
             idx_t* labels,
             const SearchParameters* params) const = 0;
 
+    virtual void searchImplEx_(
+            idx_t n,
+            const void* x,
+            NumericType numeric_type,
+            int k,
+            float* distances,
+            idx_t* labels,
+            const SearchParameters* params) const {
+        if (numeric_type == NumericType::Float32) {
+            searchImpl_(
+                    n,
+                    static_cast<const float*>(x),
+                    k,
+                    distances,
+                    labels,
+                    params);
+        } else {
+            FAISS_THROW_MSG("GpuIndex::searchImpl_: unsupported numeric type");
+        }
+    }
+
    private:
     /// Handles paged adds if the add set is too large, passes to
     /// addImpl_ to actually perform the add for the current page
     void addPaged_(idx_t n, const float* x, const idx_t* ids);
+    void addPagedEx_(
+            idx_t n,
+            const void* x,
+            NumericType numeric_type,
+            const idx_t* ids);
 
     /// Calls addImpl_ for a single page of GPU-resident data
     void addPage_(idx_t n, const float* x, const idx_t* ids);
+    void addPageEx_(
+            idx_t n,
+            const void* x,
+            NumericType numeric_type,
+            const idx_t* ids);
 
     /// Calls searchImpl_ for a single page of GPU-resident data
     void searchNonPaged_(
@@ -164,6 +235,15 @@ class GpuIndex : public faiss::Index {
             idx_t* outIndicesData,
             const SearchParameters* params) const;
 
+    void searchNonPagedEx_(
+            idx_t n,
+            const void* x,
+            NumericType numeric_type,
+            int k,
+            float* outDistancesData,
+            idx_t* outIndicesData,
+            const SearchParameters* params) const;
+
     /// Calls searchImpl_ for a single page of GPU-resident data,
     /// handling paging of the data and copies from the CPU
     void searchFromCpuPaged_(
@@ -173,6 +253,14 @@ class GpuIndex : public faiss::Index {
             float* outDistancesData,
             idx_t* outIndicesData,
             const SearchParameters* params) const;
+    void searchFromCpuPagedEx_(
+            idx_t n,
+            const void* x,
+            NumericType numeric_type,
+            int k,
+            float* outDistancesData,
+            idx_t* outIndicesData,
+            const SearchParameters* params) const;
 
    protected:
     /// Manages streams, cuBLAS handles and scratch memory for devices
```
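The new `*Ex` entry points carry a type-erased `const void*` payload plus a `NumericType` tag; the inline defaults above simply forward `Float32` to the existing float paths and throw otherwise, so only subclasses that override the `Ex` hooks (such as CAGRA) accept other element types. A minimal caller-side sketch, assuming a GPU build and using `GpuIndexFlatL2` as the concrete index:

```cpp
#include <vector>

#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>

// Sketch: for Float32 the Ex calls behave exactly like add()/search().
void ex_api_demo(
        int d,
        const std::vector<float>& xb,
        const std::vector<float>& xq,
        int k) {
    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexFlatL2 index(&res, d);

    index.addEx(xb.size() / d, xb.data(), faiss::NumericType::Float32);

    faiss::idx_t nq = xq.size() / d;
    std::vector<float> distances(nq * k);
    std::vector<faiss::idx_t> labels(nq * k);
    index.searchEx(
            nq,
            xq.data(),
            faiss::NumericType::Float32,
            k,
            distances.data(),
            labels.data());
}
```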
data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h (new file)

```diff
@@ -0,0 +1,125 @@
+// @lint-ignore-every LICENSELINT
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <faiss/IndexBinary.h>
+#include <faiss/IndexBinaryHNSW.h>
+#include <faiss/gpu/GpuIndexCagra.h>
+
+#include <memory>
+
+namespace faiss {
+namespace gpu {
+
+class BinaryCuvsCagra;
+
+struct GpuIndexBinaryCagra : public IndexBinary {
+   public:
+    GpuIndexBinaryCagra(
+            GpuResourcesProvider* provider,
+            int dims,
+            GpuIndexCagraConfig config = GpuIndexCagraConfig());
+
+    ~GpuIndexBinaryCagra() override;
+
+    int getDevice() const;
+
+    /// Returns a reference to our GpuResources object that manages memory,
+    /// stream and handle resources on the GPU
+    std::shared_ptr<GpuResources> getResources();
+
+    /// Trains CAGRA based on the given vector data and add them along with ids.
+    /// NB: The use of the add function here is to build the CAGRA graph on
+    /// the base dataset. Use this function when you want to add vectors with
+    /// ids. Ref: https://github.com/facebookresearch/faiss/issues/4107
+    void add(idx_t n, const uint8_t* x) override;
+
+    /// Trains CAGRA based on the given vector data.
+    /// NB: The use of the train function here is to build the CAGRA graph on
+    /// the base dataset and is currently the only function to add the full set
+    /// of vectors (without IDs) to the index. There is no external quantizer to
+    /// be trained here.
+    void train(idx_t n, const uint8_t* x) override;
+
+    /// Initialize ourselves from the given CPU index; will overwrite
+    /// all data in ourselves
+    void copyFrom(const faiss::IndexBinaryHNSWCagra* index);
+
+    /// Copy ourselves to the given CPU index; will overwrite all data
+    /// in the index instance
+    void copyTo(faiss::IndexBinaryHNSWCagra* index) const;
+
+    void reset() override;
+
+    std::vector<idx_t> get_knngraph() const;
+
+    void search(
+            idx_t n,
+            const uint8_t* x,
+            // faiss::IndexBinary has idx_t for k
+            idx_t k,
+            int* distances,
+            faiss::idx_t* labels,
+            const faiss::SearchParameters* params = nullptr) const override;
+
+   protected:
+    /// Called from search when the input data is on the CPU;
+    /// potentially allows for pinned memory usage
+    void searchFromCpuPaged_(
+            idx_t n,
+            const uint8_t* x,
+            int k,
+            int* outDistancesData,
+            idx_t* outIndicesData,
+            const SearchParameters* search_params) const;
+
+    void searchNonPaged_(
+            idx_t n,
+            const uint8_t* x,
+            int k,
+            int* outDistancesData,
+            idx_t* outIndicesData,
+            const SearchParameters* search_params) const;
+
+    void searchImpl_(
+            idx_t n,
+            const uint8_t* x,
+            int k,
+            int* distances,
+            idx_t* labels,
+            const SearchParameters* search_params) const;
+
+   protected:
+    /// Manages streams, cuBLAS handles and scratch memory for devices
+    std::shared_ptr<GpuResources> resources_;
+
+    /// Configuration options
+    const GpuIndexCagraConfig cagraConfig_;
+
+    /// Instance that we own; contains the cuVS index
+    std::shared_ptr<BinaryCuvsCagra> index_;
+};
+
+} // namespace gpu
+} // namespace faiss
```
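A hedged usage sketch for the new binary CAGRA index, based only on the declarations above (cuVS-enabled build assumed). `d` is the dimension in bits, as usual for `IndexBinary`, and distances come back as `int` Hamming distances:

```cpp
#include <cstdint>
#include <vector>

#include <faiss/gpu/GpuIndexBinaryCagra.h>
#include <faiss/gpu/StandardGpuResources.h>

// Sketch: build a CAGRA graph over packed binary vectors and query it.
// Each vector occupies d / 8 bytes; requires USE_NVIDIA_CUVS.
void binary_cagra_demo(
        int d,
        const std::vector<uint8_t>& xb, // nb * (d / 8) bytes, base set
        const std::vector<uint8_t>& xq, // nq * (d / 8) bytes, queries
        int k) {
    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexBinaryCagra index(&res, d);

    // train() builds the CAGRA graph over the full base dataset.
    faiss::idx_t nb = xb.size() / (d / 8);
    index.train(nb, xb.data());

    faiss::idx_t nq = xq.size() / (d / 8);
    std::vector<int> distances(nq * k); // Hamming distances
    std::vector<faiss::idx_t> labels(nq * k);
    index.search(nq, xq.data(), k, distances.data(), labels.data());
}
```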
data/vendor/faiss/faiss/gpu/GpuIndexCagra.h

```diff
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -27,6 +27,9 @@
 #include <faiss/gpu/GpuIndex.h>
 #include <faiss/gpu/GpuIndexIVFPQ.h>
 
+#include <variant>
+#include "faiss/Index.h"
+
 namespace faiss {
 struct IndexHNSWCagra;
 }
@@ -34,13 +37,16 @@ struct IndexHNSWCagra;
 namespace faiss {
 namespace gpu {
 
+template <typename data_t>
 class CuvsCagra;
 
 enum class graph_build_algo {
     /// Use IVF-PQ to build all-neighbors knn graph
     IVF_PQ,
     /// Use NN-Descent to build all-neighbors knn graph
-    NN_DESCENT
+    NN_DESCENT,
+    /// Use iterative search to build knn graph
+    ITERATIVE_SEARCH
 };
 
 /// A type for specifying how PQ codebooks are created.
@@ -116,7 +122,6 @@ struct IVFPQBuildCagraConfig {
     /// the algorithm always allocates the minimum amount of memory required to
     /// store the given number of records. Set this flag to `true` if you prefer
     /// to use as little GPU memory for the database as possible.
-
     bool conservative_memory_allocation = false;
 };
 
@@ -177,6 +182,9 @@ struct GpuIndexCagraConfig : public GpuIndexConfig {
     std::shared_ptr<IVFPQSearchCagraConfig> ivf_pq_search_params{nullptr};
     float refine_rate = 2.0f;
     bool store_dataset = true;
+
+    /// Whether to use MST optimization to guarantee graph connectivity.
+    bool guarantee_connectivity = false;
 };
 
 enum class search_algo {
@@ -250,6 +258,7 @@ struct GpuIndexCagra : public GpuIndex {
     /// the base dataset. Use this function when you want to add vectors with
     /// ids. Ref: https://github.com/facebookresearch/faiss/issues/4107
     void add(idx_t n, const float* x) override;
+    void addEx(idx_t n, const void* x, NumericType numeric_type) override;
 
     /// Trains CAGRA based on the given vector data.
     /// NB: The use of the train function here is to build the CAGRA graph on
@@ -257,10 +266,14 @@ struct GpuIndexCagra : public GpuIndex {
     /// of vectors (without IDs) to the index. There is no external quantizer to
     /// be trained here.
     void train(idx_t n, const float* x) override;
+    void trainEx(idx_t n, const void* x, NumericType numeric_type) override;
 
     /// Initialize ourselves from the given CPU index; will overwrite
     /// all data in ourselves
     void copyFrom(const faiss::IndexHNSWCagra* index);
+    void copyFromEx(
+            const faiss::IndexHNSWCagra* index,
+            NumericType numeric_type);
 
     /// Copy ourselves to the given CPU index; will overwrite all data
     /// in the index instance
@@ -270,10 +283,17 @@ struct GpuIndexCagra : public GpuIndex {
 
     std::vector<idx_t> get_knngraph() const;
 
+    faiss::NumericType get_numeric_type() const;
+
    protected:
     bool addImplRequiresIDs_() const override;
 
     void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
+    void addImplEx_(
+            idx_t n,
+            const void* x,
+            NumericType numeric_type,
+            const idx_t* ids) override;
 
     /// Called from GpuIndex for search
     void searchImpl_(
@@ -283,12 +303,27 @@ struct GpuIndexCagra : public GpuIndex {
             float* distances,
             idx_t* labels,
             const SearchParameters* search_params) const override;
+    void searchImplEx_(
+            idx_t n,
+            const void* x,
+            NumericType numeric_type,
+            int k,
+            float* distances,
+            idx_t* labels,
+            const SearchParameters* search_params) const override;
 
     /// Our configuration options
     const GpuIndexCagraConfig cagraConfig_;
 
+    faiss::NumericType numeric_type_;
+
     /// Instance that we own; contains the inverted lists
-    std::shared_ptr<CuvsCagra> index_;
+    std::variant<
+            std::monostate,
+            std::shared_ptr<CuvsCagra<float>>,
+            std::shared_ptr<CuvsCagra<half>>,
+            std::shared_ptr<CuvsCagra<int8_t>>>
+            index_;
 };
 
 } // namespace gpu
```
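The cloner now routes through `copyFromEx(icg, icg->get_numeric_type())`, and the cuVS handle is held in the `std::variant` shown above, keyed by element type. A sketch of that copy path from a CPU `IndexHNSWCagra`, restricted to `NumericType::Float32` (the only enumerator visible in this diff; half and int8 variants exist but their enumerator names are not shown here):

```cpp
#include <faiss/IndexHNSW.h>
#include <faiss/gpu/GpuIndexCagra.h>

// Sketch: roughly what ToGpuCloner::clone_Index now does for IndexHNSWCagra.
faiss::gpu::GpuIndexCagra* to_gpu_cagra(
        faiss::gpu::GpuResourcesProvider* provider,
        const faiss::IndexHNSWCagra* cpu_index,
        int device) {
    faiss::gpu::GpuIndexCagraConfig config;
    config.device = device;

    auto* res = new faiss::gpu::GpuIndexCagra(
            provider, cpu_index->d, cpu_index->metric_type, config);
    res->copyFromEx(cpu_index, faiss::NumericType::Float32);
    return res;
}
```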
data/vendor/faiss/faiss/gpu/impl/IndexUtils.h

```diff
@@ -17,13 +17,13 @@ namespace gpu {
 /// Returns the maximum k-selection value supported based on the CUDA SDK that
 /// we were compiled with. .cu files can use DeviceDefs.cuh, but this is for
 /// non-CUDA files
-int getMaxKSelection();
+int getMaxKSelection(bool use_cuvs = false);
 
 // Validate the k parameter for search
-void validateKSelect(int k);
+void validateKSelect(int k, bool use_cuvs = false);
 
 // Validate the nprobe parameter for search
-void validateNProbe(size_t nprobe);
+void validateNProbe(size_t nprobe, bool use_cuvs = false);
 
 } // namespace gpu
 } // namespace faiss
```
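Since the k-selection ceiling now depends on whether the cuVS backend is in use, callers can query it explicitly. A small sketch:

```cpp
#include <faiss/gpu/impl/IndexUtils.h>

// Sketch: check a requested k against the backend-specific limit before
// searching; validateKSelect() rejects values the backend cannot handle.
void check_k(int k, bool use_cuvs) {
    int k_max = faiss::gpu::getMaxKSelection(use_cuvs);
    if (k > k_max) {
        k = k_max; // clamp to the supported ceiling
    }
    faiss::gpu::validateKSelect(k, use_cuvs);
}
```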
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp

```diff
@@ -79,7 +79,7 @@ void testGpuIndexBinaryFlat(int kOverride = -1) {
 
     int k = kOverride > 0
             ? kOverride
-            : faiss::gpu::randVal(1, faiss::gpu::getMaxKSelection());
+            : faiss::gpu::randVal(1, faiss::gpu::getMaxKSelection(false));
     int numVecs = faiss::gpu::randVal(k + 1, 20000);
     int numQuery = faiss::gpu::randVal(1, 1000);
 
```
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp

```diff
@@ -56,7 +56,8 @@ void testFlat(const TestFlatOptions& opt) {
     int k = opt.useFloat16
             ? std::min(faiss::gpu::randVal(1, 50), numVecs)
             : std::min(
-                      faiss::gpu::randVal(1, faiss::gpu::getMaxKSelection()),
+                      faiss::gpu::randVal(
+                              1, faiss::gpu::getMaxKSelection(opt.use_cuvs)),
                       numVecs);
     if (opt.kOverride > 0) {
         k = opt.kOverride;
@@ -164,7 +165,7 @@ TEST(TestGpuIndexFlat, L2_Float32) {
 
 // At least one test for the k > 1024 select
 TEST(TestGpuIndexFlat, L2_k_2048) {
-    if (faiss::gpu::getMaxKSelection() >= 2048) {
+    if (faiss::gpu::getMaxKSelection(false) >= 2048) {
         TestFlatOptions opt;
         opt.metric = faiss::MetricType::METRIC_L2;
         opt.useFloat16 = false;
```
data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h (new file)

```diff
@@ -0,0 +1,41 @@
+// @lint-ignore-every LICENSELINT
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cuvs/core/bitset.hpp>
+#include <faiss/gpu/GpuResources.h>
+#include <faiss/impl/IDSelector.h>
+
+#pragma GCC visibility push(default)
+namespace faiss::gpu {
+/// Convert a Faiss IDSelector to a cuvs::core::bitset_view
+/// @param res The GpuResources object to use for the conversion
+/// @param selector The Faiss IDSelector to convert
+/// @param bitset The cuvs::core::bitset_view to store the result
+/// @param num_threads Number of threads to use for the conversion. If 0, the
+/// number of threads is set to the number of available threads.
+void convert_to_bitset(
+        faiss::gpu::GpuResources* res,
+        const faiss::IDSelector& selector,
+        cuvs::core::bitset_view<uint32_t, uint32_t> bitset,
+        int num_threads = 0);
+} // namespace faiss::gpu
```
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp

```diff
@@ -168,23 +168,26 @@ void RangeSearchPartialResult::merge(
         std::vector<RangeSearchPartialResult*>& partial_results,
         bool do_delete) {
     int npres = partial_results.size();
-    if (npres == 0)
+    if (npres == 0) {
         return;
+    }
     RangeSearchResult* result = partial_results[0]->res;
     size_t nx = result->nq;
 
     // count
     for (const RangeSearchPartialResult* pres : partial_results) {
-        if (!pres)
+        if (!pres) {
             continue;
+        }
         for (const RangeQueryResult& qres : pres->queries) {
             result->lims[qres.qno] += qres.nres;
         }
     }
     result->do_allocation();
     for (int j = 0; j < npres; j++) {
-        if (!partial_results[j])
+        if (!partial_results[j]) {
             continue;
+        }
         partial_results[j]->copy_result(true);
         if (do_delete) {
             delete partial_results[j];
```