faiss 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/ext/faiss/index.cpp +36 -10
- data/ext/faiss/index_binary.cpp +19 -6
- data/ext/faiss/kmeans.cpp +6 -6
- data/ext/faiss/numo.hpp +273 -123
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +2 -3
- data/vendor/faiss/faiss/AutoTune.h +1 -1
- data/vendor/faiss/faiss/Clustering.cpp +2 -2
- data/vendor/faiss/faiss/Clustering.h +2 -2
- data/vendor/faiss/faiss/IVFlib.cpp +1 -2
- data/vendor/faiss/faiss/IVFlib.h +1 -1
- data/vendor/faiss/faiss/Index.h +10 -10
- data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
- data/vendor/faiss/faiss/Index2Layer.h +2 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexBinary.h +7 -7
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +3 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
- data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
- data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
- data/vendor/faiss/faiss/IndexFastScan.h +107 -7
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -1
- data/vendor/faiss/faiss/IndexHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
- data/vendor/faiss/faiss/IndexIDMap.h +6 -6
- data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVF.h +5 -5
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
- data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +366 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +13 -6
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +650 -0
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +216 -0
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
- data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
- data/vendor/faiss/faiss/IndexPQ.h +1 -1
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
- data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +13 -10
- data/vendor/faiss/faiss/IndexRaBitQ.h +7 -2
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +586 -0
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +149 -0
- data/vendor/faiss/faiss/IndexShards.cpp +1 -1
- data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
- data/vendor/faiss/faiss/MetricType.h +1 -1
- data/vendor/faiss/faiss/VectorTransform.h +2 -2
- data/vendor/faiss/faiss/clone_index.cpp +3 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +10 -6
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
- data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +3 -3
- data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +1 -1
- data/vendor/faiss/faiss/impl/HNSW.h +4 -4
- data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
- data/vendor/faiss/faiss/impl/IDSelector.h +1 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
- data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
- data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
- data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +246 -0
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +153 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +54 -158
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +2 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
- data/vendor/faiss/faiss/impl/index_read.cpp +87 -3
- data/vendor/faiss/faiss/impl/index_write.cpp +73 -3
- data/vendor/faiss/faiss/impl/io.cpp +2 -2
- data/vendor/faiss/faiss/impl/io.h +4 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
- data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
- data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
- data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
- data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
- data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
- data/vendor/faiss/faiss/index_factory.cpp +43 -1
- data/vendor/faiss/faiss/index_factory.h +1 -1
- data/vendor/faiss/faiss/index_io.h +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +205 -0
- data/vendor/faiss/faiss/invlists/InvertedLists.h +62 -0
- data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
- data/vendor/faiss/faiss/utils/Heap.h +3 -3
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
- data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
- data/vendor/faiss/faiss/utils/distances.h +2 -2
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
- data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
- data/vendor/faiss/faiss/utils/hamming.h +1 -1
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
- data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
- data/vendor/faiss/faiss/utils/partitioning.h +2 -2
- data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
- data/vendor/faiss/faiss/utils/random.cpp +1 -1
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
- data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
- data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
- data/vendor/faiss/faiss/utils/utils.cpp +5 -2
- data/vendor/faiss/faiss/utils/utils.h +2 -2
- metadata +14 -3
data/vendor/faiss/faiss/IndexRaBitQFastScan.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <vector>
+
+#include <faiss/IndexFastScan.h>
+#include <faiss/IndexRaBitQ.h>
+#include <faiss/impl/RaBitQUtils.h>
+#include <faiss/impl/RaBitQuantizer.h>
+#include <faiss/impl/simd_result_handlers.h>
+#include <faiss/utils/Heap.h>
+#include <faiss/utils/simdlib.h>
+
+namespace faiss {
+
+// Import shared utilities from RaBitQUtils
+using rabitq_utils::FactorsData;
+using rabitq_utils::QueryFactorsData;
+
+/** Fast-scan version of RaBitQ index that processes 32 database vectors at a
+ * time using SIMD operations. Similar to IndexPQFastScan but adapted for
+ * RaBitQ's bit-level quantization with factors.
+ *
+ * The key differences from IndexRaBitQ:
+ * - Processes vectors in batches of 32
+ * - Uses 4-bit groupings for SIMD optimization (4 dimensions per 4-bit unit)
+ * - Separates factors from quantized bits for efficient processing
+ * - Leverages existing PQ4 FastScan infrastructure where possible
+ */
+struct IndexRaBitQFastScan : IndexFastScan {
+    /// RaBitQ quantizer for encoding/decoding
+    RaBitQuantizer rabitq;
+
+    /// Center of all points (same as IndexRaBitQ)
+    std::vector<float> center;
+
+    /// Extracted factors storage for batch processing
+    /// Size: ntotal, stores factors separately from packed codes
+    std::vector<FactorsData> factors_storage;
+
+    /// Default number of bits to quantize a query with
+    uint8_t qb = 8;
+
+    // quantize the query with a zero-centered scalar quantizer.
+    bool centered = false;
+
+    IndexRaBitQFastScan();
+
+    explicit IndexRaBitQFastScan(
+            idx_t d,
+            MetricType metric = METRIC_L2,
+            int bbs = 32);
+
+    /// build from an existing IndexRaBitQ
+    explicit IndexRaBitQFastScan(const IndexRaBitQ& orig, int bbs = 32);
+
+    void train(idx_t n, const float* x) override;
+
+    void add(idx_t n, const float* x) override;
+
+    void compute_codes(uint8_t* codes, idx_t n, const float* x) const override;
+
+    void compute_float_LUT(
+            float* lut,
+            idx_t n,
+            const float* x,
+            const FastScanDistancePostProcessing& context) const override;
+
+    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
+
+    void search(
+            idx_t n,
+            const float* x,
+            idx_t k,
+            float* distances,
+            idx_t* labels,
+            const SearchParameters* params = nullptr) const override;
+
+    /// Override to create RaBitQ-specific handlers
+    void* make_knn_handler(
+            bool is_max,
+            int /*impl*/,
+            idx_t n,
+            idx_t k,
+            size_t /*ntotal*/,
+            float* distances,
+            idx_t* labels,
+            const IDSelector* sel,
+            const FastScanDistancePostProcessing& context) const override;
+};
+
+/** SIMD result handler for RaBitQ FastScan that applies distance corrections
+ * and maintains heaps directly during SIMD operations.
+ *
+ * This handler processes batches of 32 distance computations from SIMD kernels,
+ * applies RaBitQ-specific adjustments (factors and normalizers), and
+ * immediately updates result heaps without intermediate storage. This
+ * eliminates the need for post-processing and provides significant memory and
+ * performance benefits.
+ *
+ * Key optimizations:
+ * - Direct heap integration (no intermediate result storage)
+ * - Batch-level computation of normalizers and query factors
+ * - Preserves exact mathematical equivalence to original RaBitQ distances
+ * @tparam C Comparator type (CMin/CMax) for heap operations
+ * @tparam with_id_map Whether to use id mapping (similar to HeapHandler)
+ */
+template <class C, bool with_id_map = false>
+struct RaBitQHeapHandler
+        : simd_result_handlers::ResultHandlerCompare<C, with_id_map> {
+    using RHC = simd_result_handlers::ResultHandlerCompare<C, with_id_map>;
+    using RHC::normalizers;
+
+    const IndexRaBitQFastScan* rabitq_index;
+    float* heap_distances; // [nq * k]
+    int64_t* heap_labels;  // [nq * k]
+    const size_t nq, k;
+    const FastScanDistancePostProcessing&
+            context; // Processing context with query offset
+
+    // Use float-based comparator for heap operations
+    using Cfloat = typename std::conditional<
+            C::is_max,
+            CMax<float, int64_t>,
+            CMin<float, int64_t>>::type;
+
+    RaBitQHeapHandler(
+            const IndexRaBitQFastScan* index,
+            size_t nq_val,
+            size_t k_val,
+            float* distances,
+            int64_t* labels,
+            const IDSelector* sel_in,
+            const FastScanDistancePostProcessing& context);
+
+    void handle(size_t q, size_t b, simd16uint16 d0, simd16uint16 d1) final;
+
+    void begin(const float* norms);
+
+    void end();
+};
+
+} // namespace faiss
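For orientation, here is a minimal sketch of how the new index class could be exercised from C++, based only on the declarations above; the data, sizes, and parameter choices are illustrative and not taken from the diff.

#include <faiss/IndexRaBitQFastScan.h>

#include <random>
#include <vector>

int main() {
    const int d = 64;              // vector dimensionality (illustrative)
    const faiss::idx_t nb = 10000; // database size (illustrative)

    // Illustrative random training/database data.
    std::mt19937 rng(123);
    std::normal_distribution<float> dist;
    std::vector<float> xb(nb * d);
    for (float& v : xb) {
        v = dist(rng);
    }

    // bbs = 32 matches the 32-vector SIMD batch size described above.
    faiss::IndexRaBitQFastScan index(d, faiss::METRIC_L2, 32);
    index.train(nb, xb.data());
    index.add(nb, xb.data());

    // Search the first vector against the database.
    const faiss::idx_t k = 5;
    std::vector<float> distances(k);
    std::vector<faiss::idx_t> labels(k);
    index.search(1, xb.data(), k, distances.data(), labels.data());
    return 0;
}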
data/vendor/faiss/faiss/MatrixStats.cpp
@@ -77,7 +77,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x) : n(n), d(d) {
     if (d > 1024) {
         do_comment(
                 "indexing this many dimensions is hard, "
-                "please consider dimensionality
+                "please consider dimensionality reduction (with PCAMatrix)\n");
     }
 
     hash_value = hash_bytes((const uint8_t*)x, n * d * sizeof(*x));
@@ -125,7 +125,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x) : n(n), d(d) {
         }
     }
 
-    // invalid
+    // invalid vector stats
     if (n_valid == n) {
         do_comment("no NaN or Infs in data\n");
     } else {
@@ -229,7 +229,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x) : n(n), d(d) {
     } else {
         do_comment(
                 "%zd dimensions are too large "
-                "wrt. their variance, may
+                "wrt. their variance, may lose precision "
                 "in IndexFlatL2 (use CenteringTransform)\n",
                 n_dangerous_range);
     }
data/vendor/faiss/faiss/MetricType.h
@@ -35,7 +35,7 @@ enum MetricType {
 
     /// sum_i(min(a_i, b_i)) / sum_i(max(a_i, b_i)) where a_i, b_i > 0
    METRIC_Jaccard,
-    /// Squared
+    /// Squared Euclidean distance, ignoring NaNs
    METRIC_NaNEuclidean,
    /// Gower's distance - numeric dimensions are in [0,1] and categorical
    /// dimensions are negative integers
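Since the enum comments carry the actual definitions, a direct transcription of the Jaccard formula may help; this helper is purely illustrative and not part of faiss.

#include <algorithm>
#include <cstdio>
#include <vector>

// sum_i(min(a_i, b_i)) / sum_i(max(a_i, b_i)), assuming a_i, b_i > 0,
// exactly as the METRIC_Jaccard comment states.
float jaccard_similarity(
        const std::vector<float>& a,
        const std::vector<float>& b) {
    float num = 0.0f, den = 0.0f;
    for (size_t i = 0; i < a.size(); i++) {
        num += std::min(a[i], b[i]);
        den += std::max(a[i], b[i]);
    }
    return num / den;
}

int main() {
    std::vector<float> a = {1, 2, 3};
    std::vector<float> b = {2, 1, 3};
    std::printf("%.3f\n", jaccard_similarity(a, b)); // (1+1+3)/(2+2+3) ≈ 0.714
    return 0;
}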
data/vendor/faiss/faiss/VectorTransform.h
@@ -37,7 +37,7 @@ struct VectorTransform {
      * nothing by default.
      *
      * @param n nb of training vectors
-     * @param x training
+     * @param x training vectors, size n * d
      */
     virtual void train(idx_t n, const float* x);
 
@@ -249,7 +249,7 @@ struct OPQMatrix : LinearTransform {
     void train(idx_t n, const float* x) override;
 };
 
-/** remap dimensions for
+/** remap dimensions for input vectors, possibly inserting 0s
  * strictly speaking this is also a linear transform but we don't want
  * to compute it with matrix multiplies */
 struct RemapDimensionsTransform : VectorTransform {
data/vendor/faiss/faiss/clone_index.cpp
@@ -23,6 +23,7 @@
 #include <faiss/IndexIVF.h>
 #include <faiss/IndexIVFAdditiveQuantizerFastScan.h>
 #include <faiss/IndexIVFFlat.h>
+#include <faiss/IndexIVFFlatPanorama.h>
 #include <faiss/IndexIVFPQ.h>
 #include <faiss/IndexIVFPQFastScan.h>
 #include <faiss/IndexIVFPQR.h>
@@ -97,6 +98,7 @@ IndexIVF* Cloner::clone_IndexIVF(const IndexIVF* ivf) {
 
     TRYCLONE(IndexIVFFlatDedup, ivf)
     TRYCLONE(IndexIVFFlat, ivf)
+    TRYCLONE(IndexIVFFlatPanorama, ivf)
 
     TRYCLONE(IndexIVFSpectralHash, ivf)
 
@@ -152,7 +154,7 @@ IndexNSG* clone_IndexNSG(const IndexNSG* insg) {
     TRYCLONE(IndexNSGPQ, insg)
     TRYCLONE(IndexNSGSQ, insg)
     TRYCLONE(IndexNSG, insg) {
-        FAISS_THROW_MSG("clone not supported for this type of
+        FAISS_THROW_MSG("clone not supported for this type of IndexNSG");
     }
 }
 
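The TRYCLONE chains above rely on a dynamic_cast-and-copy idiom; the macro itself is defined earlier in clone_index.cpp and is not shown in this diff, but it works roughly along these lines (an illustrative reconstruction, not quoted from the source):

// Try the most derived classes first; the trailing else lets consecutive
// TRYCLONE lines chain, falling through to the next candidate type.
#define TRYCLONE(classname, obj)                                      \
    if (const classname* clo = dynamic_cast<const classname*>(obj)) { \
        return new classname(*clo);                                   \
    } else

Under that reading, the new TRYCLONE(IndexIVFFlatPanorama, ivf) entry simply registers one more candidate type in the clone dispatch chain.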
data/vendor/faiss/faiss/gpu/GpuCloner.cpp
@@ -239,7 +239,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.device = device;
         GpuIndexCagra* res =
                 new GpuIndexCagra(provider, icg->d, icg->metric_type, config);
-        res->
+        res->copyFrom_ex(icg, icg->get_numeric_type());
         return res;
     }
 #endif
data/vendor/faiss/faiss/gpu/GpuIndex.h
@@ -77,13 +77,13 @@ class GpuIndex : public faiss::Index {
     /// as needed
     /// Handles paged adds if the add set is too large; calls addInternal_
     void add(idx_t, const float* x) override;
-    void
+    void add_ex(idx_t, const void* x, NumericType numeric_type) override;
 
     /// `x` and `ids` can be resident on the CPU or any GPU; copies are
     /// performed as needed
     /// Handles paged adds if the add set is too large; calls addInternal_
     void add_with_ids(idx_t n, const float* x, const idx_t* ids) override;
-    void
+    void add_with_ids_ex(
             idx_t n,
             const void* x,
             NumericType numeric_type,
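A minimal sketch of calling the renamed _ex entry point, assuming a CUDA-enabled build; the index type and data here are illustrative.

#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>

#include <vector>

int main() {
    const int d = 32;
    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexFlat index(&res, d, faiss::METRIC_L2);

    std::vector<float> xb(100 * d, 0.5f);
    // The _ex variants take a type-erased pointer plus a NumericType tag;
    // Float32 follows the same path as the classic add().
    index.add_ex(100, xb.data(), faiss::NumericType::Float32);
    return 0;
}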
@@ -103,7 +103,7 @@ class GpuIndex : public faiss::Index {
             float* distances,
             idx_t* labels,
             const SearchParameters* params = nullptr) const override;
-    void
+    void search_ex(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -139,7 +139,7 @@ class GpuIndex : public faiss::Index {
   protected:
     /// Copy what we need from the CPU equivalent
     void copyFrom(const faiss::Index* index);
-    void
+    void copyFrom_ex(const faiss::Index* index, NumericType numeric_type) {
         if (numeric_type == NumericType::Float32) {
             copyFrom(index);
         } else {
@@ -149,7 +149,7 @@ class GpuIndex : public faiss::Index {
 
     /// Copy what we have to the CPU equivalent
     void copyTo(faiss::Index* index) const;
-    void
+    void copyTo_ex(faiss::Index* index, NumericType numeric_type) {
         if (numeric_type == NumericType::Float32) {
             copyTo(index);
         } else {
@@ -165,7 +165,7 @@ class GpuIndex : public faiss::Index {
     /// All data is guaranteed to be resident on our device
     virtual void addImpl_(idx_t n, const float* x, const idx_t* ids) = 0;
 
-    virtual void
+    virtual void addImpl_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -187,7 +187,7 @@ class GpuIndex : public faiss::Index {
             idx_t* labels,
             const SearchParameters* params) const = 0;
 
-    virtual void
+    virtual void searchImpl_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -212,7 +212,7 @@ class GpuIndex : public faiss::Index {
     /// Handles paged adds if the add set is too large, passes to
     /// addImpl_ to actually perform the add for the current page
     void addPaged_(idx_t n, const float* x, const idx_t* ids);
-    void
+    void addPaged_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -220,7 +220,7 @@ class GpuIndex : public faiss::Index {
 
     /// Calls addImpl_ for a single page of GPU-resident data
     void addPage_(idx_t n, const float* x, const idx_t* ids);
-    void
+    void addPage_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -235,7 +235,7 @@ class GpuIndex : public faiss::Index {
             idx_t* outIndicesData,
             const SearchParameters* params) const;
 
-    void
+    void searchNonPaged_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -253,7 +253,7 @@ class GpuIndex : public faiss::Index {
             float* outDistancesData,
             idx_t* outIndicesData,
             const SearchParameters* params) const;
-    void
+    void searchFromCpuPaged_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h
@@ -111,7 +111,7 @@ struct GpuIndexBinaryCagra : public IndexBinary {
             const SearchParameters* search_params) const;
 
   protected:
-    /// Manages
+    /// Manages streams, cuBLAS handles and scratch memory for devices
     std::shared_ptr<GpuResources> resources_;
 
     /// Configuration options
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h
@@ -86,7 +86,7 @@ class GpuIndexBinaryFlat : public IndexBinary {
             idx_t* outIndicesData) const;
 
   protected:
-    /// Manages
+    /// Manages streams, cuBLAS handles and scratch memory for devices
     std::shared_ptr<GpuResources> resources_;
 
     /// Configuration options
data/vendor/faiss/faiss/gpu/GpuIndexCagra.h
@@ -135,7 +135,7 @@ struct IVFPQSearchCagraConfig {
     ///
     /// The use of low-precision types reduces the amount of shared memory
     /// required at search time, so fast shared memory kernels can be used even
-    /// for datasets with large
+    /// for datasets with large dimensionality. Note that the recall is slightly
     /// degraded when low-precision type is selected.
 
     cudaDataType_t lut_dtype = CUDA_R_32F;
@@ -166,6 +166,10 @@
     /// negative effects on the search performance if tweaked incorrectly.
 
     double preferred_shmem_carveout = 1.0;
+
+    /// Set the internal batch size to improve GPU utilization at the cost of
+    /// larger memory footprint.
+    uint32_t max_internal_batch_size = 4096;
 };
 
 struct GpuIndexCagraConfig : public GpuIndexConfig {
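A sketch of tuning the new field, assuming IVFPQSearchCagraConfig lives in faiss::gpu like the rest of this header; the value is illustrative.

#include <faiss/gpu/GpuIndexCagra.h>

int main() {
    faiss::gpu::IVFPQSearchCagraConfig search_config;
    // Trade memory for GPU utilization, per the comment in the diff;
    // 4096 is the default, larger values batch more queries internally.
    search_config.max_internal_batch_size = 8192;
    return 0;
}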
@@ -258,7 +262,7 @@ struct GpuIndexCagra : public GpuIndex {
     /// the base dataset. Use this function when you want to add vectors with
     /// ids. Ref: https://github.com/facebookresearch/faiss/issues/4107
     void add(idx_t n, const float* x) override;
-    void
+    void add_ex(idx_t n, const void* x, NumericType numeric_type) override;
 
     /// Trains CAGRA based on the given vector data.
     /// NB: The use of the train function here is to build the CAGRA graph on
@@ -266,12 +270,12 @@ struct GpuIndexCagra : public GpuIndex {
     /// of vectors (without IDs) to the index. There is no external quantizer to
     /// be trained here.
     void train(idx_t n, const float* x) override;
-    void
+    void train_ex(idx_t n, const void* x, NumericType numeric_type) override;
 
     /// Initialize ourselves from the given CPU index; will overwrite
     /// all data in ourselves
     void copyFrom(const faiss::IndexHNSWCagra* index);
-    void
+    void copyFrom_ex(
             const faiss::IndexHNSWCagra* index,
             NumericType numeric_type);
 
@@ -289,7 +293,7 @@ struct GpuIndexCagra : public GpuIndex {
     bool addImplRequiresIDs_() const override;
 
     void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
-    void
+    void addImpl_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -303,7 +307,7 @@ struct GpuIndexCagra : public GpuIndex {
             float* distances,
             idx_t* labels,
             const SearchParameters* search_params) const override;
-    void
+    void searchImpl_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp
@@ -1,3 +1,10 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
 #include <faiss/gpu/GpuIcmEncoder.h>
 #include <faiss/gpu/StandardGpuResources.h>
 #include <faiss/gpu/test/TestUtils.h>
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp
@@ -493,7 +493,7 @@ TEST(TestGpuIndexIVFFlat, Float32_negative) {
     // Construct a positive test set
     auto queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
 
-    // Put all vecs on positive
+    // Put all vecs on positive side
     for (auto& f : queryVecs) {
         f = std::abs(f);
     }
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp
@@ -404,7 +404,7 @@ void AdditiveQuantizer::compute_LUT(
 namespace {
 
 /* compute inner products of one query with all centroids, given a look-up
- * table of all inner
+ * table of all inner products with codebook entries */
 void compute_inner_prod_with_LUT(
         const AdditiveQuantizer& aq,
         const float* LUT,
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp
@@ -36,7 +36,7 @@ RangeSearchResult::RangeSearchResult(size_t nq, bool alloc_lims) : nq(nq) {
 /// for each query
 void RangeSearchResult::do_allocation() {
     // works only if all the partial results are aggregated
-    //
+    // simultaneously
     FAISS_THROW_IF_NOT(labels == nullptr && distances == nullptr);
     size_t ofs = 0;
     for (int i = 0; i < nq; i++) {
@@ -86,7 +86,7 @@ void BufferList::append_buffer() {
     wp = 0;
 }
 
-/// copy
+/// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
 /// tables dest_ids, dest_dis
 void BufferList::copy_range(
         size_t ofs,
data/vendor/faiss/faiss/impl/AuxIndexStructures.h
@@ -80,7 +80,7 @@ struct BufferList {
     /// add one result, possibly appending a new buffer if needed
     void add(idx_t id, float dis);
 
-    /// copy
+    /// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
     /// tables dest_ids, dest_dis
     void copy_range(size_t ofs, size_t n, idx_t* dest_ids, float* dest_dis);
 };
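To make the copy_range contract concrete, a standalone illustration of reading a linear range out of fixed-size chunked buffers (not the vendored implementation):

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    const size_t buffer_size = 4;
    // Two fixed-size buffers viewed as one linear array 0..7.
    std::vector<std::vector<int64_t>> buffers = {{0, 1, 2, 3}, {4, 5, 6, 7}};

    const size_t ofs = 2, n = 4; // range [ofs, ofs+n) spans the boundary
    std::vector<int64_t> dest_ids(n);
    for (size_t i = 0; i < n; i++) {
        dest_ids[i] = buffers[(ofs + i) / buffer_size][(ofs + i) % buffer_size];
    }
    for (int64_t id : dest_ids) {
        std::printf("%lld ", (long long)id); // prints: 2 3 4 5
    }
    std::printf("\n");
    return 0;
}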
data/vendor/faiss/faiss/impl/CodePacker.h
@@ -38,14 +38,14 @@ struct CodePacker {
                                       // code_size
     ) const = 0;
 
-    // pack all
+    // pack all codes in a block
     virtual void pack_all(
             const uint8_t* flat_codes, // codes to write to the block, size
                                        // (nvec * code_size)
             uint8_t* block // block to write to (size block_size)
     ) const;
 
-    // unpack all
+    // unpack all codes in a block
     virtual void unpack_all(
             const uint8_t* block, // block to read from (size block_size)
             uint8_t* flat_codes // where to write the resulting codes size (nvec
data/vendor/faiss/faiss/impl/DistanceComputer.h
@@ -60,7 +60,7 @@ struct DistanceComputer {
 };
 
 /* Wrap the distance computer into one that negates the
-   distances. This makes supporting
+   distances. This makes supporting INNER_PRODUCT search easier */
 
 struct NegativeDistanceComputer : DistanceComputer {
     /// owned by this
@@ -100,7 +100,7 @@ struct NegativeDistanceComputer : DistanceComputer {
         return -basedis->symmetric_dis(i, j);
     }
 
-    virtual ~NegativeDistanceComputer() {
+    virtual ~NegativeDistanceComputer() override {
         delete basedis;
     }
 };
@@ -125,7 +125,7 @@ struct FlatCodesDistanceComputer : DistanceComputer {
     /// compute distance of current query to an encoded vector
     virtual float distance_to_code(const uint8_t* code) = 0;
 
-    virtual ~FlatCodesDistanceComputer() {}
+    virtual ~FlatCodesDistanceComputer() override {}
 };
 
 } // namespace faiss
data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace faiss {
+
+// Forward declarations
+struct NormTableScaler;
+
+namespace rabitq_utils {
+struct QueryFactorsData;
+}
+
+/**
+ * Simple context object that holds processors for FastScan operations.
+ * */
+struct FastScanDistancePostProcessing {
+    /// Norm scaling processor for Additive Quantizers (nullptr if not needed)
+    const NormTableScaler* norm_scaler = nullptr;
+
+    /// Query factors data pointer for RaBitQ (nullptr if not needed)
+    /// This pointer should point to the beginning of the relevant
+    /// QueryFactorsData subset for this context.
+    rabitq_utils::QueryFactorsData* query_factors = nullptr;
+
+    /// The nprobe value used when allocating query_factors storage.
+    /// This is needed because the allocation size (n * nprobe) may use a
+    /// different nprobe than index->nprobe if search params override it.
+    /// Set to 0 to use index->nprobe as fallback.
+    size_t nprobe = 0;
+
+    /// Default constructor - no processing
+    FastScanDistancePostProcessing() = default;
+
+    /// Check if norm scaling is enabled
+    bool has_norm_scaling() const {
+        return norm_scaler != nullptr;
+    }
+
+    /// Check if query factors processing is enabled
+    bool has_query_processing() const {
+        return query_factors != nullptr;
+    }
+};
+
+} // namespace faiss
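A minimal, self-contained sketch of the new context object's flags; the caller code here is hypothetical.

#include <faiss/impl/FastScanDistancePostProcessing.h>

#include <cassert>

int main() {
    // Default-constructed context: no post-processing enabled.
    faiss::FastScanDistancePostProcessing context;
    assert(!context.has_norm_scaling());
    assert(!context.has_query_processing());

    // A search path that overrides nprobe via SearchParameters would record
    // the effective value here so query_factors indexing stays consistent.
    context.nprobe = 64; // illustrative value
    return 0;
}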
data/vendor/faiss/faiss/impl/HNSW.cpp
@@ -60,7 +60,7 @@ HNSW::HNSW(int M) : rng(12345) {
 
 int HNSW::random_level() {
     double f = rng.rand_float();
-    // could be a bit faster with
+    // could be a bit faster with bisection
     for (int level = 0; level < assign_probas.size(); level++) {
         if (f < assign_probas[level]) {
             return level;
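The comment hints that the linear scan could be replaced by bisection. Assuming the loop consumes f by subtracting each level's probability (the subtraction sits just below the shown context in upstream faiss), the draw is equivalent to an upper_bound over cumulative probabilities; an illustrative sketch with made-up probabilities:

#include <algorithm>
#include <cstdio>
#include <numeric>
#include <random>
#include <vector>

// Equivalent level draw via bisection: cum[i] = assign_probas[0..i] summed.
int random_level_bisect(const std::vector<double>& cum, double f) {
    auto it = std::upper_bound(cum.begin(), cum.end(), f);
    if (it == cum.end()) {
        return (int)cum.size() - 1; // tail case, exponentially unlikely
    }
    return (int)(it - cum.begin());
}

int main() {
    std::vector<double> assign_probas = {0.75, 0.1875, 0.046875};
    std::vector<double> cum(assign_probas.size());
    std::partial_sum(assign_probas.begin(), assign_probas.end(), cum.begin());

    std::mt19937 rng(42);
    std::uniform_real_distribution<double> uni(0.0, 1.0);
    std::printf("level = %d\n", random_level_bisect(cum, uni(rng)));
    return 0;
}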
data/vendor/faiss/faiss/impl/HNSW.h
@@ -31,7 +31,7 @@ namespace faiss {
  * Yu. A. Malkov, D. A. Yashunin, arXiv 2017
  *
  * This implementation is heavily influenced by the NMSlib
- * implementation by Yury Malkov and Leonid
+ * implementation by Yury Malkov and Leonid Boytsov
  * (https://github.com/searchivarius/nmslib)
 *
 * The HNSW object stores only the neighbor link structure, see
@@ -61,7 +61,7 @@ struct HNSW {
 
     typedef std::pair<float, storage_idx_t> Node;
 
-    /** Heap structure that allows fast
+    /** Heap structure that allows fast access and updates.
      */
     struct MinimaxHeap {
         int n;
@@ -87,7 +87,7 @@ struct HNSW {
         int count_below(float thresh);
     };
 
-    /// to sort pairs of (id, distance) from nearest to
+    /// to sort pairs of (id, distance) from nearest to farthest or the reverse
     struct NodeDistCloser {
         float d;
         int id;
@@ -160,7 +160,7 @@ struct HNSW {
     /// nb of neighbors for this level
     int nb_neighbors(int layer_no) const;
 
-    ///
+    /// cumulative nb up to (and excluding) this level
     int cum_nb_neighbors(int layer_no) const;
 
     /// range of entries in the neighbors table of vertex no at layer_no
data/vendor/faiss/faiss/impl/IDSelector.cpp
@@ -31,7 +31,7 @@ void IDSelectorRange::find_sorted_ids_bounds(
         *jmin_out = *jmax_out = 0;
         return;
     }
-    //
+    // bisection to find imin
     if (ids[0] >= imin) {
         *jmin_out = 0;
     } else {
@@ -46,7 +46,7 @@ void IDSelectorRange::find_sorted_ids_bounds(
         }
         *jmin_out = j1;
     }
-    //
+    // bisection to find imax
    if (*jmin_out == list_size || ids[*jmin_out] >= imax) {
        *jmax_out = *jmin_out;
    } else {
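For intuition, the bounds these two bisections compute over the sorted id list correspond to standard lower_bound searches; an illustrative equivalent, not the vendored code:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    std::vector<int64_t> ids = {2, 3, 5, 8, 13, 21}; // sorted list ids
    const int64_t imin = 4, imax = 14; // select ids in [imin, imax)

    // jmin: first index with ids[j] >= imin; jmax: first with ids[j] >= imax.
    size_t jmin = std::lower_bound(ids.begin(), ids.end(), imin) - ids.begin();
    size_t jmax = std::lower_bound(ids.begin(), ids.end(), imax) - ids.begin();
    std::printf("jmin=%zu jmax=%zu\n", jmin, jmax); // jmin=2 jmax=5
    return 0;
}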