faiss 0.2.6 → 0.2.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +2 -2
- data/vendor/faiss/faiss/AutoTune.cpp +15 -4
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +1 -5
- data/vendor/faiss/faiss/Clustering.h +0 -2
- data/vendor/faiss/faiss/IVFlib.h +0 -2
- data/vendor/faiss/faiss/Index.h +1 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
- data/vendor/faiss/faiss/IndexBinary.h +0 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
- data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
- data/vendor/faiss/faiss/IndexFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
- data/vendor/faiss/faiss/IndexHNSW.h +0 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
- data/vendor/faiss/faiss/IndexIDMap.h +0 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
- data/vendor/faiss/faiss/IndexIVF.h +121 -61
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
- data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
- data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
- data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
- data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
- data/vendor/faiss/faiss/IndexReplicas.h +0 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
- data/vendor/faiss/faiss/IndexShards.cpp +26 -109
- data/vendor/faiss/faiss/IndexShards.h +2 -3
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
- data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
- data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
- data/vendor/faiss/faiss/MetaIndexes.h +29 -0
- data/vendor/faiss/faiss/MetricType.h +14 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
- data/vendor/faiss/faiss/VectorTransform.h +1 -3
- data/vendor/faiss/faiss/clone_index.cpp +232 -18
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
- data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
- data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
- data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
- data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
- data/vendor/faiss/faiss/impl/HNSW.h +6 -9
- data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
- data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
- data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
- data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
- data/vendor/faiss/faiss/impl/NSG.h +4 -7
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +8 -10
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
- data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
- data/vendor/faiss/faiss/utils/Heap.h +35 -1
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
- data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
- data/vendor/faiss/faiss/utils/distances.cpp +61 -7
- data/vendor/faiss/faiss/utils/distances.h +11 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
- data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
- data/vendor/faiss/faiss/utils/fp16.h +7 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
- data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
- data/vendor/faiss/faiss/utils/hamming.h +21 -10
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
- data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
- data/vendor/faiss/faiss/utils/sorting.h +71 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
- data/vendor/faiss/faiss/utils/utils.cpp +4 -176
- data/vendor/faiss/faiss/utils/utils.h +2 -9
- metadata +29 -3
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
@@ -53,37 +53,37 @@ class GpuIndexBinaryFlat : public IndexBinary {
|
|
53
53
|
/// in the index instance
|
54
54
|
void copyTo(faiss::IndexBinaryFlat* index) const;
|
55
55
|
|
56
|
-
void add(faiss::
|
56
|
+
void add(faiss::idx_t n, const uint8_t* x) override;
|
57
57
|
|
58
58
|
void reset() override;
|
59
59
|
|
60
60
|
void search(
|
61
|
-
|
61
|
+
idx_t n,
|
62
62
|
const uint8_t* x,
|
63
|
-
faiss::IndexBinary
|
63
|
+
// faiss::IndexBinary has idx_t for k
|
64
|
+
idx_t k,
|
64
65
|
int32_t* distances,
|
65
|
-
faiss::
|
66
|
+
faiss::idx_t* labels,
|
66
67
|
const faiss::SearchParameters* params = nullptr) const override;
|
67
68
|
|
68
|
-
void reconstruct(faiss::
|
69
|
-
const override;
|
69
|
+
void reconstruct(faiss::idx_t key, uint8_t* recons) const override;
|
70
70
|
|
71
71
|
protected:
|
72
72
|
/// Called from search when the input data is on the CPU;
|
73
73
|
/// potentially allows for pinned memory usage
|
74
74
|
void searchFromCpuPaged_(
|
75
|
-
|
75
|
+
idx_t n,
|
76
76
|
const uint8_t* x,
|
77
77
|
int k,
|
78
78
|
int32_t* outDistancesData,
|
79
|
-
|
79
|
+
idx_t* outIndicesData) const;
|
80
80
|
|
81
81
|
void searchNonPaged_(
|
82
|
-
|
82
|
+
idx_t n,
|
83
83
|
const uint8_t* x,
|
84
84
|
int k,
|
85
85
|
int32_t* outDistancesData,
|
86
|
-
|
86
|
+
idx_t* outIndicesData) const;
|
87
87
|
|
88
88
|
protected:
|
89
89
|
/// Manages streans, cuBLAS handles and scratch memory for devices
|
@@ -82,33 +82,32 @@ class GpuIndexFlat : public GpuIndex {
|
|
82
82
|
void reset() override;
|
83
83
|
|
84
84
|
/// This index is not trained, so this does nothing
|
85
|
-
void train(
|
85
|
+
void train(idx_t n, const float* x) override;
|
86
86
|
|
87
87
|
/// Overrides to avoid excessive copies
|
88
|
-
void add(
|
88
|
+
void add(idx_t, const float* x) override;
|
89
89
|
|
90
90
|
/// Reconstruction methods; prefer the batch reconstruct as it will
|
91
91
|
/// be more efficient
|
92
|
-
void reconstruct(
|
92
|
+
void reconstruct(idx_t key, float* out) const override;
|
93
93
|
|
94
94
|
/// Batch reconstruction method
|
95
|
-
void reconstruct_n(
|
96
|
-
const override;
|
95
|
+
void reconstruct_n(idx_t i0, idx_t num, float* out) const override;
|
97
96
|
|
98
97
|
/// Batch reconstruction method
|
99
|
-
void reconstruct_batch(
|
98
|
+
void reconstruct_batch(idx_t n, const idx_t* keys, float* out)
|
100
99
|
const override;
|
101
100
|
|
102
101
|
/// Compute residual
|
103
|
-
void compute_residual(const float* x, float* residual,
|
102
|
+
void compute_residual(const float* x, float* residual, idx_t key)
|
104
103
|
const override;
|
105
104
|
|
106
105
|
/// Compute residual (batch mode)
|
107
106
|
void compute_residual_n(
|
108
|
-
|
107
|
+
idx_t n,
|
109
108
|
const float* xs,
|
110
109
|
float* residuals,
|
111
|
-
const
|
110
|
+
const idx_t* keys) const override;
|
112
111
|
|
113
112
|
/// For internal access
|
114
113
|
inline FlatIndex* getGpuData() {
|
@@ -121,15 +120,15 @@ class GpuIndexFlat : public GpuIndex {
|
|
121
120
|
bool addImplRequiresIDs_() const override;
|
122
121
|
|
123
122
|
/// Called from GpuIndex for add
|
124
|
-
void addImpl_(
|
123
|
+
void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
|
125
124
|
|
126
125
|
/// Called from GpuIndex for search
|
127
126
|
void searchImpl_(
|
128
|
-
|
127
|
+
idx_t n,
|
129
128
|
const float* x,
|
130
129
|
int k,
|
131
130
|
float* distances,
|
132
|
-
|
131
|
+
idx_t* labels,
|
133
132
|
const SearchParameters* params) const override;
|
134
133
|
|
135
134
|
protected:
|
@@ -33,7 +33,7 @@ struct GpuIndexIVFConfig : public GpuIndexConfig {
|
|
33
33
|
/// Base class of all GPU IVF index types. This (for now) deliberately does not
|
34
34
|
/// inherit from IndexIVF, as many of the public data members and functionality
|
35
35
|
/// in IndexIVF is not supported in the same manner on the GPU.
|
36
|
-
class GpuIndexIVF : public GpuIndex {
|
36
|
+
class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
|
37
37
|
public:
|
38
38
|
/// Version that auto-constructs a flat coarse quantizer based on the
|
39
39
|
/// desired metric
|
@@ -42,7 +42,7 @@ class GpuIndexIVF : public GpuIndex {
|
|
42
42
|
int dims,
|
43
43
|
faiss::MetricType metric,
|
44
44
|
float metricArg,
|
45
|
-
|
45
|
+
idx_t nlist,
|
46
46
|
GpuIndexIVFConfig config = GpuIndexIVFConfig());
|
47
47
|
|
48
48
|
/// Version that takes a coarse quantizer instance. The GpuIndexIVF does not
|
@@ -53,7 +53,7 @@ class GpuIndexIVF : public GpuIndex {
|
|
53
53
|
int dims,
|
54
54
|
faiss::MetricType metric,
|
55
55
|
float metricArg,
|
56
|
-
|
56
|
+
idx_t nlist,
|
57
57
|
GpuIndexIVFConfig config = GpuIndexIVFConfig());
|
58
58
|
|
59
59
|
~GpuIndexIVF() override;
|
@@ -75,10 +75,10 @@ class GpuIndexIVF : public GpuIndex {
|
|
75
75
|
virtual void updateQuantizer() = 0;
|
76
76
|
|
77
77
|
/// Returns the number of inverted lists we're managing
|
78
|
-
|
78
|
+
idx_t getNumLists() const;
|
79
79
|
|
80
80
|
/// Returns the number of vectors present in a particular inverted list
|
81
|
-
|
81
|
+
idx_t getListLength(idx_t listId) const;
|
82
82
|
|
83
83
|
/// Return the encoded vector data contained in a particular inverted list,
|
84
84
|
/// for debugging purposes.
|
@@ -86,34 +86,13 @@ class GpuIndexIVF : public GpuIndex {
|
|
86
86
|
/// GPU-side representation.
|
87
87
|
/// Otherwise, it is converted to the CPU format.
|
88
88
|
/// compliant format, while the native GPU format may differ.
|
89
|
-
std::vector<uint8_t> getListVectorData(
|
89
|
+
std::vector<uint8_t> getListVectorData(idx_t listId, bool gpuFormat = false)
|
90
90
|
const;
|
91
91
|
|
92
92
|
/// Return the vector indices contained in a particular inverted list, for
|
93
93
|
/// debugging purposes.
|
94
|
-
std::vector<
|
95
|
-
|
96
|
-
/// Sets the number of list probes per query
|
97
|
-
void setNumProbes(int nprobe);
|
98
|
-
|
99
|
-
/// Returns our current number of list probes per query
|
100
|
-
int getNumProbes() const;
|
101
|
-
|
102
|
-
/// Same interface as faiss::IndexIVF, in order to search a set of vectors
|
103
|
-
/// pre-quantized by the IVF quantizer. Does not include IndexIVFStats as
|
104
|
-
/// that can only be obtained on the host via a GPU d2h copy.
|
105
|
-
/// @param n nb of vectors to query
|
106
|
-
/// @param x query vectors, size nx * d
|
107
|
-
/// @param assign coarse quantization indices, size nx * nprobe
|
108
|
-
/// @param centroid_dis
|
109
|
-
/// distances to coarse centroids, size nx * nprobe
|
110
|
-
/// @param distance
|
111
|
-
/// output distances, size n * k
|
112
|
-
/// @param labels output labels, size n * k
|
113
|
-
/// @param store_pairs store inv list index + inv list offset
|
114
|
-
/// instead in upper/lower 32 bit of result,
|
115
|
-
/// instead of ids (used for reranking).
|
116
|
-
/// @param params used to override the object's search parameters
|
94
|
+
std::vector<idx_t> getListIndices(idx_t listId) const;
|
95
|
+
|
117
96
|
void search_preassigned(
|
118
97
|
idx_t n,
|
119
98
|
const float* x,
|
@@ -123,41 +102,41 @@ class GpuIndexIVF : public GpuIndex {
|
|
123
102
|
float* distances,
|
124
103
|
idx_t* labels,
|
125
104
|
bool store_pairs,
|
126
|
-
const SearchParametersIVF* params = nullptr
|
105
|
+
const SearchParametersIVF* params = nullptr,
|
106
|
+
IndexIVFStats* stats = nullptr) const override;
|
107
|
+
|
108
|
+
// not implemented for GPU
|
109
|
+
void range_search_preassigned(
|
110
|
+
idx_t nx,
|
111
|
+
const float* x,
|
112
|
+
float radius,
|
113
|
+
const idx_t* keys,
|
114
|
+
const float* coarse_dis,
|
115
|
+
RangeSearchResult* result,
|
116
|
+
bool store_pairs = false,
|
117
|
+
const IVFSearchParameters* params = nullptr,
|
118
|
+
IndexIVFStats* stats = nullptr) const override;
|
127
119
|
|
128
120
|
protected:
|
121
|
+
/// From either the current set nprobe or the SearchParameters if available,
|
122
|
+
/// return the nprobe that we should use for the current search
|
123
|
+
int getCurrentNProbe_(const SearchParameters* params) const;
|
129
124
|
void verifyIVFSettings_() const;
|
130
125
|
bool addImplRequiresIDs_() const override;
|
131
|
-
void trainQuantizer_(
|
126
|
+
void trainQuantizer_(idx_t n, const float* x);
|
132
127
|
|
133
128
|
/// Called from GpuIndex for add/add_with_ids
|
134
|
-
void addImpl_(
|
129
|
+
void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
|
135
130
|
|
136
131
|
/// Called from GpuIndex for search
|
137
132
|
void searchImpl_(
|
138
|
-
|
133
|
+
idx_t n,
|
139
134
|
const float* x,
|
140
135
|
int k,
|
141
136
|
float* distances,
|
142
|
-
|
137
|
+
idx_t* labels,
|
143
138
|
const SearchParameters* params) const override;
|
144
139
|
|
145
|
-
public:
|
146
|
-
/// Exposing this like the CPU version for manipulation
|
147
|
-
ClusteringParameters cp;
|
148
|
-
|
149
|
-
/// Exposing this like the CPU version for query
|
150
|
-
int nlist;
|
151
|
-
|
152
|
-
/// Exposing this like the CPU version for manipulation
|
153
|
-
int nprobe;
|
154
|
-
|
155
|
-
/// A user-pluggable coarse quantizer
|
156
|
-
Index* quantizer;
|
157
|
-
|
158
|
-
/// Whether or not we own the coarse quantizer
|
159
|
-
bool own_fields;
|
160
|
-
|
161
140
|
protected:
|
162
141
|
/// Our configuration options
|
163
142
|
const GpuIndexIVFConfig ivfConfig_;
|
@@ -44,7 +44,7 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
|
|
44
44
|
GpuIndexIVFFlat(
|
45
45
|
GpuResourcesProvider* provider,
|
46
46
|
int dims,
|
47
|
-
|
47
|
+
idx_t nlist,
|
48
48
|
faiss::MetricType metric = faiss::METRIC_L2,
|
49
49
|
GpuIndexIVFFlatConfig config = GpuIndexIVFFlatConfig());
|
50
50
|
|
@@ -54,7 +54,7 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
|
|
54
54
|
GpuResourcesProvider* provider,
|
55
55
|
Index* coarseQuantizer,
|
56
56
|
int dims,
|
57
|
-
|
57
|
+
idx_t nlist,
|
58
58
|
faiss::MetricType metric = faiss::METRIC_L2,
|
59
59
|
GpuIndexIVFFlatConfig config = GpuIndexIVFFlatConfig());
|
60
60
|
|
@@ -85,7 +85,7 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
|
|
85
85
|
void updateQuantizer() override;
|
86
86
|
|
87
87
|
/// Trains the coarse quantizer based on the given vector data
|
88
|
-
void train(
|
88
|
+
void train(idx_t n, const float* x) override;
|
89
89
|
|
90
90
|
protected:
|
91
91
|
/// Our configuration options
|
@@ -68,9 +68,9 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
|
|
68
68
|
GpuIndexIVFPQ(
|
69
69
|
GpuResourcesProvider* provider,
|
70
70
|
int dims,
|
71
|
-
|
72
|
-
|
73
|
-
|
71
|
+
idx_t nlist,
|
72
|
+
idx_t subQuantizers,
|
73
|
+
idx_t bitsPerCode,
|
74
74
|
faiss::MetricType metric = faiss::METRIC_L2,
|
75
75
|
GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
|
76
76
|
|
@@ -80,9 +80,9 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
|
|
80
80
|
GpuResourcesProvider* provider,
|
81
81
|
Index* coarseQuantizer,
|
82
82
|
int dims,
|
83
|
-
|
84
|
-
|
85
|
-
|
83
|
+
idx_t nlist,
|
84
|
+
idx_t subQuantizers,
|
85
|
+
idx_t bitsPerCode,
|
86
86
|
faiss::MetricType metric = faiss::METRIC_L2,
|
87
87
|
GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
|
88
88
|
|
@@ -131,7 +131,7 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
|
|
131
131
|
void updateQuantizer() override;
|
132
132
|
|
133
133
|
/// Trains the coarse and product quantizer based on the given vector data
|
134
|
-
void train(
|
134
|
+
void train(idx_t n, const float* x) override;
|
135
135
|
|
136
136
|
public:
|
137
137
|
/// Like the CPU version, we expose a publically-visible ProductQuantizer
|
@@ -143,7 +143,7 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
|
|
143
143
|
void verifyPQSettings_() const;
|
144
144
|
|
145
145
|
/// Trains the PQ quantizer based on the given vector data
|
146
|
-
void trainResidualQuantizer_(
|
146
|
+
void trainResidualQuantizer_(idx_t n, const float* x);
|
147
147
|
|
148
148
|
protected:
|
149
149
|
/// Our configuration options that we were initialized with
|
@@ -42,7 +42,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
|
|
42
42
|
GpuIndexIVFScalarQuantizer(
|
43
43
|
GpuResourcesProvider* provider,
|
44
44
|
int dims,
|
45
|
-
|
45
|
+
idx_t nlist,
|
46
46
|
faiss::ScalarQuantizer::QuantizerType qtype,
|
47
47
|
faiss::MetricType metric = MetricType::METRIC_L2,
|
48
48
|
bool encodeResidual = true,
|
@@ -55,7 +55,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
|
|
55
55
|
GpuResourcesProvider* provider,
|
56
56
|
Index* coarseQuantizer,
|
57
57
|
int dims,
|
58
|
-
|
58
|
+
idx_t nlist,
|
59
59
|
faiss::ScalarQuantizer::QuantizerType qtype,
|
60
60
|
faiss::MetricType metric = MetricType::METRIC_L2,
|
61
61
|
bool encodeResidual = true,
|
@@ -89,14 +89,14 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
|
|
89
89
|
void updateQuantizer() override;
|
90
90
|
|
91
91
|
/// Trains the coarse and scalar quantizer based on the given vector data
|
92
|
-
void train(
|
92
|
+
void train(idx_t n, const float* x) override;
|
93
93
|
|
94
94
|
protected:
|
95
95
|
/// Validates index SQ parameters
|
96
96
|
void verifySQSettings_() const;
|
97
97
|
|
98
98
|
/// Called from train to handle SQ residual training
|
99
|
-
void trainResiduals_(
|
99
|
+
void trainResiduals_(idx_t n, const float* x);
|
100
100
|
|
101
101
|
public:
|
102
102
|
/// Exposed like the CPU version
|
@@ -20,13 +20,10 @@ namespace gpu {
|
|
20
20
|
int getMaxKSelection();
|
21
21
|
|
22
22
|
// Validate the k parameter for search
|
23
|
-
void validateKSelect(
|
23
|
+
void validateKSelect(int k);
|
24
24
|
|
25
25
|
// Validate the nprobe parameter for search
|
26
|
-
void validateNProbe(
|
27
|
-
|
28
|
-
/// Validate the n (number of vectors) parameter for add, search, reconstruct
|
29
|
-
void validateNumVectors(Index::idx_t n);
|
26
|
+
void validateNProbe(size_t nprobe);
|
30
27
|
|
31
28
|
} // namespace gpu
|
32
29
|
} // namespace faiss
|
@@ -14,21 +14,23 @@ namespace gpu {
|
|
14
14
|
// Utility function to translate (list id, offset) to a user index on
|
15
15
|
// the CPU. In a cpp in order to use OpenMP
|
16
16
|
void ivfOffsetToUserIndex(
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
idx_t* indices,
|
18
|
+
idx_t numLists,
|
19
|
+
idx_t queries,
|
20
20
|
int k,
|
21
|
-
const std::vector<std::vector<
|
21
|
+
const std::vector<std::vector<idx_t>>& listOffsetToUserIndex) {
|
22
22
|
FAISS_ASSERT(numLists == listOffsetToUserIndex.size());
|
23
23
|
|
24
24
|
#pragma omp parallel for
|
25
|
-
for (
|
26
|
-
for (
|
25
|
+
for (idx_t q = 0; q < queries; ++q) {
|
26
|
+
for (idx_t r = 0; r < k; ++r) {
|
27
27
|
auto offsetIndex = indices[q * k + r];
|
28
28
|
|
29
|
-
if (offsetIndex < 0)
|
29
|
+
if (offsetIndex < 0) {
|
30
30
|
continue;
|
31
|
+
}
|
31
32
|
|
33
|
+
// FIXME: implicit limit on list and list offset length
|
32
34
|
int listId = (int)(offsetIndex >> 32);
|
33
35
|
int listOffset = (int)(offsetIndex & 0xffffffff);
|
34
36
|
|
@@ -16,11 +16,11 @@ namespace gpu {
|
|
16
16
|
/// Utility function to translate (list id, offset) to a user index on
|
17
17
|
/// the CPU. In a cpp in order to use OpenMP.
|
18
18
|
void ivfOffsetToUserIndex(
|
19
|
-
|
20
|
-
|
21
|
-
|
19
|
+
idx_t* indices,
|
20
|
+
idx_t numLists,
|
21
|
+
idx_t queries,
|
22
22
|
int k,
|
23
|
-
const std::vector<std::vector<
|
23
|
+
const std::vector<std::vector<idx_t>>& listOffsetToUserIndex);
|
24
24
|
|
25
25
|
} // namespace gpu
|
26
26
|
} // namespace faiss
|
@@ -58,8 +58,8 @@ void IndexWrapper<GpuIndex>::runOnIndices(std::function<void(GpuIndex*)> f) {
|
|
58
58
|
}
|
59
59
|
|
60
60
|
template <typename GpuIndex>
|
61
|
-
void IndexWrapper<GpuIndex>::setNumProbes(
|
62
|
-
runOnIndices([nprobe](GpuIndex* index) { index->
|
61
|
+
void IndexWrapper<GpuIndex>::setNumProbes(size_t nprobe) {
|
62
|
+
runOnIndices([nprobe](GpuIndex* index) { index->nprobe = nprobe; });
|
63
63
|
}
|
64
64
|
|
65
65
|
} // namespace gpu
|
@@ -18,9 +18,9 @@
|
|
18
18
|
|
19
19
|
void compareBinaryDist(
|
20
20
|
const std::vector<int>& cpuDist,
|
21
|
-
const std::vector<faiss::
|
21
|
+
const std::vector<faiss::idx_t>& cpuLabels,
|
22
22
|
const std::vector<int>& gpuDist,
|
23
|
-
const std::vector<faiss::
|
23
|
+
const std::vector<faiss::idx_t>& gpuLabels,
|
24
24
|
int numQuery,
|
25
25
|
int k) {
|
26
26
|
for (int i = 0; i < numQuery; ++i) {
|
@@ -29,8 +29,8 @@ void compareBinaryDist(
|
|
29
29
|
// encounters the values. The last set of equivalent distances seen in
|
30
30
|
// the min-k might be truncated, so we can't check that set, but all
|
31
31
|
// others we can check.
|
32
|
-
std::set<faiss::
|
33
|
-
std::set<faiss::
|
32
|
+
std::set<faiss::idx_t> cpuLabelSet;
|
33
|
+
std::set<faiss::idx_t> gpuLabelSet;
|
34
34
|
|
35
35
|
int curDist = -1;
|
36
36
|
|
@@ -89,13 +89,13 @@ void testGpuIndexBinaryFlat(int kOverride = -1) {
|
|
89
89
|
auto query = faiss::gpu::randBinaryVecs(numQuery, dims);
|
90
90
|
|
91
91
|
std::vector<int> cpuDist(numQuery * k);
|
92
|
-
std::vector<faiss::
|
92
|
+
std::vector<faiss::idx_t> cpuLabels(numQuery * k);
|
93
93
|
|
94
94
|
cpuIndex.search(
|
95
95
|
numQuery, query.data(), k, cpuDist.data(), cpuLabels.data());
|
96
96
|
|
97
97
|
std::vector<int> gpuDist(numQuery * k);
|
98
|
-
std::vector<faiss::
|
98
|
+
std::vector<faiss::idx_t> gpuLabels(numQuery * k);
|
99
99
|
|
100
100
|
gpuIndex.search(
|
101
101
|
numQuery, query.data(), k, gpuDist.data(), gpuLabels.data());
|
@@ -115,6 +115,55 @@ TEST(TestGpuIndexBinaryFlat, Test32) {
|
|
115
115
|
}
|
116
116
|
}
|
117
117
|
|
118
|
+
TEST(TestGpuIndexBinaryFlat, LargeIndex) {
|
119
|
+
// Construct on a random device to test multi-device, if we have
|
120
|
+
// multiple devices
|
121
|
+
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
|
122
|
+
|
123
|
+
faiss::gpu::StandardGpuResources res;
|
124
|
+
res.noTempMemory();
|
125
|
+
|
126
|
+
// Skip this device if we do not have sufficient memory
|
127
|
+
constexpr size_t kMem = size_t(8) * 1024 * 1024 * 1024;
|
128
|
+
|
129
|
+
if (faiss::gpu::getFreeMemory(device) < kMem) {
|
130
|
+
std::cerr << "TestGpuIndexFlat.LargeIndex: skipping due "
|
131
|
+
"to insufficient device memory\n";
|
132
|
+
return;
|
133
|
+
}
|
134
|
+
|
135
|
+
std::cerr << "Running LargeIndex test\n";
|
136
|
+
|
137
|
+
faiss::gpu::GpuIndexBinaryFlatConfig config;
|
138
|
+
config.device = device;
|
139
|
+
|
140
|
+
int dims = 1250 * 8;
|
141
|
+
faiss::gpu::GpuIndexBinaryFlat gpuIndex(&res, dims, config);
|
142
|
+
|
143
|
+
faiss::IndexBinaryFlat cpuIndex(dims);
|
144
|
+
|
145
|
+
int k = 10;
|
146
|
+
int nb = 4000000;
|
147
|
+
int nq = 10;
|
148
|
+
|
149
|
+
auto xb = faiss::gpu::randBinaryVecs(nb, dims);
|
150
|
+
auto xq = faiss::gpu::randBinaryVecs(nq, dims);
|
151
|
+
gpuIndex.add(nb, xb.data());
|
152
|
+
cpuIndex.add(nb, xb.data());
|
153
|
+
|
154
|
+
std::vector<int> cpuDist(nq * k);
|
155
|
+
std::vector<faiss::idx_t> cpuLabels(nq * k);
|
156
|
+
|
157
|
+
cpuIndex.search(nq, xq.data(), k, cpuDist.data(), cpuLabels.data());
|
158
|
+
|
159
|
+
std::vector<int> gpuDist(nq * k);
|
160
|
+
std::vector<faiss::idx_t> gpuLabels(nq * k);
|
161
|
+
|
162
|
+
gpuIndex.search(nq, xq.data(), k, gpuDist.data(), gpuLabels.data());
|
163
|
+
|
164
|
+
compareBinaryDist(cpuDist, cpuLabels, gpuDist, gpuLabels, nq, k);
|
165
|
+
}
|
166
|
+
|
118
167
|
int main(int argc, char** argv) {
|
119
168
|
testing::InitGoogleTest(&argc, argv);
|
120
169
|
|
@@ -141,6 +141,20 @@ TEST(TestGpuIndexFlat, L2_Float32) {
|
|
141
141
|
}
|
142
142
|
}
|
143
143
|
|
144
|
+
// At least one test for the k > 1024 select
|
145
|
+
TEST(TestGpuIndexFlat, L2_k_2048) {
|
146
|
+
if (faiss::gpu::getMaxKSelection() >= 2048) {
|
147
|
+
TestFlatOptions opt;
|
148
|
+
opt.metric = faiss::MetricType::METRIC_L2;
|
149
|
+
opt.useFloat16 = false;
|
150
|
+
opt.kOverride = 2048;
|
151
|
+
opt.dimOverride = 128;
|
152
|
+
opt.numVecsOverride = 10000;
|
153
|
+
|
154
|
+
testFlat(opt);
|
155
|
+
}
|
156
|
+
}
|
157
|
+
|
144
158
|
// test specialized k == 1 codepath
|
145
159
|
TEST(TestGpuIndexFlat, L2_Float32_K1) {
|
146
160
|
for (int tries = 0; tries < 3; ++tries) {
|
@@ -220,7 +234,7 @@ TEST(TestGpuIndexFlat, QueryEmpty) {
|
|
220
234
|
std::vector<float> queries(numQuery * dim, 1.0f);
|
221
235
|
|
222
236
|
std::vector<float> dist(numQuery * k, 0);
|
223
|
-
std::vector<faiss::
|
237
|
+
std::vector<faiss::idx_t> ind(numQuery * k);
|
224
238
|
|
225
239
|
gpuIndex.search(numQuery, queries.data(), k, dist.data(), ind.data());
|
226
240
|
|
@@ -437,7 +451,7 @@ TEST(TestGpuIndexFlat, Residual) {
|
|
437
451
|
cpuIndex.add(numVecs, vecs.data());
|
438
452
|
gpuIndex.add(numVecs, vecs.data());
|
439
453
|
|
440
|
-
auto indexVecs = std::vector<faiss::
|
454
|
+
auto indexVecs = std::vector<faiss::idx_t>{0, 2, 4, 6, 8};
|
441
455
|
auto queryVecs = faiss::gpu::randVecs(indexVecs.size(), dim);
|
442
456
|
|
443
457
|
auto residualsCpu = std::vector<float>(indexVecs.size() * dim);
|
@@ -517,7 +531,7 @@ TEST(TestGpuIndexFlat, Reconstruct) {
|
|
517
531
|
|
518
532
|
// Test reconstruct_batch
|
519
533
|
if (false) {
|
520
|
-
auto reconstructKeys = std::vector<faiss::
|
534
|
+
auto reconstructKeys = std::vector<faiss::idx_t>{1, 3, 5};
|
521
535
|
auto reconstructVecs =
|
522
536
|
std::vector<float>(reconstructKeys.size() * dim);
|
523
537
|
|
@@ -565,7 +579,7 @@ TEST(TestGpuIndexFlat, SearchAndReconstruct) {
|
|
565
579
|
gpuIndex.add(nb, xb.data());
|
566
580
|
|
567
581
|
std::vector<float> refDistance(nq * k, 0);
|
568
|
-
std::vector<faiss::
|
582
|
+
std::vector<faiss::idx_t> refIndices(nq * k, -1);
|
569
583
|
std::vector<float> refReconstruct(nq * k * dim, 0);
|
570
584
|
cpuIndex.search_and_reconstruct(
|
571
585
|
nq,
|
@@ -576,7 +590,7 @@ TEST(TestGpuIndexFlat, SearchAndReconstruct) {
|
|
576
590
|
refReconstruct.data());
|
577
591
|
|
578
592
|
std::vector<float> testDistance(nq * k, 0);
|
579
|
-
std::vector<faiss::
|
593
|
+
std::vector<faiss::idx_t> testIndices(nq * k, -1);
|
580
594
|
std::vector<float> testReconstruct(nq * k * dim, 0);
|
581
595
|
gpuIndex.search_and_reconstruct(
|
582
596
|
nq,
|
@@ -606,7 +620,7 @@ TEST(TestGpuIndexFlat, SearchAndReconstruct) {
|
|
606
620
|
// above will ensure a decent number of matches), reconstruction should be
|
607
621
|
// the same for the vectors that do match
|
608
622
|
for (int i = 0; i < nq; ++i) {
|
609
|
-
std::unordered_map<faiss::
|
623
|
+
std::unordered_map<faiss::idx_t, int> refLocation;
|
610
624
|
|
611
625
|
for (int j = 0; j < k; ++j) {
|
612
626
|
refLocation.insert(std::make_pair(refIndices[i * k + j], j));
|