faiss 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +2 -2
- data/vendor/faiss/faiss/AutoTune.cpp +15 -4
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +1 -5
- data/vendor/faiss/faiss/Clustering.h +0 -2
- data/vendor/faiss/faiss/IVFlib.h +0 -2
- data/vendor/faiss/faiss/Index.h +1 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
- data/vendor/faiss/faiss/IndexBinary.h +0 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
- data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
- data/vendor/faiss/faiss/IndexFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
- data/vendor/faiss/faiss/IndexHNSW.h +0 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
- data/vendor/faiss/faiss/IndexIDMap.h +0 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
- data/vendor/faiss/faiss/IndexIVF.h +121 -61
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
- data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
- data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
- data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
- data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
- data/vendor/faiss/faiss/IndexReplicas.h +0 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
- data/vendor/faiss/faiss/IndexShards.cpp +26 -109
- data/vendor/faiss/faiss/IndexShards.h +2 -3
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
- data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
- data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
- data/vendor/faiss/faiss/MetaIndexes.h +29 -0
- data/vendor/faiss/faiss/MetricType.h +14 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
- data/vendor/faiss/faiss/VectorTransform.h +1 -3
- data/vendor/faiss/faiss/clone_index.cpp +232 -18
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
- data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
- data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
- data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
- data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
- data/vendor/faiss/faiss/impl/HNSW.h +6 -9
- data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
- data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
- data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
- data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
- data/vendor/faiss/faiss/impl/NSG.h +4 -7
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +8 -10
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
- data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
- data/vendor/faiss/faiss/utils/Heap.h +35 -1
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
- data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
- data/vendor/faiss/faiss/utils/distances.cpp +61 -7
- data/vendor/faiss/faiss/utils/distances.h +11 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
- data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
- data/vendor/faiss/faiss/utils/fp16.h +7 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
- data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
- data/vendor/faiss/faiss/utils/hamming.h +21 -10
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
- data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
- data/vendor/faiss/faiss/utils/sorting.h +71 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
- data/vendor/faiss/faiss/utils/utils.cpp +4 -176
- data/vendor/faiss/faiss/utils/utils.h +2 -9
- metadata +29 -3
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
|
@@ -53,37 +53,37 @@ class GpuIndexBinaryFlat : public IndexBinary {
|
|
|
53
53
|
/// in the index instance
|
|
54
54
|
void copyTo(faiss::IndexBinaryFlat* index) const;
|
|
55
55
|
|
|
56
|
-
void add(faiss::
|
|
56
|
+
void add(faiss::idx_t n, const uint8_t* x) override;
|
|
57
57
|
|
|
58
58
|
void reset() override;
|
|
59
59
|
|
|
60
60
|
void search(
|
|
61
|
-
|
|
61
|
+
idx_t n,
|
|
62
62
|
const uint8_t* x,
|
|
63
|
-
faiss::IndexBinary
|
|
63
|
+
// faiss::IndexBinary has idx_t for k
|
|
64
|
+
idx_t k,
|
|
64
65
|
int32_t* distances,
|
|
65
|
-
faiss::
|
|
66
|
+
faiss::idx_t* labels,
|
|
66
67
|
const faiss::SearchParameters* params = nullptr) const override;
|
|
67
68
|
|
|
68
|
-
void reconstruct(faiss::
|
|
69
|
-
const override;
|
|
69
|
+
void reconstruct(faiss::idx_t key, uint8_t* recons) const override;
|
|
70
70
|
|
|
71
71
|
protected:
|
|
72
72
|
/// Called from search when the input data is on the CPU;
|
|
73
73
|
/// potentially allows for pinned memory usage
|
|
74
74
|
void searchFromCpuPaged_(
|
|
75
|
-
|
|
75
|
+
idx_t n,
|
|
76
76
|
const uint8_t* x,
|
|
77
77
|
int k,
|
|
78
78
|
int32_t* outDistancesData,
|
|
79
|
-
|
|
79
|
+
idx_t* outIndicesData) const;
|
|
80
80
|
|
|
81
81
|
void searchNonPaged_(
|
|
82
|
-
|
|
82
|
+
idx_t n,
|
|
83
83
|
const uint8_t* x,
|
|
84
84
|
int k,
|
|
85
85
|
int32_t* outDistancesData,
|
|
86
|
-
|
|
86
|
+
idx_t* outIndicesData) const;
|
|
87
87
|
|
|
88
88
|
protected:
|
|
89
89
|
/// Manages streams, cuBLAS handles and scratch memory for devices
|
|
@@ -82,33 +82,32 @@ class GpuIndexFlat : public GpuIndex {
|
|
|
82
82
|
void reset() override;
|
|
83
83
|
|
|
84
84
|
/// This index is not trained, so this does nothing
|
|
85
|
-
void train(
|
|
85
|
+
void train(idx_t n, const float* x) override;
|
|
86
86
|
|
|
87
87
|
/// Overrides to avoid excessive copies
|
|
88
|
-
void add(
|
|
88
|
+
void add(idx_t, const float* x) override;
|
|
89
89
|
|
|
90
90
|
/// Reconstruction methods; prefer the batch reconstruct as it will
|
|
91
91
|
/// be more efficient
|
|
92
|
-
void reconstruct(
|
|
92
|
+
void reconstruct(idx_t key, float* out) const override;
|
|
93
93
|
|
|
94
94
|
/// Batch reconstruction method
|
|
95
|
-
void reconstruct_n(
|
|
96
|
-
const override;
|
|
95
|
+
void reconstruct_n(idx_t i0, idx_t num, float* out) const override;
|
|
97
96
|
|
|
98
97
|
/// Batch reconstruction method
|
|
99
|
-
void reconstruct_batch(
|
|
98
|
+
void reconstruct_batch(idx_t n, const idx_t* keys, float* out)
|
|
100
99
|
const override;
|
|
101
100
|
|
|
102
101
|
/// Compute residual
|
|
103
|
-
void compute_residual(const float* x, float* residual,
|
|
102
|
+
void compute_residual(const float* x, float* residual, idx_t key)
|
|
104
103
|
const override;
|
|
105
104
|
|
|
106
105
|
/// Compute residual (batch mode)
|
|
107
106
|
void compute_residual_n(
|
|
108
|
-
|
|
107
|
+
idx_t n,
|
|
109
108
|
const float* xs,
|
|
110
109
|
float* residuals,
|
|
111
|
-
const
|
|
110
|
+
const idx_t* keys) const override;
|
|
112
111
|
|
|
113
112
|
/// For internal access
|
|
114
113
|
inline FlatIndex* getGpuData() {
|
|
@@ -121,15 +120,15 @@ class GpuIndexFlat : public GpuIndex {
|
|
|
121
120
|
bool addImplRequiresIDs_() const override;
|
|
122
121
|
|
|
123
122
|
/// Called from GpuIndex for add
|
|
124
|
-
void addImpl_(
|
|
123
|
+
void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
|
|
125
124
|
|
|
126
125
|
/// Called from GpuIndex for search
|
|
127
126
|
void searchImpl_(
|
|
128
|
-
|
|
127
|
+
idx_t n,
|
|
129
128
|
const float* x,
|
|
130
129
|
int k,
|
|
131
130
|
float* distances,
|
|
132
|
-
|
|
131
|
+
idx_t* labels,
|
|
133
132
|
const SearchParameters* params) const override;
|
|
134
133
|
|
|
135
134
|
protected:
|
|
@@ -33,7 +33,7 @@ struct GpuIndexIVFConfig : public GpuIndexConfig {
|
|
|
33
33
|
/// Base class of all GPU IVF index types. This (for now) deliberately does not
|
|
34
34
|
/// inherit from IndexIVF, as many of the public data members and functionality
|
|
35
35
|
/// in IndexIVF is not supported in the same manner on the GPU.
|
|
36
|
-
class GpuIndexIVF : public GpuIndex {
|
|
36
|
+
class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
|
|
37
37
|
public:
|
|
38
38
|
/// Version that auto-constructs a flat coarse quantizer based on the
|
|
39
39
|
/// desired metric
|
|
@@ -42,7 +42,7 @@ class GpuIndexIVF : public GpuIndex {
|
|
|
42
42
|
int dims,
|
|
43
43
|
faiss::MetricType metric,
|
|
44
44
|
float metricArg,
|
|
45
|
-
|
|
45
|
+
idx_t nlist,
|
|
46
46
|
GpuIndexIVFConfig config = GpuIndexIVFConfig());
|
|
47
47
|
|
|
48
48
|
/// Version that takes a coarse quantizer instance. The GpuIndexIVF does not
|
|
@@ -53,7 +53,7 @@ class GpuIndexIVF : public GpuIndex {
|
|
|
53
53
|
int dims,
|
|
54
54
|
faiss::MetricType metric,
|
|
55
55
|
float metricArg,
|
|
56
|
-
|
|
56
|
+
idx_t nlist,
|
|
57
57
|
GpuIndexIVFConfig config = GpuIndexIVFConfig());
|
|
58
58
|
|
|
59
59
|
~GpuIndexIVF() override;
|
|
@@ -75,10 +75,10 @@ class GpuIndexIVF : public GpuIndex {
|
|
|
75
75
|
virtual void updateQuantizer() = 0;
|
|
76
76
|
|
|
77
77
|
/// Returns the number of inverted lists we're managing
|
|
78
|
-
|
|
78
|
+
idx_t getNumLists() const;
|
|
79
79
|
|
|
80
80
|
/// Returns the number of vectors present in a particular inverted list
|
|
81
|
-
|
|
81
|
+
idx_t getListLength(idx_t listId) const;
|
|
82
82
|
|
|
83
83
|
/// Return the encoded vector data contained in a particular inverted list,
|
|
84
84
|
/// for debugging purposes.
|
|
@@ -86,34 +86,13 @@ class GpuIndexIVF : public GpuIndex {
|
|
|
86
86
|
/// GPU-side representation.
|
|
87
87
|
/// Otherwise, it is converted to the CPU format.
|
|
88
88
|
/// compliant format, while the native GPU format may differ.
|
|
89
|
-
std::vector<uint8_t> getListVectorData(
|
|
89
|
+
std::vector<uint8_t> getListVectorData(idx_t listId, bool gpuFormat = false)
|
|
90
90
|
const;
|
|
91
91
|
|
|
92
92
|
/// Return the vector indices contained in a particular inverted list, for
|
|
93
93
|
/// debugging purposes.
|
|
94
|
-
std::vector<
|
|
95
|
-
|
|
96
|
-
/// Sets the number of list probes per query
|
|
97
|
-
void setNumProbes(int nprobe);
|
|
98
|
-
|
|
99
|
-
/// Returns our current number of list probes per query
|
|
100
|
-
int getNumProbes() const;
|
|
101
|
-
|
|
102
|
-
/// Same interface as faiss::IndexIVF, in order to search a set of vectors
|
|
103
|
-
/// pre-quantized by the IVF quantizer. Does not include IndexIVFStats as
|
|
104
|
-
/// that can only be obtained on the host via a GPU d2h copy.
|
|
105
|
-
/// @param n nb of vectors to query
|
|
106
|
-
/// @param x query vectors, size nx * d
|
|
107
|
-
/// @param assign coarse quantization indices, size nx * nprobe
|
|
108
|
-
/// @param centroid_dis
|
|
109
|
-
/// distances to coarse centroids, size nx * nprobe
|
|
110
|
-
/// @param distance
|
|
111
|
-
/// output distances, size n * k
|
|
112
|
-
/// @param labels output labels, size n * k
|
|
113
|
-
/// @param store_pairs store inv list index + inv list offset
|
|
114
|
-
/// instead in upper/lower 32 bit of result,
|
|
115
|
-
/// instead of ids (used for reranking).
|
|
116
|
-
/// @param params used to override the object's search parameters
|
|
94
|
+
std::vector<idx_t> getListIndices(idx_t listId) const;
|
|
95
|
+
|
|
117
96
|
void search_preassigned(
|
|
118
97
|
idx_t n,
|
|
119
98
|
const float* x,
|
|
@@ -123,41 +102,41 @@ class GpuIndexIVF : public GpuIndex {
|
|
|
123
102
|
float* distances,
|
|
124
103
|
idx_t* labels,
|
|
125
104
|
bool store_pairs,
|
|
126
|
-
const SearchParametersIVF* params = nullptr
|
|
105
|
+
const SearchParametersIVF* params = nullptr,
|
|
106
|
+
IndexIVFStats* stats = nullptr) const override;
|
|
107
|
+
|
|
108
|
+
// not implemented for GPU
|
|
109
|
+
void range_search_preassigned(
|
|
110
|
+
idx_t nx,
|
|
111
|
+
const float* x,
|
|
112
|
+
float radius,
|
|
113
|
+
const idx_t* keys,
|
|
114
|
+
const float* coarse_dis,
|
|
115
|
+
RangeSearchResult* result,
|
|
116
|
+
bool store_pairs = false,
|
|
117
|
+
const IVFSearchParameters* params = nullptr,
|
|
118
|
+
IndexIVFStats* stats = nullptr) const override;
|
|
127
119
|
|
|
128
120
|
protected:
|
|
121
|
+
/// From either the current set nprobe or the SearchParameters if available,
|
|
122
|
+
/// return the nprobe that we should use for the current search
|
|
123
|
+
int getCurrentNProbe_(const SearchParameters* params) const;
|
|
129
124
|
void verifyIVFSettings_() const;
|
|
130
125
|
bool addImplRequiresIDs_() const override;
|
|
131
|
-
void trainQuantizer_(
|
|
126
|
+
void trainQuantizer_(idx_t n, const float* x);
|
|
132
127
|
|
|
133
128
|
/// Called from GpuIndex for add/add_with_ids
|
|
134
|
-
void addImpl_(
|
|
129
|
+
void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
|
|
135
130
|
|
|
136
131
|
/// Called from GpuIndex for search
|
|
137
132
|
void searchImpl_(
|
|
138
|
-
|
|
133
|
+
idx_t n,
|
|
139
134
|
const float* x,
|
|
140
135
|
int k,
|
|
141
136
|
float* distances,
|
|
142
|
-
|
|
137
|
+
idx_t* labels,
|
|
143
138
|
const SearchParameters* params) const override;
|
|
144
139
|
|
|
145
|
-
public:
|
|
146
|
-
/// Exposing this like the CPU version for manipulation
|
|
147
|
-
ClusteringParameters cp;
|
|
148
|
-
|
|
149
|
-
/// Exposing this like the CPU version for query
|
|
150
|
-
int nlist;
|
|
151
|
-
|
|
152
|
-
/// Exposing this like the CPU version for manipulation
|
|
153
|
-
int nprobe;
|
|
154
|
-
|
|
155
|
-
/// A user-pluggable coarse quantizer
|
|
156
|
-
Index* quantizer;
|
|
157
|
-
|
|
158
|
-
/// Whether or not we own the coarse quantizer
|
|
159
|
-
bool own_fields;
|
|
160
|
-
|
|
161
140
|
protected:
|
|
162
141
|
/// Our configuration options
|
|
163
142
|
const GpuIndexIVFConfig ivfConfig_;
|
|
@@ -44,7 +44,7 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
|
|
|
44
44
|
GpuIndexIVFFlat(
|
|
45
45
|
GpuResourcesProvider* provider,
|
|
46
46
|
int dims,
|
|
47
|
-
|
|
47
|
+
idx_t nlist,
|
|
48
48
|
faiss::MetricType metric = faiss::METRIC_L2,
|
|
49
49
|
GpuIndexIVFFlatConfig config = GpuIndexIVFFlatConfig());
|
|
50
50
|
|
|
@@ -54,7 +54,7 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
|
|
|
54
54
|
GpuResourcesProvider* provider,
|
|
55
55
|
Index* coarseQuantizer,
|
|
56
56
|
int dims,
|
|
57
|
-
|
|
57
|
+
idx_t nlist,
|
|
58
58
|
faiss::MetricType metric = faiss::METRIC_L2,
|
|
59
59
|
GpuIndexIVFFlatConfig config = GpuIndexIVFFlatConfig());
|
|
60
60
|
|
|
@@ -85,7 +85,7 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
|
|
|
85
85
|
void updateQuantizer() override;
|
|
86
86
|
|
|
87
87
|
/// Trains the coarse quantizer based on the given vector data
|
|
88
|
-
void train(
|
|
88
|
+
void train(idx_t n, const float* x) override;
|
|
89
89
|
|
|
90
90
|
protected:
|
|
91
91
|
/// Our configuration options
|
|
@@ -68,9 +68,9 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
|
|
|
68
68
|
GpuIndexIVFPQ(
|
|
69
69
|
GpuResourcesProvider* provider,
|
|
70
70
|
int dims,
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
71
|
+
idx_t nlist,
|
|
72
|
+
idx_t subQuantizers,
|
|
73
|
+
idx_t bitsPerCode,
|
|
74
74
|
faiss::MetricType metric = faiss::METRIC_L2,
|
|
75
75
|
GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
|
|
76
76
|
|
|
@@ -80,9 +80,9 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
|
|
|
80
80
|
GpuResourcesProvider* provider,
|
|
81
81
|
Index* coarseQuantizer,
|
|
82
82
|
int dims,
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
83
|
+
idx_t nlist,
|
|
84
|
+
idx_t subQuantizers,
|
|
85
|
+
idx_t bitsPerCode,
|
|
86
86
|
faiss::MetricType metric = faiss::METRIC_L2,
|
|
87
87
|
GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
|
|
88
88
|
|
|
@@ -131,7 +131,7 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
|
|
|
131
131
|
void updateQuantizer() override;
|
|
132
132
|
|
|
133
133
|
/// Trains the coarse and product quantizer based on the given vector data
|
|
134
|
-
void train(
|
|
134
|
+
void train(idx_t n, const float* x) override;
|
|
135
135
|
|
|
136
136
|
public:
|
|
137
137
|
/// Like the CPU version, we expose a publicly visible ProductQuantizer
|
|
@@ -143,7 +143,7 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
|
|
|
143
143
|
void verifyPQSettings_() const;
|
|
144
144
|
|
|
145
145
|
/// Trains the PQ quantizer based on the given vector data
|
|
146
|
-
void trainResidualQuantizer_(
|
|
146
|
+
void trainResidualQuantizer_(idx_t n, const float* x);
|
|
147
147
|
|
|
148
148
|
protected:
|
|
149
149
|
/// Our configuration options that we were initialized with
|
|
@@ -42,7 +42,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
|
|
|
42
42
|
GpuIndexIVFScalarQuantizer(
|
|
43
43
|
GpuResourcesProvider* provider,
|
|
44
44
|
int dims,
|
|
45
|
-
|
|
45
|
+
idx_t nlist,
|
|
46
46
|
faiss::ScalarQuantizer::QuantizerType qtype,
|
|
47
47
|
faiss::MetricType metric = MetricType::METRIC_L2,
|
|
48
48
|
bool encodeResidual = true,
|
|
@@ -55,7 +55,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
|
|
|
55
55
|
GpuResourcesProvider* provider,
|
|
56
56
|
Index* coarseQuantizer,
|
|
57
57
|
int dims,
|
|
58
|
-
|
|
58
|
+
idx_t nlist,
|
|
59
59
|
faiss::ScalarQuantizer::QuantizerType qtype,
|
|
60
60
|
faiss::MetricType metric = MetricType::METRIC_L2,
|
|
61
61
|
bool encodeResidual = true,
|
|
@@ -89,14 +89,14 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
|
|
|
89
89
|
void updateQuantizer() override;
|
|
90
90
|
|
|
91
91
|
/// Trains the coarse and scalar quantizer based on the given vector data
|
|
92
|
-
void train(
|
|
92
|
+
void train(idx_t n, const float* x) override;
|
|
93
93
|
|
|
94
94
|
protected:
|
|
95
95
|
/// Validates index SQ parameters
|
|
96
96
|
void verifySQSettings_() const;
|
|
97
97
|
|
|
98
98
|
/// Called from train to handle SQ residual training
|
|
99
|
-
void trainResiduals_(
|
|
99
|
+
void trainResiduals_(idx_t n, const float* x);
|
|
100
100
|
|
|
101
101
|
public:
|
|
102
102
|
/// Exposed like the CPU version
|
|
@@ -20,13 +20,10 @@ namespace gpu {
|
|
|
20
20
|
int getMaxKSelection();
|
|
21
21
|
|
|
22
22
|
// Validate the k parameter for search
|
|
23
|
-
void validateKSelect(
|
|
23
|
+
void validateKSelect(int k);
|
|
24
24
|
|
|
25
25
|
// Validate the nprobe parameter for search
|
|
26
|
-
void validateNProbe(
|
|
27
|
-
|
|
28
|
-
/// Validate the n (number of vectors) parameter for add, search, reconstruct
|
|
29
|
-
void validateNumVectors(Index::idx_t n);
|
|
26
|
+
void validateNProbe(size_t nprobe);
|
|
30
27
|
|
|
31
28
|
} // namespace gpu
|
|
32
29
|
} // namespace faiss
|
|
@@ -14,21 +14,23 @@ namespace gpu {
|
|
|
14
14
|
// Utility function to translate (list id, offset) to a user index on
|
|
15
15
|
// the CPU. In a cpp in order to use OpenMP
|
|
16
16
|
void ivfOffsetToUserIndex(
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
17
|
+
idx_t* indices,
|
|
18
|
+
idx_t numLists,
|
|
19
|
+
idx_t queries,
|
|
20
20
|
int k,
|
|
21
|
-
const std::vector<std::vector<
|
|
21
|
+
const std::vector<std::vector<idx_t>>& listOffsetToUserIndex) {
|
|
22
22
|
FAISS_ASSERT(numLists == listOffsetToUserIndex.size());
|
|
23
23
|
|
|
24
24
|
#pragma omp parallel for
|
|
25
|
-
for (
|
|
26
|
-
for (
|
|
25
|
+
for (idx_t q = 0; q < queries; ++q) {
|
|
26
|
+
for (idx_t r = 0; r < k; ++r) {
|
|
27
27
|
auto offsetIndex = indices[q * k + r];
|
|
28
28
|
|
|
29
|
-
if (offsetIndex < 0)
|
|
29
|
+
if (offsetIndex < 0) {
|
|
30
30
|
continue;
|
|
31
|
+
}
|
|
31
32
|
|
|
33
|
+
// FIXME: implicit limit on list and list offset length
|
|
32
34
|
int listId = (int)(offsetIndex >> 32);
|
|
33
35
|
int listOffset = (int)(offsetIndex & 0xffffffff);
|
|
34
36
|
|
|
@@ -16,11 +16,11 @@ namespace gpu {
|
|
|
16
16
|
/// Utility function to translate (list id, offset) to a user index on
|
|
17
17
|
/// the CPU. In a cpp in order to use OpenMP.
|
|
18
18
|
void ivfOffsetToUserIndex(
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
19
|
+
idx_t* indices,
|
|
20
|
+
idx_t numLists,
|
|
21
|
+
idx_t queries,
|
|
22
22
|
int k,
|
|
23
|
-
const std::vector<std::vector<
|
|
23
|
+
const std::vector<std::vector<idx_t>>& listOffsetToUserIndex);
|
|
24
24
|
|
|
25
25
|
} // namespace gpu
|
|
26
26
|
} // namespace faiss
|
|
@@ -58,8 +58,8 @@ void IndexWrapper<GpuIndex>::runOnIndices(std::function<void(GpuIndex*)> f) {
|
|
|
58
58
|
}
|
|
59
59
|
|
|
60
60
|
template <typename GpuIndex>
|
|
61
|
-
void IndexWrapper<GpuIndex>::setNumProbes(
|
|
62
|
-
runOnIndices([nprobe](GpuIndex* index) { index->
|
|
61
|
+
void IndexWrapper<GpuIndex>::setNumProbes(size_t nprobe) {
|
|
62
|
+
runOnIndices([nprobe](GpuIndex* index) { index->nprobe = nprobe; });
|
|
63
63
|
}
|
|
64
64
|
|
|
65
65
|
} // namespace gpu
|
|
@@ -18,9 +18,9 @@
|
|
|
18
18
|
|
|
19
19
|
void compareBinaryDist(
|
|
20
20
|
const std::vector<int>& cpuDist,
|
|
21
|
-
const std::vector<faiss::
|
|
21
|
+
const std::vector<faiss::idx_t>& cpuLabels,
|
|
22
22
|
const std::vector<int>& gpuDist,
|
|
23
|
-
const std::vector<faiss::
|
|
23
|
+
const std::vector<faiss::idx_t>& gpuLabels,
|
|
24
24
|
int numQuery,
|
|
25
25
|
int k) {
|
|
26
26
|
for (int i = 0; i < numQuery; ++i) {
|
|
@@ -29,8 +29,8 @@ void compareBinaryDist(
|
|
|
29
29
|
// encounters the values. The last set of equivalent distances seen in
|
|
30
30
|
// the min-k might be truncated, so we can't check that set, but all
|
|
31
31
|
// others we can check.
|
|
32
|
-
std::set<faiss::
|
|
33
|
-
std::set<faiss::
|
|
32
|
+
std::set<faiss::idx_t> cpuLabelSet;
|
|
33
|
+
std::set<faiss::idx_t> gpuLabelSet;
|
|
34
34
|
|
|
35
35
|
int curDist = -1;
|
|
36
36
|
|
|
@@ -89,13 +89,13 @@ void testGpuIndexBinaryFlat(int kOverride = -1) {
|
|
|
89
89
|
auto query = faiss::gpu::randBinaryVecs(numQuery, dims);
|
|
90
90
|
|
|
91
91
|
std::vector<int> cpuDist(numQuery * k);
|
|
92
|
-
std::vector<faiss::
|
|
92
|
+
std::vector<faiss::idx_t> cpuLabels(numQuery * k);
|
|
93
93
|
|
|
94
94
|
cpuIndex.search(
|
|
95
95
|
numQuery, query.data(), k, cpuDist.data(), cpuLabels.data());
|
|
96
96
|
|
|
97
97
|
std::vector<int> gpuDist(numQuery * k);
|
|
98
|
-
std::vector<faiss::
|
|
98
|
+
std::vector<faiss::idx_t> gpuLabels(numQuery * k);
|
|
99
99
|
|
|
100
100
|
gpuIndex.search(
|
|
101
101
|
numQuery, query.data(), k, gpuDist.data(), gpuLabels.data());
|
|
@@ -115,6 +115,55 @@ TEST(TestGpuIndexBinaryFlat, Test32) {
|
|
|
115
115
|
}
|
|
116
116
|
}
|
|
117
117
|
|
|
118
|
+
TEST(TestGpuIndexBinaryFlat, LargeIndex) {
|
|
119
|
+
// Construct on a random device to test multi-device, if we have
|
|
120
|
+
// multiple devices
|
|
121
|
+
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
|
|
122
|
+
|
|
123
|
+
faiss::gpu::StandardGpuResources res;
|
|
124
|
+
res.noTempMemory();
|
|
125
|
+
|
|
126
|
+
// Skip this device if we do not have sufficient memory
|
|
127
|
+
constexpr size_t kMem = size_t(8) * 1024 * 1024 * 1024;
|
|
128
|
+
|
|
129
|
+
if (faiss::gpu::getFreeMemory(device) < kMem) {
|
|
130
|
+
std::cerr << "TestGpuIndexFlat.LargeIndex: skipping due "
|
|
131
|
+
"to insufficient device memory\n";
|
|
132
|
+
return;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
std::cerr << "Running LargeIndex test\n";
|
|
136
|
+
|
|
137
|
+
faiss::gpu::GpuIndexBinaryFlatConfig config;
|
|
138
|
+
config.device = device;
|
|
139
|
+
|
|
140
|
+
int dims = 1250 * 8;
|
|
141
|
+
faiss::gpu::GpuIndexBinaryFlat gpuIndex(&res, dims, config);
|
|
142
|
+
|
|
143
|
+
faiss::IndexBinaryFlat cpuIndex(dims);
|
|
144
|
+
|
|
145
|
+
int k = 10;
|
|
146
|
+
int nb = 4000000;
|
|
147
|
+
int nq = 10;
|
|
148
|
+
|
|
149
|
+
auto xb = faiss::gpu::randBinaryVecs(nb, dims);
|
|
150
|
+
auto xq = faiss::gpu::randBinaryVecs(nq, dims);
|
|
151
|
+
gpuIndex.add(nb, xb.data());
|
|
152
|
+
cpuIndex.add(nb, xb.data());
|
|
153
|
+
|
|
154
|
+
std::vector<int> cpuDist(nq * k);
|
|
155
|
+
std::vector<faiss::idx_t> cpuLabels(nq * k);
|
|
156
|
+
|
|
157
|
+
cpuIndex.search(nq, xq.data(), k, cpuDist.data(), cpuLabels.data());
|
|
158
|
+
|
|
159
|
+
std::vector<int> gpuDist(nq * k);
|
|
160
|
+
std::vector<faiss::idx_t> gpuLabels(nq * k);
|
|
161
|
+
|
|
162
|
+
gpuIndex.search(nq, xq.data(), k, gpuDist.data(), gpuLabels.data());
|
|
163
|
+
|
|
164
|
+
compareBinaryDist(cpuDist, cpuLabels, gpuDist, gpuLabels, nq, k);
|
|
165
|
+
}
|
|
166
|
+
|
|
118
167
|
int main(int argc, char** argv) {
|
|
119
168
|
testing::InitGoogleTest(&argc, argv);
|
|
120
169
|
|
|
@@ -141,6 +141,20 @@ TEST(TestGpuIndexFlat, L2_Float32) {
|
|
|
141
141
|
}
|
|
142
142
|
}
|
|
143
143
|
|
|
144
|
+
// At least one test for the k > 1024 select
|
|
145
|
+
TEST(TestGpuIndexFlat, L2_k_2048) {
|
|
146
|
+
if (faiss::gpu::getMaxKSelection() >= 2048) {
|
|
147
|
+
TestFlatOptions opt;
|
|
148
|
+
opt.metric = faiss::MetricType::METRIC_L2;
|
|
149
|
+
opt.useFloat16 = false;
|
|
150
|
+
opt.kOverride = 2048;
|
|
151
|
+
opt.dimOverride = 128;
|
|
152
|
+
opt.numVecsOverride = 10000;
|
|
153
|
+
|
|
154
|
+
testFlat(opt);
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
144
158
|
// test specialized k == 1 codepath
|
|
145
159
|
TEST(TestGpuIndexFlat, L2_Float32_K1) {
|
|
146
160
|
for (int tries = 0; tries < 3; ++tries) {
|
|
@@ -220,7 +234,7 @@ TEST(TestGpuIndexFlat, QueryEmpty) {
|
|
|
220
234
|
std::vector<float> queries(numQuery * dim, 1.0f);
|
|
221
235
|
|
|
222
236
|
std::vector<float> dist(numQuery * k, 0);
|
|
223
|
-
std::vector<faiss::
|
|
237
|
+
std::vector<faiss::idx_t> ind(numQuery * k);
|
|
224
238
|
|
|
225
239
|
gpuIndex.search(numQuery, queries.data(), k, dist.data(), ind.data());
|
|
226
240
|
|
|
@@ -437,7 +451,7 @@ TEST(TestGpuIndexFlat, Residual) {
|
|
|
437
451
|
cpuIndex.add(numVecs, vecs.data());
|
|
438
452
|
gpuIndex.add(numVecs, vecs.data());
|
|
439
453
|
|
|
440
|
-
auto indexVecs = std::vector<faiss::
|
|
454
|
+
auto indexVecs = std::vector<faiss::idx_t>{0, 2, 4, 6, 8};
|
|
441
455
|
auto queryVecs = faiss::gpu::randVecs(indexVecs.size(), dim);
|
|
442
456
|
|
|
443
457
|
auto residualsCpu = std::vector<float>(indexVecs.size() * dim);
|
|
@@ -517,7 +531,7 @@ TEST(TestGpuIndexFlat, Reconstruct) {
|
|
|
517
531
|
|
|
518
532
|
// Test reconstruct_batch
|
|
519
533
|
if (false) {
|
|
520
|
-
auto reconstructKeys = std::vector<faiss::
|
|
534
|
+
auto reconstructKeys = std::vector<faiss::idx_t>{1, 3, 5};
|
|
521
535
|
auto reconstructVecs =
|
|
522
536
|
std::vector<float>(reconstructKeys.size() * dim);
|
|
523
537
|
|
|
@@ -565,7 +579,7 @@ TEST(TestGpuIndexFlat, SearchAndReconstruct) {
|
|
|
565
579
|
gpuIndex.add(nb, xb.data());
|
|
566
580
|
|
|
567
581
|
std::vector<float> refDistance(nq * k, 0);
|
|
568
|
-
std::vector<faiss::
|
|
582
|
+
std::vector<faiss::idx_t> refIndices(nq * k, -1);
|
|
569
583
|
std::vector<float> refReconstruct(nq * k * dim, 0);
|
|
570
584
|
cpuIndex.search_and_reconstruct(
|
|
571
585
|
nq,
|
|
@@ -576,7 +590,7 @@ TEST(TestGpuIndexFlat, SearchAndReconstruct) {
|
|
|
576
590
|
refReconstruct.data());
|
|
577
591
|
|
|
578
592
|
std::vector<float> testDistance(nq * k, 0);
|
|
579
|
-
std::vector<faiss::
|
|
593
|
+
std::vector<faiss::idx_t> testIndices(nq * k, -1);
|
|
580
594
|
std::vector<float> testReconstruct(nq * k * dim, 0);
|
|
581
595
|
gpuIndex.search_and_reconstruct(
|
|
582
596
|
nq,
|
|
@@ -606,7 +620,7 @@ TEST(TestGpuIndexFlat, SearchAndReconstruct) {
|
|
|
606
620
|
// above will ensure a decent number of matches), reconstruction should be
|
|
607
621
|
// the same for the vectors that do match
|
|
608
622
|
for (int i = 0; i < nq; ++i) {
|
|
609
|
-
std::unordered_map<faiss::
|
|
623
|
+
std::unordered_map<faiss::idx_t, int> refLocation;
|
|
610
624
|
|
|
611
625
|
for (int j = 0; j < k; ++j) {
|
|
612
626
|
refLocation.insert(std::make_pair(refIndices[i * k + j], j));
|