faiss 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +23 -21
- data/ext/faiss/extconf.rb +11 -0
- data/ext/faiss/index.cpp +4 -4
- data/ext/faiss/index_binary.cpp +6 -6
- data/ext/faiss/product_quantizer.cpp +4 -4
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +13 -0
- data/vendor/faiss/faiss/IVFlib.cpp +101 -2
- data/vendor/faiss/faiss/IVFlib.h +26 -2
- data/vendor/faiss/faiss/Index.cpp +36 -3
- data/vendor/faiss/faiss/Index.h +43 -6
- data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
- data/vendor/faiss/faiss/Index2Layer.h +6 -1
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
- data/vendor/faiss/faiss/IndexBinary.h +18 -3
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
- data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
- data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
- data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
- data/vendor/faiss/faiss/IndexFastScan.h +145 -0
- data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
- data/vendor/faiss/faiss/IndexFlat.h +7 -4
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
- data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
- data/vendor/faiss/faiss/IndexHNSW.h +4 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
- data/vendor/faiss/faiss/IndexIDMap.h +107 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
- data/vendor/faiss/faiss/IndexIVF.h +35 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
- data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
- data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
- data/vendor/faiss/faiss/IndexLSH.h +2 -1
- data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
- data/vendor/faiss/faiss/IndexLattice.h +3 -1
- data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
- data/vendor/faiss/faiss/IndexNSG.h +25 -1
- data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
- data/vendor/faiss/faiss/IndexPQ.h +19 -5
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
- data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
- data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
- data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
- data/vendor/faiss/faiss/IndexRefine.h +4 -2
- data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
- data/vendor/faiss/faiss/IndexReplicas.h +2 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
- data/vendor/faiss/faiss/IndexShards.cpp +4 -1
- data/vendor/faiss/faiss/IndexShards.h +2 -1
- data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
- data/vendor/faiss/faiss/MetaIndexes.h +3 -81
- data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
- data/vendor/faiss/faiss/VectorTransform.h +22 -4
- data/vendor/faiss/faiss/clone_index.cpp +23 -1
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
- data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
- data/vendor/faiss/faiss/impl/HNSW.h +19 -16
- data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
- data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
- data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
- data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
- data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
- data/vendor/faiss/faiss/index_factory.cpp +196 -7
- data/vendor/faiss/faiss/index_io.h +5 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
- data/vendor/faiss/faiss/utils/Heap.h +31 -15
- data/vendor/faiss/faiss/utils/distances.cpp +380 -56
- data/vendor/faiss/faiss/utils/distances.h +113 -15
- data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
- data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
- data/vendor/faiss/faiss/utils/fp16.h +11 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
- data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
- data/vendor/faiss/faiss/utils/random.cpp +53 -0
- data/vendor/faiss/faiss/utils/random.h +5 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
- data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
- metadata +37 -3
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <faiss/IndexAdditiveQuantizer.h>
|
|
11
|
+
#include <faiss/IndexFastScan.h>
|
|
12
|
+
#include <faiss/impl/AdditiveQuantizer.h>
|
|
13
|
+
#include <faiss/impl/ProductAdditiveQuantizer.h>
|
|
14
|
+
#include <faiss/utils/AlignedTable.h>
|
|
15
|
+
|
|
16
|
+
namespace faiss {
|
|
17
|
+
|
|
18
|
+
/** Fast scan version of IndexAQ. Works for 4-bit AQ for now.
|
|
19
|
+
*
|
|
20
|
+
* The codes are not stored sequentially but grouped in blocks of size bbs.
|
|
21
|
+
* This makes it possible to compute distances quickly with SIMD instructions.
|
|
22
|
+
*
|
|
23
|
+
* Implementations:
|
|
24
|
+
* 12: blocked loop with internal loop on Q with qbs
|
|
25
|
+
* 13: same with reservoir accumulator to store results
|
|
26
|
+
* 14: no qbs with heap accumulator
|
|
27
|
+
* 15: no qbs with reservoir accumulator
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
struct IndexAdditiveQuantizerFastScan : IndexFastScan {
|
|
31
|
+
AdditiveQuantizer* aq;
|
|
32
|
+
using Search_type_t = AdditiveQuantizer::Search_type_t;
|
|
33
|
+
|
|
34
|
+
bool rescale_norm = true;
|
|
35
|
+
int norm_scale = 1;
|
|
36
|
+
|
|
37
|
+
// max number of training vectors
|
|
38
|
+
size_t max_train_points = 0;
|
|
39
|
+
|
|
40
|
+
explicit IndexAdditiveQuantizerFastScan(
|
|
41
|
+
AdditiveQuantizer* aq,
|
|
42
|
+
MetricType metric = METRIC_L2,
|
|
43
|
+
int bbs = 32);
|
|
44
|
+
|
|
45
|
+
void init(
|
|
46
|
+
AdditiveQuantizer* aq,
|
|
47
|
+
MetricType metric = METRIC_L2,
|
|
48
|
+
int bbs = 32);
|
|
49
|
+
|
|
50
|
+
IndexAdditiveQuantizerFastScan();
|
|
51
|
+
|
|
52
|
+
~IndexAdditiveQuantizerFastScan() override;
|
|
53
|
+
|
|
54
|
+
/// build from an existing IndexAQ
|
|
55
|
+
explicit IndexAdditiveQuantizerFastScan(
|
|
56
|
+
const IndexAdditiveQuantizer& orig,
|
|
57
|
+
int bbs = 32);
|
|
58
|
+
|
|
59
|
+
void train(idx_t n, const float* x) override;
|
|
60
|
+
|
|
61
|
+
void estimate_norm_scale(idx_t n, const float* x);
|
|
62
|
+
|
|
63
|
+
void compute_codes(uint8_t* codes, idx_t n, const float* x) const override;
|
|
64
|
+
|
|
65
|
+
void compute_float_LUT(float* lut, idx_t n, const float* x) const override;
|
|
66
|
+
|
|
67
|
+
void search(
|
|
68
|
+
idx_t n,
|
|
69
|
+
const float* x,
|
|
70
|
+
idx_t k,
|
|
71
|
+
float* distances,
|
|
72
|
+
idx_t* labels,
|
|
73
|
+
const SearchParameters* params = nullptr) const override;
|
|
74
|
+
|
|
75
|
+
/** Decode a set of vectors.
|
|
76
|
+
*
|
|
77
|
+
* NOTE: The codes in the IndexAdditiveQuantizerFastScan object are non-
|
|
78
|
+
* contiguous. But this method requires a contiguous representation.
|
|
79
|
+
*
|
|
80
|
+
* @param n number of vectors
|
|
81
|
+
* @param bytes input encoded vectors, size n * code_size
|
|
82
|
+
* @param x output vectors, size n * d
|
|
83
|
+
*/
|
|
84
|
+
void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
/** Index based on a residual quantizer. Stored vectors are
|
|
88
|
+
* approximated by residual quantization codes.
|
|
89
|
+
* Can also be used as a codec
|
|
90
|
+
*/
|
|
91
|
+
struct IndexResidualQuantizerFastScan : IndexAdditiveQuantizerFastScan {
|
|
92
|
+
/// The residual quantizer used to encode the vectors
|
|
93
|
+
ResidualQuantizer rq;
|
|
94
|
+
|
|
95
|
+
/** Constructor.
|
|
96
|
+
*
|
|
97
|
+
* @param d dimensionality of the input vectors
|
|
98
|
+
* @param M number of subquantizers
|
|
99
|
+
* @param nbits number of bits per subvector index
|
|
100
|
+
* @param metric metric type
|
|
101
|
+
* @param search_type AQ search type
|
|
102
|
+
*/
|
|
103
|
+
IndexResidualQuantizerFastScan(
|
|
104
|
+
int d, ///< dimensionality of the input vectors
|
|
105
|
+
size_t M, ///< number of subquantizers
|
|
106
|
+
size_t nbits, ///< number of bit per subvector index
|
|
107
|
+
MetricType metric = METRIC_L2,
|
|
108
|
+
Search_type_t search_type = AdditiveQuantizer::ST_norm_rq2x4,
|
|
109
|
+
int bbs = 32);
|
|
110
|
+
|
|
111
|
+
IndexResidualQuantizerFastScan();
|
|
112
|
+
};
|
|
113
|
+
|
|
114
|
+
/** Index based on a local search quantizer. Stored vectors are
|
|
115
|
+
* approximated by local search quantization codes.
|
|
116
|
+
* Can also be used as a codec
|
|
117
|
+
*/
|
|
118
|
+
struct IndexLocalSearchQuantizerFastScan : IndexAdditiveQuantizerFastScan {
|
|
119
|
+
LocalSearchQuantizer lsq;
|
|
120
|
+
|
|
121
|
+
/** Constructor.
|
|
122
|
+
*
|
|
123
|
+
* @param d dimensionality of the input vectors
|
|
124
|
+
* @param M number of subquantizers
|
|
125
|
+
* @param nbits number of bits per subvector index
|
|
126
|
+
* @param metric metric type
|
|
127
|
+
* @param search_type AQ search type
|
|
128
|
+
*/
|
|
129
|
+
IndexLocalSearchQuantizerFastScan(
|
|
130
|
+
int d, ///< dimensionality of the input vectors
|
|
131
|
+
size_t M, ///< number of subquantizers
|
|
132
|
+
size_t nbits, ///< number of bit per subvector index
|
|
133
|
+
MetricType metric = METRIC_L2,
|
|
134
|
+
Search_type_t search_type = AdditiveQuantizer::ST_norm_lsq2x4,
|
|
135
|
+
int bbs = 32);
|
|
136
|
+
|
|
137
|
+
IndexLocalSearchQuantizerFastScan();
|
|
138
|
+
};
|
|
139
|
+
|
|
140
|
+
/** Index based on a product residual quantizer. Stored vectors are
|
|
141
|
+
* approximated by product residual quantization codes.
|
|
142
|
+
* Can also be used as a codec
|
|
143
|
+
*/
|
|
144
|
+
struct IndexProductResidualQuantizerFastScan : IndexAdditiveQuantizerFastScan {
|
|
145
|
+
/// The product residual quantizer used to encode the vectors
|
|
146
|
+
ProductResidualQuantizer prq;
|
|
147
|
+
|
|
148
|
+
/** Constructor.
|
|
149
|
+
*
|
|
150
|
+
* @param d dimensionality of the input vectors
|
|
151
|
+
* @param nsplits number of residual quantizers
|
|
152
|
+
* @param Msub number of subquantizers per RQ
|
|
153
|
+
* @param nbits number of bits per subvector index
|
|
154
|
+
* @param metric metric type
|
|
155
|
+
* @param search_type AQ search type
|
|
156
|
+
*/
|
|
157
|
+
IndexProductResidualQuantizerFastScan(
|
|
158
|
+
int d, ///< dimensionality of the input vectors
|
|
159
|
+
size_t nsplits, ///< number of residual quantizers
|
|
160
|
+
size_t Msub, ///< number of subquantizers per RQ
|
|
161
|
+
size_t nbits, ///< number of bit per subvector index
|
|
162
|
+
MetricType metric = METRIC_L2,
|
|
163
|
+
Search_type_t search_type = AdditiveQuantizer::ST_norm_rq2x4,
|
|
164
|
+
int bbs = 32);
|
|
165
|
+
|
|
166
|
+
IndexProductResidualQuantizerFastScan();
|
|
167
|
+
};
|
|
168
|
+
|
|
169
|
+
/** Index based on a product local search quantizer. Stored vectors are
|
|
170
|
+
* approximated by product local search quantization codes.
|
|
171
|
+
* Can also be used as a codec
|
|
172
|
+
*/
|
|
173
|
+
struct IndexProductLocalSearchQuantizerFastScan
|
|
174
|
+
: IndexAdditiveQuantizerFastScan {
|
|
175
|
+
/// The product local search quantizer used to encode the vectors
|
|
176
|
+
ProductLocalSearchQuantizer plsq;
|
|
177
|
+
|
|
178
|
+
/** Constructor.
|
|
179
|
+
*
|
|
180
|
+
* @param d dimensionality of the input vectors
|
|
181
|
+
* @param nsplits number of local search quantizers
|
|
182
|
+
* @param Msub number of subquantizers per LSQ
|
|
183
|
+
* @param nbits number of bits per subvector index
|
|
184
|
+
* @param metric metric type
|
|
185
|
+
* @param search_type AQ search type
|
|
186
|
+
*/
|
|
187
|
+
IndexProductLocalSearchQuantizerFastScan(
|
|
188
|
+
int d, ///< dimensionality of the input vectors
|
|
189
|
+
size_t nsplits, ///< number of local search quantizers
|
|
190
|
+
size_t Msub, ///< number of subquantizers per LSQ
|
|
191
|
+
size_t nbits, ///< number of bit per subvector index
|
|
192
|
+
MetricType metric = METRIC_L2,
|
|
193
|
+
Search_type_t search_type = AdditiveQuantizer::ST_norm_rq2x4,
|
|
194
|
+
int bbs = 32);
|
|
195
|
+
|
|
196
|
+
IndexProductLocalSearchQuantizerFastScan();
|
|
197
|
+
};
|
|
198
|
+
|
|
199
|
+
} // namespace faiss
|
|
@@ -21,8 +21,12 @@ void IndexBinary::train(idx_t, const uint8_t*) {
|
|
|
21
21
|
// Does nothing by default.
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
-
void IndexBinary::range_search(
|
|
25
|
-
|
|
24
|
+
void IndexBinary::range_search(
|
|
25
|
+
idx_t,
|
|
26
|
+
const uint8_t*,
|
|
27
|
+
int,
|
|
28
|
+
RangeSearchResult*,
|
|
29
|
+
const SearchParameters*) const {
|
|
26
30
|
FAISS_THROW_MSG("range search not implemented");
|
|
27
31
|
}
|
|
28
32
|
|
|
@@ -57,10 +61,11 @@ void IndexBinary::search_and_reconstruct(
|
|
|
57
61
|
idx_t k,
|
|
58
62
|
int32_t* distances,
|
|
59
63
|
idx_t* labels,
|
|
60
|
-
uint8_t* recons
|
|
64
|
+
uint8_t* recons,
|
|
65
|
+
const SearchParameters* params) const {
|
|
61
66
|
FAISS_THROW_IF_NOT(k > 0);
|
|
62
67
|
|
|
63
|
-
search(n, x, k, distances, labels);
|
|
68
|
+
search(n, x, k, distances, labels, params);
|
|
64
69
|
for (idx_t i = 0; i < n; ++i) {
|
|
65
70
|
for (idx_t j = 0; j < k; ++j) {
|
|
66
71
|
idx_t ij = i * k + j;
|
|
@@ -82,4 +87,15 @@ void IndexBinary::display() const {
|
|
|
82
87
|
ntotal);
|
|
83
88
|
}
|
|
84
89
|
|
|
90
|
+
void IndexBinary::merge_from(
|
|
91
|
+
IndexBinary& /* otherIndex */,
|
|
92
|
+
idx_t /* add_id */) {
|
|
93
|
+
FAISS_THROW_MSG("merge_from() not implemented");
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
void IndexBinary::check_compatible_for_merge(
|
|
97
|
+
const IndexBinary& /* otherIndex */) const {
|
|
98
|
+
FAISS_THROW_MSG("check_compatible_for_merge() not implemented");
|
|
99
|
+
}
|
|
100
|
+
|
|
85
101
|
} // namespace faiss
|
|
@@ -97,7 +97,8 @@ struct IndexBinary {
|
|
|
97
97
|
const uint8_t* x,
|
|
98
98
|
idx_t k,
|
|
99
99
|
int32_t* distances,
|
|
100
|
-
idx_t* labels
|
|
100
|
+
idx_t* labels,
|
|
101
|
+
const SearchParameters* params = nullptr) const = 0;
|
|
101
102
|
|
|
102
103
|
/** Query n vectors of dimension d to the index.
|
|
103
104
|
*
|
|
@@ -117,7 +118,8 @@ struct IndexBinary {
|
|
|
117
118
|
idx_t n,
|
|
118
119
|
const uint8_t* x,
|
|
119
120
|
int radius,
|
|
120
|
-
RangeSearchResult* result
|
|
121
|
+
RangeSearchResult* result,
|
|
122
|
+
const SearchParameters* params = nullptr) const;
|
|
121
123
|
|
|
122
124
|
/** Return the indexes of the k vectors closest to the query x.
|
|
123
125
|
*
|
|
@@ -164,10 +166,23 @@ struct IndexBinary {
|
|
|
164
166
|
idx_t k,
|
|
165
167
|
int32_t* distances,
|
|
166
168
|
idx_t* labels,
|
|
167
|
-
uint8_t* recons
|
|
169
|
+
uint8_t* recons,
|
|
170
|
+
const SearchParameters* params = nullptr) const;
|
|
168
171
|
|
|
169
172
|
/** Display the actual class name and some more info. */
|
|
170
173
|
void display() const;
|
|
174
|
+
|
|
175
|
+
/** moves the entries from another dataset to self.
|
|
176
|
+
* On output, other is empty.
|
|
177
|
+
* add_id is added to all moved ids
|
|
178
|
+
* (for sequential ids, this would be this->ntotal) */
|
|
179
|
+
virtual void merge_from(IndexBinary& otherIndex, idx_t add_id = 0);
|
|
180
|
+
|
|
181
|
+
/** check that the two indexes are compatible (i.e., they are
|
|
182
|
+
* trained in the same way and have the same
|
|
183
|
+
* parameters). Otherwise throw. */
|
|
184
|
+
virtual void check_compatible_for_merge(
|
|
185
|
+
const IndexBinary& otherIndex) const;
|
|
171
186
|
};
|
|
172
187
|
|
|
173
188
|
} // namespace faiss
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
|
|
12
12
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
13
13
|
#include <faiss/impl/FaissAssert.h>
|
|
14
|
+
#include <faiss/impl/IDSelector.h>
|
|
14
15
|
#include <faiss/utils/Heap.h>
|
|
15
16
|
#include <faiss/utils/hamming.h>
|
|
16
17
|
#include <faiss/utils/utils.h>
|
|
@@ -35,7 +36,10 @@ void IndexBinaryFlat::search(
|
|
|
35
36
|
const uint8_t* x,
|
|
36
37
|
idx_t k,
|
|
37
38
|
int32_t* distances,
|
|
38
|
-
idx_t* labels
|
|
39
|
+
idx_t* labels,
|
|
40
|
+
const SearchParameters* params) const {
|
|
41
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
42
|
+
!params, "search params not supported for this index");
|
|
39
43
|
FAISS_THROW_IF_NOT(k > 0);
|
|
40
44
|
|
|
41
45
|
const idx_t block_size = query_batch_size;
|
|
@@ -101,7 +105,10 @@ void IndexBinaryFlat::range_search(
|
|
|
101
105
|
idx_t n,
|
|
102
106
|
const uint8_t* x,
|
|
103
107
|
int radius,
|
|
104
|
-
RangeSearchResult* result
|
|
108
|
+
RangeSearchResult* result,
|
|
109
|
+
const SearchParameters* params) const {
|
|
110
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
111
|
+
!params, "search params not supported for this index");
|
|
105
112
|
hamming_range_search(x, xb.data(), n, ntotal, radius, code_size, result);
|
|
106
113
|
}
|
|
107
114
|
|
|
@@ -39,13 +39,15 @@ struct IndexBinaryFlat : IndexBinary {
|
|
|
39
39
|
const uint8_t* x,
|
|
40
40
|
idx_t k,
|
|
41
41
|
int32_t* distances,
|
|
42
|
-
idx_t* labels
|
|
42
|
+
idx_t* labels,
|
|
43
|
+
const SearchParameters* params = nullptr) const override;
|
|
43
44
|
|
|
44
45
|
void range_search(
|
|
45
46
|
idx_t n,
|
|
46
47
|
const uint8_t* x,
|
|
47
48
|
int radius,
|
|
48
|
-
RangeSearchResult* result
|
|
49
|
+
RangeSearchResult* result,
|
|
50
|
+
const SearchParameters* params = nullptr) const override;
|
|
49
51
|
|
|
50
52
|
void reconstruct(idx_t key, uint8_t* recons) const override;
|
|
51
53
|
|
|
@@ -52,7 +52,10 @@ void IndexBinaryFromFloat::search(
|
|
|
52
52
|
const uint8_t* x,
|
|
53
53
|
idx_t k,
|
|
54
54
|
int32_t* distances,
|
|
55
|
-
idx_t* labels
|
|
55
|
+
idx_t* labels,
|
|
56
|
+
const SearchParameters* params) const {
|
|
57
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
58
|
+
!params, "search params not supported for this index");
|
|
56
59
|
FAISS_THROW_IF_NOT(k > 0);
|
|
57
60
|
|
|
58
61
|
constexpr idx_t bs = 32768;
|
|
@@ -43,7 +43,8 @@ struct IndexBinaryFromFloat : IndexBinary {
|
|
|
43
43
|
const uint8_t* x,
|
|
44
44
|
idx_t k,
|
|
45
45
|
int32_t* distances,
|
|
46
|
-
idx_t* labels
|
|
46
|
+
idx_t* labels,
|
|
47
|
+
const SearchParameters* params = nullptr) const override;
|
|
47
48
|
|
|
48
49
|
void train(idx_t n, const uint8_t* x) override;
|
|
49
50
|
};
|
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
|
|
27
27
|
#include <faiss/IndexBinaryFlat.h>
|
|
28
28
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
29
|
+
#include <faiss/impl/DistanceComputer.h>
|
|
29
30
|
#include <faiss/impl/FaissAssert.h>
|
|
30
31
|
#include <faiss/utils/Heap.h>
|
|
31
32
|
#include <faiss/utils/hamming.h>
|
|
@@ -194,7 +195,10 @@ void IndexBinaryHNSW::search(
|
|
|
194
195
|
const uint8_t* x,
|
|
195
196
|
idx_t k,
|
|
196
197
|
int32_t* distances,
|
|
197
|
-
idx_t* labels
|
|
198
|
+
idx_t* labels,
|
|
199
|
+
const SearchParameters* params) const {
|
|
200
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
201
|
+
!params, "search params not supported for this index");
|
|
198
202
|
FAISS_THROW_IF_NOT(k > 0);
|
|
199
203
|
|
|
200
204
|
#pragma omp parallel
|
|
@@ -47,7 +47,8 @@ struct IndexBinaryHNSW : IndexBinary {
|
|
|
47
47
|
const uint8_t* x,
|
|
48
48
|
idx_t k,
|
|
49
49
|
int32_t* distances,
|
|
50
|
-
idx_t* labels
|
|
50
|
+
idx_t* labels,
|
|
51
|
+
const SearchParameters* params = nullptr) const override;
|
|
51
52
|
|
|
52
53
|
void reconstruct(idx_t key, uint8_t* recons) const override;
|
|
53
54
|
|
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
#include <cinttypes>
|
|
13
13
|
#include <cstdio>
|
|
14
14
|
#include <memory>
|
|
15
|
+
#include <unordered_set>
|
|
15
16
|
|
|
16
17
|
#include <faiss/utils/hamming.h>
|
|
17
18
|
#include <faiss/utils/utils.h>
|
|
@@ -216,7 +217,10 @@ void IndexBinaryHash::range_search(
|
|
|
216
217
|
idx_t n,
|
|
217
218
|
const uint8_t* x,
|
|
218
219
|
int radius,
|
|
219
|
-
RangeSearchResult* result
|
|
220
|
+
RangeSearchResult* result,
|
|
221
|
+
const SearchParameters* params) const {
|
|
222
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
223
|
+
!params, "search params not supported for this index");
|
|
220
224
|
size_t nlist = 0, ndis = 0, n0 = 0;
|
|
221
225
|
|
|
222
226
|
#pragma omp parallel if (n > 100) reduction(+ : ndis, n0, nlist)
|
|
@@ -244,7 +248,10 @@ void IndexBinaryHash::search(
|
|
|
244
248
|
const uint8_t* x,
|
|
245
249
|
idx_t k,
|
|
246
250
|
int32_t* distances,
|
|
247
|
-
idx_t* labels
|
|
251
|
+
idx_t* labels,
|
|
252
|
+
const SearchParameters* params) const {
|
|
253
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
254
|
+
!params, "search params not supported for this index");
|
|
248
255
|
FAISS_THROW_IF_NOT(k > 0);
|
|
249
256
|
|
|
250
257
|
using HeapForL2 = CMax<int32_t, idx_t>;
|
|
@@ -431,7 +438,10 @@ void IndexBinaryMultiHash::range_search(
|
|
|
431
438
|
idx_t n,
|
|
432
439
|
const uint8_t* x,
|
|
433
440
|
int radius,
|
|
434
|
-
RangeSearchResult* result
|
|
441
|
+
RangeSearchResult* result,
|
|
442
|
+
const SearchParameters* params) const {
|
|
443
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
444
|
+
!params, "search params not supported for this index");
|
|
435
445
|
size_t nlist = 0, ndis = 0, n0 = 0;
|
|
436
446
|
|
|
437
447
|
#pragma omp parallel if (n > 100) reduction(+ : ndis, n0, nlist)
|
|
@@ -459,7 +469,10 @@ void IndexBinaryMultiHash::search(
|
|
|
459
469
|
const uint8_t* x,
|
|
460
470
|
idx_t k,
|
|
461
471
|
int32_t* distances,
|
|
462
|
-
idx_t* labels
|
|
472
|
+
idx_t* labels,
|
|
473
|
+
const SearchParameters* params) const {
|
|
474
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
475
|
+
!params, "search params not supported for this index");
|
|
463
476
|
FAISS_THROW_IF_NOT(k > 0);
|
|
464
477
|
|
|
465
478
|
using HeapForL2 = CMax<int32_t, idx_t>;
|
|
@@ -50,14 +50,16 @@ struct IndexBinaryHash : IndexBinary {
|
|
|
50
50
|
idx_t n,
|
|
51
51
|
const uint8_t* x,
|
|
52
52
|
int radius,
|
|
53
|
-
RangeSearchResult* result
|
|
53
|
+
RangeSearchResult* result,
|
|
54
|
+
const SearchParameters* params = nullptr) const override;
|
|
54
55
|
|
|
55
56
|
void search(
|
|
56
57
|
idx_t n,
|
|
57
58
|
const uint8_t* x,
|
|
58
59
|
idx_t k,
|
|
59
60
|
int32_t* distances,
|
|
60
|
-
idx_t* labels
|
|
61
|
+
idx_t* labels,
|
|
62
|
+
const SearchParameters* params = nullptr) const override;
|
|
61
63
|
|
|
62
64
|
void display() const;
|
|
63
65
|
size_t hashtable_size() const;
|
|
@@ -107,14 +109,16 @@ struct IndexBinaryMultiHash : IndexBinary {
|
|
|
107
109
|
idx_t n,
|
|
108
110
|
const uint8_t* x,
|
|
109
111
|
int radius,
|
|
110
|
-
RangeSearchResult* result
|
|
112
|
+
RangeSearchResult* result,
|
|
113
|
+
const SearchParameters* params = nullptr) const override;
|
|
111
114
|
|
|
112
115
|
void search(
|
|
113
116
|
idx_t n,
|
|
114
117
|
const uint8_t* x,
|
|
115
118
|
idx_t k,
|
|
116
119
|
int32_t* distances,
|
|
117
|
-
idx_t* labels
|
|
120
|
+
idx_t* labels,
|
|
121
|
+
const SearchParameters* params = nullptr) const override;
|
|
118
122
|
|
|
119
123
|
size_t hashtable_size() const;
|
|
120
124
|
};
|
|
@@ -125,7 +125,10 @@ void IndexBinaryIVF::search(
|
|
|
125
125
|
const uint8_t* x,
|
|
126
126
|
idx_t k,
|
|
127
127
|
int32_t* distances,
|
|
128
|
-
idx_t* labels
|
|
128
|
+
idx_t* labels,
|
|
129
|
+
const SearchParameters* params) const {
|
|
130
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
131
|
+
!params, "search params not supported for this index");
|
|
129
132
|
FAISS_THROW_IF_NOT(k > 0);
|
|
130
133
|
FAISS_THROW_IF_NOT(nprobe > 0);
|
|
131
134
|
|
|
@@ -175,7 +178,10 @@ void IndexBinaryIVF::search_and_reconstruct(
|
|
|
175
178
|
idx_t k,
|
|
176
179
|
int32_t* distances,
|
|
177
180
|
idx_t* labels,
|
|
178
|
-
uint8_t* recons
|
|
181
|
+
uint8_t* recons,
|
|
182
|
+
const SearchParameters* params) const {
|
|
183
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
184
|
+
!params, "search params not supported for this index");
|
|
179
185
|
const size_t nprobe = std::min(nlist, this->nprobe);
|
|
180
186
|
FAISS_THROW_IF_NOT(k > 0);
|
|
181
187
|
FAISS_THROW_IF_NOT(nprobe > 0);
|
|
@@ -279,22 +285,28 @@ void IndexBinaryIVF::train(idx_t n, const uint8_t* x) {
|
|
|
279
285
|
is_trained = true;
|
|
280
286
|
}
|
|
281
287
|
|
|
282
|
-
void IndexBinaryIVF::
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
FAISS_THROW_IF_NOT(other
|
|
286
|
-
FAISS_THROW_IF_NOT(other
|
|
288
|
+
void IndexBinaryIVF::check_compatible_for_merge(
|
|
289
|
+
const IndexBinary& otherIndex) const {
|
|
290
|
+
auto other = dynamic_cast<const IndexBinaryIVF*>(&otherIndex);
|
|
291
|
+
FAISS_THROW_IF_NOT(other);
|
|
292
|
+
FAISS_THROW_IF_NOT(other->d == d);
|
|
293
|
+
FAISS_THROW_IF_NOT(other->nlist == nlist);
|
|
294
|
+
FAISS_THROW_IF_NOT(other->code_size == code_size);
|
|
287
295
|
FAISS_THROW_IF_NOT_MSG(
|
|
288
|
-
direct_map.no() && other
|
|
296
|
+
direct_map.no() && other->direct_map.no(),
|
|
289
297
|
"direct map copy not implemented");
|
|
290
298
|
FAISS_THROW_IF_NOT_MSG(
|
|
291
299
|
typeid(*this) == typeid(other),
|
|
292
300
|
"can only merge indexes of the same type");
|
|
301
|
+
}
|
|
293
302
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
other
|
|
303
|
+
void IndexBinaryIVF::merge_from(IndexBinary& otherIndex, idx_t add_id) {
|
|
304
|
+
// minimal sanity checks
|
|
305
|
+
check_compatible_for_merge(otherIndex);
|
|
306
|
+
auto other = static_cast<IndexBinaryIVF*>(&otherIndex);
|
|
307
|
+
invlists->merge_from(other->invlists, add_id);
|
|
308
|
+
ntotal += other->ntotal;
|
|
309
|
+
other->ntotal = 0;
|
|
298
310
|
}
|
|
299
311
|
|
|
300
312
|
void IndexBinaryIVF::replace_invlists(InvertedLists* il, bool own) {
|
|
@@ -650,7 +662,10 @@ void IndexBinaryIVF::range_search(
|
|
|
650
662
|
idx_t n,
|
|
651
663
|
const uint8_t* x,
|
|
652
664
|
int radius,
|
|
653
|
-
RangeSearchResult* res
|
|
665
|
+
RangeSearchResult* res,
|
|
666
|
+
const SearchParameters* params) const {
|
|
667
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
668
|
+
!params, "search params not supported for this index");
|
|
654
669
|
const size_t nprobe = std::min(nlist, this->nprobe);
|
|
655
670
|
std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
|
|
656
671
|
std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n * nprobe]);
|
|
@@ -123,13 +123,15 @@ struct IndexBinaryIVF : IndexBinary {
|
|
|
123
123
|
const uint8_t* x,
|
|
124
124
|
idx_t k,
|
|
125
125
|
int32_t* distances,
|
|
126
|
-
idx_t* labels
|
|
126
|
+
idx_t* labels,
|
|
127
|
+
const SearchParameters* params = nullptr) const override;
|
|
127
128
|
|
|
128
129
|
void range_search(
|
|
129
130
|
idx_t n,
|
|
130
131
|
const uint8_t* x,
|
|
131
132
|
int radius,
|
|
132
|
-
RangeSearchResult* result
|
|
133
|
+
RangeSearchResult* result,
|
|
134
|
+
const SearchParameters* params = nullptr) const override;
|
|
133
135
|
|
|
134
136
|
void range_search_preassigned(
|
|
135
137
|
idx_t n,
|
|
@@ -167,7 +169,8 @@ struct IndexBinaryIVF : IndexBinary {
|
|
|
167
169
|
idx_t k,
|
|
168
170
|
int32_t* distances,
|
|
169
171
|
idx_t* labels,
|
|
170
|
-
uint8_t* recons
|
|
172
|
+
uint8_t* recons,
|
|
173
|
+
const SearchParameters* params = nullptr) const override;
|
|
171
174
|
|
|
172
175
|
/** Reconstruct a vector given the location in terms of (inv list index +
|
|
173
176
|
* inv list offset) instead of the id.
|
|
@@ -184,10 +187,10 @@ struct IndexBinaryIVF : IndexBinary {
|
|
|
184
187
|
/// Dataset manipulation functions
|
|
185
188
|
size_t remove_ids(const IDSelector& sel) override;
|
|
186
189
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
190
|
+
void merge_from(IndexBinary& other, idx_t add_id) override;
|
|
191
|
+
|
|
192
|
+
void check_compatible_for_merge(
|
|
193
|
+
const IndexBinary& otherIndex) const override;
|
|
191
194
|
|
|
192
195
|
size_t get_list_size(size_t list_no) const {
|
|
193
196
|
return invlists->list_size(list_no);
|