faiss 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +20 -2
@@ -14,67 +14,55 @@
|
|
14
14
|
* vectors Often these are pre-processing steps.
|
15
15
|
*/
|
16
16
|
|
17
|
-
#include <vector>
|
18
17
|
#include <stdint.h>
|
18
|
+
#include <vector>
|
19
19
|
|
20
20
|
#include <faiss/Index.h>
|
21
21
|
|
22
|
-
|
23
22
|
namespace faiss {
|
24
23
|
|
25
|
-
|
26
24
|
/** Any transformation applied on a set of vectors */
|
27
25
|
struct VectorTransform {
|
28
|
-
|
29
26
|
typedef Index::idx_t idx_t;
|
30
27
|
|
31
|
-
int d_in;
|
32
|
-
int d_out;
|
33
|
-
|
34
|
-
explicit VectorTransform (int d_in = 0, int d_out = 0):
|
35
|
-
d_in(d_in), d_out(d_out), is_trained(true)
|
36
|
-
{}
|
28
|
+
int d_in; ///! input dimension
|
29
|
+
int d_out; ///! output dimension
|
37
30
|
|
31
|
+
explicit VectorTransform(int d_in = 0, int d_out = 0)
|
32
|
+
: d_in(d_in), d_out(d_out), is_trained(true) {}
|
38
33
|
|
39
34
|
/// set if the VectorTransform does not require training, or if
|
40
35
|
/// training is done already
|
41
36
|
bool is_trained;
|
42
37
|
|
43
|
-
|
44
38
|
/** Perform training on a representative set of vectors. Does
|
45
39
|
* nothing by default.
|
46
40
|
*
|
47
41
|
* @param n nb of training vectors
|
48
42
|
* @param x training vectors, size n * d
|
49
43
|
*/
|
50
|
-
virtual void train
|
44
|
+
virtual void train(idx_t n, const float* x);
|
51
45
|
|
52
|
-
/** apply the random
|
46
|
+
/** apply the random rotation, return newly allocated matrix
|
53
47
|
* @param x size n * d_in
|
54
48
|
* @return size n * d_out
|
55
49
|
*/
|
56
|
-
float
|
50
|
+
float* apply(idx_t n, const float* x) const;
|
57
51
|
|
58
52
|
/// same as apply, but result is pre-allocated
|
59
|
-
virtual void apply_noalloc
|
60
|
-
float *xt) const = 0;
|
53
|
+
virtual void apply_noalloc(idx_t n, const float* x, float* xt) const = 0;
|
61
54
|
|
62
55
|
/// reverse transformation. May not be implemented or may return
|
63
56
|
/// approximate result
|
64
|
-
virtual void reverse_transform
|
65
|
-
float *x) const;
|
66
|
-
|
67
|
-
virtual ~VectorTransform () {}
|
57
|
+
virtual void reverse_transform(idx_t n, const float* xt, float* x) const;
|
68
58
|
|
59
|
+
virtual ~VectorTransform() {}
|
69
60
|
};
|
70
61
|
|
71
|
-
|
72
|
-
|
73
62
|
/** Generic linear transformation, with bias term applied on output
|
74
63
|
* y = A * x + b
|
75
64
|
*/
|
76
|
-
struct LinearTransform: VectorTransform {
|
77
|
-
|
65
|
+
struct LinearTransform : VectorTransform {
|
78
66
|
bool have_bias; ///! whether to use the bias term
|
79
67
|
|
80
68
|
/// check if matrix A is orthonormal (enables reverse_transform)
|
@@ -83,58 +71,56 @@ struct LinearTransform: VectorTransform {
|
|
83
71
|
/// Transformation matrix, size d_out * d_in
|
84
72
|
std::vector<float> A;
|
85
73
|
|
86
|
-
|
74
|
+
/// bias vector, size d_out
|
87
75
|
std::vector<float> b;
|
88
76
|
|
89
77
|
/// both d_in > d_out and d_in < d_out are supported
|
90
|
-
explicit LinearTransform
|
91
|
-
|
78
|
+
explicit LinearTransform(
|
79
|
+
int d_in = 0,
|
80
|
+
int d_out = 0,
|
81
|
+
bool have_bias = false);
|
92
82
|
|
93
83
|
/// same as apply, but result is pre-allocated
|
94
84
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
95
85
|
|
96
86
|
/// compute x = A^T * (x - b)
|
97
87
|
/// is reverse transform if A has orthonormal lines
|
98
|
-
void transform_transpose
|
99
|
-
float *x) const;
|
88
|
+
void transform_transpose(idx_t n, const float* y, float* x) const;
|
100
89
|
|
101
90
|
/// works only if is_orthonormal
|
102
|
-
void reverse_transform
|
103
|
-
float *x) const override;
|
91
|
+
void reverse_transform(idx_t n, const float* xt, float* x) const override;
|
104
92
|
|
105
93
|
/// compute A^T * A to set the is_orthonormal flag
|
106
|
-
void set_is_orthonormal
|
94
|
+
void set_is_orthonormal();
|
107
95
|
|
108
96
|
bool verbose;
|
109
|
-
void print_if_verbose
|
110
|
-
|
97
|
+
void print_if_verbose(
|
98
|
+
const char* name,
|
99
|
+
const std::vector<double>& mat,
|
100
|
+
int n,
|
101
|
+
int d) const;
|
111
102
|
|
112
103
|
~LinearTransform() override {}
|
113
104
|
};
|
114
105
|
|
115
|
-
|
116
|
-
|
117
106
|
/// Randomly rotate a set of vectors
|
118
|
-
struct RandomRotationMatrix: LinearTransform {
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
LinearTransform(d_in, d_out, false) {}
|
107
|
+
struct RandomRotationMatrix : LinearTransform {
|
108
|
+
/// both d_in > d_out and d_in < d_out are supported
|
109
|
+
RandomRotationMatrix(int d_in, int d_out)
|
110
|
+
: LinearTransform(d_in, d_out, false) {}
|
123
111
|
|
124
|
-
|
125
|
-
|
112
|
+
/// must be called before the transform is used
|
113
|
+
void init(int seed);
|
126
114
|
|
127
|
-
|
128
|
-
|
115
|
+
// initializes with an arbitrary seed
|
116
|
+
void train(idx_t n, const float* x) override;
|
129
117
|
|
130
|
-
|
118
|
+
RandomRotationMatrix() {}
|
131
119
|
};
|
132
120
|
|
133
|
-
|
134
121
|
/** Applies a principal component analysis on a set of vectors,
|
135
122
|
* with optionally whitening and random rotation. */
|
136
|
-
struct PCAMatrix: LinearTransform {
|
137
|
-
|
123
|
+
struct PCAMatrix : LinearTransform {
|
138
124
|
/** after transformation the components are multiplied by
|
139
125
|
* eigenvalues^eigen_power
|
140
126
|
*
|
@@ -162,22 +148,23 @@ struct PCAMatrix: LinearTransform {
|
|
162
148
|
std::vector<float> PCAMat;
|
163
149
|
|
164
150
|
// the final matrix is computed after random rotation and/or whitening
|
165
|
-
explicit PCAMatrix
|
166
|
-
|
151
|
+
explicit PCAMatrix(
|
152
|
+
int d_in = 0,
|
153
|
+
int d_out = 0,
|
154
|
+
float eigen_power = 0,
|
155
|
+
bool random_rotation = false);
|
167
156
|
|
168
157
|
/// train on n vectors. If n < d_in then the eigenvector matrix
|
169
158
|
/// will be completed with 0s
|
170
159
|
void train(idx_t n, const float* x) override;
|
171
160
|
|
172
161
|
/// copy pre-trained PCA matrix
|
173
|
-
void copy_from
|
162
|
+
void copy_from(const PCAMatrix& other);
|
174
163
|
|
175
164
|
/// called after mean, PCAMat and eigenvalues are computed
|
176
165
|
void prepare_Ab();
|
177
|
-
|
178
166
|
};
|
179
167
|
|
180
|
-
|
181
168
|
/** ITQ implementation from
|
182
169
|
*
|
183
170
|
* Iterative quantization: A procrustean approach to learning binary codes
|
@@ -187,25 +174,21 @@ struct PCAMatrix: LinearTransform {
|
|
187
174
|
* PAMI'12.
|
188
175
|
*/
|
189
176
|
|
190
|
-
struct ITQMatrix: LinearTransform {
|
191
|
-
|
177
|
+
struct ITQMatrix : LinearTransform {
|
192
178
|
int max_iter;
|
193
179
|
int seed;
|
194
180
|
|
195
181
|
// force initialization of the rotation (for debugging)
|
196
182
|
std::vector<double> init_rotation;
|
197
183
|
|
198
|
-
explicit ITQMatrix
|
184
|
+
explicit ITQMatrix(int d = 0);
|
199
185
|
|
200
|
-
void train
|
186
|
+
void train(idx_t n, const float* x) override;
|
201
187
|
};
|
202
188
|
|
203
|
-
|
204
|
-
|
205
189
|
/** The full ITQ transform, including normalizations and PCA transformation
|
206
190
|
*/
|
207
|
-
struct ITQTransform: VectorTransform {
|
208
|
-
|
191
|
+
struct ITQTransform : VectorTransform {
|
209
192
|
std::vector<float> mean;
|
210
193
|
bool do_pca;
|
211
194
|
ITQMatrix itq;
|
@@ -216,15 +199,13 @@ struct ITQTransform: VectorTransform {
|
|
216
199
|
// concatenation of PCA + ITQ transformation
|
217
200
|
LinearTransform pca_then_itq;
|
218
201
|
|
219
|
-
explicit ITQTransform
|
220
|
-
|
221
|
-
void train (idx_t n, const float *x) override;
|
202
|
+
explicit ITQTransform(int d_in = 0, int d_out = 0, bool do_pca = false);
|
222
203
|
|
223
|
-
void
|
204
|
+
void train(idx_t n, const float* x) override;
|
224
205
|
|
206
|
+
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
225
207
|
};
|
226
208
|
|
227
|
-
|
228
209
|
struct ProductQuantizer;
|
229
210
|
|
230
211
|
/** Applies a rotation to align the dimensions with a PQ to minimize
|
@@ -235,8 +216,7 @@ struct ProductQuantizer;
|
|
235
216
|
* Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
|
236
217
|
*
|
237
218
|
*/
|
238
|
-
struct OPQMatrix: LinearTransform {
|
239
|
-
|
219
|
+
struct OPQMatrix : LinearTransform {
|
240
220
|
int M; ///< nb of subquantizers
|
241
221
|
int niter; ///< Number of outer training iterations
|
242
222
|
int niter_pq; ///< Number of training iterations for the PQ
|
@@ -248,46 +228,43 @@ struct OPQMatrix: LinearTransform {
|
|
248
228
|
|
249
229
|
/// if non-NULL, use this product quantizer for training
|
250
230
|
/// should be constructed with (d_out, M, _)
|
251
|
-
ProductQuantizer
|
231
|
+
ProductQuantizer* pq;
|
252
232
|
|
253
233
|
/// if d2 != -1, output vectors of this dimension
|
254
|
-
explicit OPQMatrix
|
234
|
+
explicit OPQMatrix(int d = 0, int M = 1, int d2 = -1);
|
255
235
|
|
256
236
|
void train(idx_t n, const float* x) override;
|
257
237
|
};
|
258
238
|
|
259
|
-
|
260
239
|
/** remap dimensions for input vectors, possibly inserting 0s
|
261
240
|
* strictly speaking this is also a linear transform but we don't want
|
262
241
|
* to compute it with matrix multiplies */
|
263
|
-
struct RemapDimensionsTransform: VectorTransform {
|
264
|
-
|
242
|
+
struct RemapDimensionsTransform : VectorTransform {
|
265
243
|
/// map from output dimension to input, size d_out
|
266
244
|
/// -1 -> set output to 0
|
267
245
|
std::vector<int> map;
|
268
246
|
|
269
|
-
RemapDimensionsTransform
|
247
|
+
RemapDimensionsTransform(int d_in, int d_out, const int* map);
|
270
248
|
|
271
249
|
/// remap input to output, skipping or inserting dimensions as needed
|
272
250
|
/// if uniform: distribute dimensions uniformly
|
273
251
|
/// otherwise just take the d_out first ones.
|
274
|
-
RemapDimensionsTransform
|
252
|
+
RemapDimensionsTransform(int d_in, int d_out, bool uniform = true);
|
275
253
|
|
276
254
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
277
255
|
|
278
256
|
/// reverse transform correct only when the mapping is a permutation
|
279
257
|
void reverse_transform(idx_t n, const float* xt, float* x) const override;
|
280
258
|
|
281
|
-
RemapDimensionsTransform
|
259
|
+
RemapDimensionsTransform() {}
|
282
260
|
};
|
283
261
|
|
284
|
-
|
285
262
|
/** per-vector normalization */
|
286
|
-
struct NormalizationTransform: VectorTransform {
|
263
|
+
struct NormalizationTransform : VectorTransform {
|
287
264
|
float norm;
|
288
265
|
|
289
|
-
explicit NormalizationTransform
|
290
|
-
NormalizationTransform
|
266
|
+
explicit NormalizationTransform(int d, float norm = 2.0);
|
267
|
+
NormalizationTransform();
|
291
268
|
|
292
269
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
293
270
|
|
@@ -296,12 +273,11 @@ struct NormalizationTransform: VectorTransform {
|
|
296
273
|
};
|
297
274
|
|
298
275
|
/** Subtract the mean of each component from the vectors. */
|
299
|
-
struct CenteringTransform: VectorTransform {
|
300
|
-
|
276
|
+
struct CenteringTransform : VectorTransform {
|
301
277
|
/// Mean, size d_in = d_out
|
302
278
|
std::vector<float> mean;
|
303
279
|
|
304
|
-
explicit CenteringTransform
|
280
|
+
explicit CenteringTransform(int d = 0);
|
305
281
|
|
306
282
|
/// train on n vectors.
|
307
283
|
void train(idx_t n, const float* x) override;
|
@@ -310,13 +286,9 @@ struct CenteringTransform: VectorTransform {
|
|
310
286
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
311
287
|
|
312
288
|
/// add the mean
|
313
|
-
void reverse_transform
|
314
|
-
float *x) const override;
|
315
|
-
|
289
|
+
void reverse_transform(idx_t n, const float* xt, float* x) const override;
|
316
290
|
};
|
317
291
|
|
318
|
-
|
319
292
|
} // namespace faiss
|
320
293
|
|
321
|
-
|
322
294
|
#endif
|
@@ -14,22 +14,23 @@
|
|
14
14
|
|
15
15
|
#include <faiss/impl/FaissAssert.h>
|
16
16
|
|
17
|
+
#include <faiss/Index2Layer.h>
|
17
18
|
#include <faiss/IndexFlat.h>
|
18
|
-
#include <faiss/
|
19
|
-
#include <faiss/IndexPreTransform.h>
|
20
|
-
#include <faiss/IndexLSH.h>
|
21
|
-
#include <faiss/IndexPQ.h>
|
19
|
+
#include <faiss/IndexHNSW.h>
|
22
20
|
#include <faiss/IndexIVF.h>
|
21
|
+
#include <faiss/IndexIVFFlat.h>
|
23
22
|
#include <faiss/IndexIVFPQ.h>
|
24
23
|
#include <faiss/IndexIVFPQR.h>
|
25
|
-
#include <faiss/Index2Layer.h>
|
26
|
-
#include <faiss/IndexIVFFlat.h>
|
27
24
|
#include <faiss/IndexIVFSpectralHash.h>
|
28
|
-
#include <faiss/
|
29
|
-
#include <faiss/IndexScalarQuantizer.h>
|
30
|
-
#include <faiss/IndexHNSW.h>
|
25
|
+
#include <faiss/IndexLSH.h>
|
31
26
|
#include <faiss/IndexLattice.h>
|
32
|
-
#include <faiss/
|
27
|
+
#include <faiss/IndexNSG.h>
|
28
|
+
#include <faiss/IndexPQ.h>
|
29
|
+
#include <faiss/IndexPreTransform.h>
|
30
|
+
#include <faiss/IndexResidual.h>
|
31
|
+
#include <faiss/IndexScalarQuantizer.h>
|
32
|
+
#include <faiss/MetaIndexes.h>
|
33
|
+
#include <faiss/VectorTransform.h>
|
33
34
|
|
34
35
|
namespace faiss {
|
35
36
|
|
@@ -37,111 +38,114 @@ namespace faiss {
|
|
37
38
|
* cloning functions
|
38
39
|
**************************************************************/
|
39
40
|
|
40
|
-
|
41
|
-
|
42
|
-
Index * clone_index (const Index *index)
|
43
|
-
{
|
41
|
+
Index* clone_index(const Index* index) {
|
44
42
|
Cloner cl;
|
45
|
-
return cl.clone_Index
|
43
|
+
return cl.clone_Index(index);
|
46
44
|
}
|
47
45
|
|
48
46
|
// assumes there is a copy constructor ready. Always try from most
|
49
47
|
// specific to most general. Most indexes don't have complicated
|
50
48
|
// structs, the default copy constructor often just works.
|
51
|
-
#define TRYCLONE(classname, obj)
|
52
|
-
if (const classname
|
53
|
-
return new classname(*clo);
|
49
|
+
#define TRYCLONE(classname, obj) \
|
50
|
+
if (const classname* clo = dynamic_cast<const classname*>(obj)) { \
|
51
|
+
return new classname(*clo); \
|
54
52
|
} else
|
55
53
|
|
56
|
-
VectorTransform
|
57
|
-
|
58
|
-
TRYCLONE
|
59
|
-
TRYCLONE
|
60
|
-
TRYCLONE
|
61
|
-
TRYCLONE
|
62
|
-
TRYCLONE
|
63
|
-
|
64
|
-
{
|
65
|
-
FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
|
54
|
+
VectorTransform* Cloner::clone_VectorTransform(const VectorTransform* vt) {
|
55
|
+
TRYCLONE(RemapDimensionsTransform, vt)
|
56
|
+
TRYCLONE(OPQMatrix, vt)
|
57
|
+
TRYCLONE(PCAMatrix, vt)
|
58
|
+
TRYCLONE(ITQMatrix, vt)
|
59
|
+
TRYCLONE(RandomRotationMatrix, vt)
|
60
|
+
TRYCLONE(LinearTransform, vt) {
|
61
|
+
FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
|
66
62
|
}
|
67
63
|
return nullptr;
|
68
64
|
}
|
69
65
|
|
70
|
-
IndexIVF
|
71
|
-
|
72
|
-
TRYCLONE
|
73
|
-
TRYCLONE
|
74
|
-
TRYCLONE
|
75
|
-
|
76
|
-
{
|
77
|
-
FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
|
66
|
+
IndexIVF* Cloner::clone_IndexIVF(const IndexIVF* ivf) {
|
67
|
+
TRYCLONE(IndexIVFPQR, ivf)
|
68
|
+
TRYCLONE(IndexIVFPQ, ivf)
|
69
|
+
TRYCLONE(IndexIVFFlat, ivf)
|
70
|
+
TRYCLONE(IndexIVFScalarQuantizer, ivf) {
|
71
|
+
FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
|
78
72
|
}
|
79
73
|
return nullptr;
|
80
74
|
}
|
81
75
|
|
82
|
-
Index
|
83
|
-
|
84
|
-
TRYCLONE
|
85
|
-
TRYCLONE
|
86
|
-
TRYCLONE
|
87
|
-
TRYCLONE
|
88
|
-
TRYCLONE
|
89
|
-
TRYCLONE
|
90
|
-
TRYCLONE
|
91
|
-
TRYCLONE
|
92
|
-
if (const IndexIVF
|
93
|
-
IndexIVF
|
76
|
+
Index* Cloner::clone_Index(const Index* index) {
|
77
|
+
TRYCLONE(IndexPQ, index)
|
78
|
+
TRYCLONE(IndexLSH, index)
|
79
|
+
TRYCLONE(IndexFlatL2, index)
|
80
|
+
TRYCLONE(IndexFlatIP, index)
|
81
|
+
TRYCLONE(IndexFlat, index)
|
82
|
+
TRYCLONE(IndexLattice, index)
|
83
|
+
TRYCLONE(IndexResidual, index)
|
84
|
+
TRYCLONE(IndexScalarQuantizer, index)
|
85
|
+
TRYCLONE(MultiIndexQuantizer, index)
|
86
|
+
if (const IndexIVF* ivf = dynamic_cast<const IndexIVF*>(index)) {
|
87
|
+
IndexIVF* res = clone_IndexIVF(ivf);
|
94
88
|
if (ivf->invlists == nullptr) {
|
95
89
|
res->invlists = nullptr;
|
96
|
-
} else if (
|
97
|
-
|
90
|
+
} else if (
|
91
|
+
auto* ails = dynamic_cast<const ArrayInvertedLists*>(
|
92
|
+
ivf->invlists)) {
|
98
93
|
res->invlists = new ArrayInvertedLists(*ails);
|
99
94
|
res->own_invlists = true;
|
100
95
|
} else {
|
101
|
-
FAISS_THROW_MSG(
|
96
|
+
FAISS_THROW_MSG(
|
97
|
+
"clone not supported for this type of inverted lists");
|
102
98
|
}
|
103
99
|
res->own_fields = true;
|
104
|
-
res->quantizer = clone_Index
|
100
|
+
res->quantizer = clone_Index(ivf->quantizer);
|
105
101
|
return res;
|
106
|
-
} else if (
|
107
|
-
|
108
|
-
|
102
|
+
} else if (
|
103
|
+
const IndexPreTransform* ipt =
|
104
|
+
dynamic_cast<const IndexPreTransform*>(index)) {
|
105
|
+
IndexPreTransform* res = new IndexPreTransform();
|
109
106
|
res->d = ipt->d;
|
110
107
|
res->ntotal = ipt->ntotal;
|
111
108
|
res->is_trained = ipt->is_trained;
|
112
109
|
res->metric_type = ipt->metric_type;
|
113
110
|
res->metric_arg = ipt->metric_arg;
|
114
111
|
|
115
|
-
|
116
|
-
res->index = clone_Index (ipt->index);
|
112
|
+
res->index = clone_Index(ipt->index);
|
117
113
|
for (int i = 0; i < ipt->chain.size(); i++)
|
118
|
-
res->chain.push_back
|
114
|
+
res->chain.push_back(clone_VectorTransform(ipt->chain[i]));
|
119
115
|
res->own_fields = true;
|
120
116
|
return res;
|
121
|
-
} else if (
|
122
|
-
|
123
|
-
IndexIDMap
|
117
|
+
} else if (
|
118
|
+
const IndexIDMap* idmap = dynamic_cast<const IndexIDMap*>(index)) {
|
119
|
+
IndexIDMap* res = new IndexIDMap(*idmap);
|
124
120
|
res->own_fields = true;
|
125
|
-
res->index = clone_Index
|
121
|
+
res->index = clone_Index(idmap->index);
|
126
122
|
return res;
|
127
|
-
} else if (const IndexHNSW
|
128
|
-
|
129
|
-
IndexHNSW *res = new IndexHNSW (*ihnsw);
|
123
|
+
} else if (const IndexHNSW* ihnsw = dynamic_cast<const IndexHNSW*>(index)) {
|
124
|
+
IndexHNSW* res = new IndexHNSW(*ihnsw);
|
130
125
|
res->own_fields = true;
|
131
|
-
res->storage = clone_Index
|
126
|
+
res->storage = clone_Index(ihnsw->storage);
|
132
127
|
return res;
|
133
|
-
} else if (const
|
134
|
-
|
135
|
-
|
128
|
+
} else if (const IndexNSG* insg = dynamic_cast<const IndexNSG*>(index)) {
|
129
|
+
IndexNSG* res = new IndexNSG(*insg);
|
130
|
+
|
131
|
+
// copy the dynamic allocated graph
|
132
|
+
auto& new_graph = res->nsg.final_graph;
|
133
|
+
auto& old_graph = insg->nsg.final_graph;
|
134
|
+
new_graph = std::make_shared<nsg::Graph<int>>(*old_graph);
|
135
|
+
|
136
|
+
res->own_fields = true;
|
137
|
+
res->storage = clone_Index(insg->storage);
|
138
|
+
return res;
|
139
|
+
} else if (
|
140
|
+
const Index2Layer* i2l = dynamic_cast<const Index2Layer*>(index)) {
|
141
|
+
Index2Layer* res = new Index2Layer(*i2l);
|
136
142
|
res->q1.own_fields = true;
|
137
|
-
res->q1.quantizer = clone_Index
|
143
|
+
res->q1.quantizer = clone_Index(i2l->q1.quantizer);
|
138
144
|
return res;
|
139
145
|
} else {
|
140
|
-
FAISS_THROW_MSG(
|
146
|
+
FAISS_THROW_MSG("clone not supported for this type of Index");
|
141
147
|
}
|
142
148
|
return nullptr;
|
143
149
|
}
|
144
150
|
|
145
|
-
|
146
|
-
|
147
151
|
} // namespace faiss
|