faiss 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +20 -2
|
@@ -14,67 +14,55 @@
|
|
|
14
14
|
* vectors Often these are pre-processing steps.
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
|
-
#include <vector>
|
|
18
17
|
#include <stdint.h>
|
|
18
|
+
#include <vector>
|
|
19
19
|
|
|
20
20
|
#include <faiss/Index.h>
|
|
21
21
|
|
|
22
|
-
|
|
23
22
|
namespace faiss {
|
|
24
23
|
|
|
25
|
-
|
|
26
24
|
/** Any transformation applied on a set of vectors */
|
|
27
25
|
struct VectorTransform {
|
|
28
|
-
|
|
29
26
|
typedef Index::idx_t idx_t;
|
|
30
27
|
|
|
31
|
-
int d_in;
|
|
32
|
-
int d_out;
|
|
33
|
-
|
|
34
|
-
explicit VectorTransform (int d_in = 0, int d_out = 0):
|
|
35
|
-
d_in(d_in), d_out(d_out), is_trained(true)
|
|
36
|
-
{}
|
|
28
|
+
int d_in; ///! input dimension
|
|
29
|
+
int d_out; ///! output dimension
|
|
37
30
|
|
|
31
|
+
explicit VectorTransform(int d_in = 0, int d_out = 0)
|
|
32
|
+
: d_in(d_in), d_out(d_out), is_trained(true) {}
|
|
38
33
|
|
|
39
34
|
/// set if the VectorTransform does not require training, or if
|
|
40
35
|
/// training is done already
|
|
41
36
|
bool is_trained;
|
|
42
37
|
|
|
43
|
-
|
|
44
38
|
/** Perform training on a representative set of vectors. Does
|
|
45
39
|
* nothing by default.
|
|
46
40
|
*
|
|
47
41
|
* @param n nb of training vectors
|
|
48
42
|
* @param x training vecors, size n * d
|
|
49
43
|
*/
|
|
50
|
-
virtual void train
|
|
44
|
+
virtual void train(idx_t n, const float* x);
|
|
51
45
|
|
|
52
|
-
/** apply the random
|
|
46
|
+
/** apply the random rotation, return new allocated matrix
|
|
53
47
|
* @param x size n * d_in
|
|
54
48
|
* @return size n * d_out
|
|
55
49
|
*/
|
|
56
|
-
float
|
|
50
|
+
float* apply(idx_t n, const float* x) const;
|
|
57
51
|
|
|
58
52
|
/// same as apply, but result is pre-allocated
|
|
59
|
-
virtual void apply_noalloc
|
|
60
|
-
float *xt) const = 0;
|
|
53
|
+
virtual void apply_noalloc(idx_t n, const float* x, float* xt) const = 0;
|
|
61
54
|
|
|
62
55
|
/// reverse transformation. May not be implemented or may return
|
|
63
56
|
/// approximate result
|
|
64
|
-
virtual void reverse_transform
|
|
65
|
-
float *x) const;
|
|
66
|
-
|
|
67
|
-
virtual ~VectorTransform () {}
|
|
57
|
+
virtual void reverse_transform(idx_t n, const float* xt, float* x) const;
|
|
68
58
|
|
|
59
|
+
virtual ~VectorTransform() {}
|
|
69
60
|
};
|
|
70
61
|
|
|
71
|
-
|
|
72
|
-
|
|
73
62
|
/** Generic linear transformation, with bias term applied on output
|
|
74
63
|
* y = A * x + b
|
|
75
64
|
*/
|
|
76
|
-
struct LinearTransform: VectorTransform {
|
|
77
|
-
|
|
65
|
+
struct LinearTransform : VectorTransform {
|
|
78
66
|
bool have_bias; ///! whether to use the bias term
|
|
79
67
|
|
|
80
68
|
/// check if matrix A is orthonormal (enables reverse_transform)
|
|
@@ -83,58 +71,56 @@ struct LinearTransform: VectorTransform {
|
|
|
83
71
|
/// Transformation matrix, size d_out * d_in
|
|
84
72
|
std::vector<float> A;
|
|
85
73
|
|
|
86
|
-
|
|
74
|
+
/// bias vector, size d_out
|
|
87
75
|
std::vector<float> b;
|
|
88
76
|
|
|
89
77
|
/// both d_in > d_out and d_out < d_in are supported
|
|
90
|
-
explicit LinearTransform
|
|
91
|
-
|
|
78
|
+
explicit LinearTransform(
|
|
79
|
+
int d_in = 0,
|
|
80
|
+
int d_out = 0,
|
|
81
|
+
bool have_bias = false);
|
|
92
82
|
|
|
93
83
|
/// same as apply, but result is pre-allocated
|
|
94
84
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
|
95
85
|
|
|
96
86
|
/// compute x = A^T * (x - b)
|
|
97
87
|
/// is reverse transform if A has orthonormal lines
|
|
98
|
-
void transform_transpose
|
|
99
|
-
float *x) const;
|
|
88
|
+
void transform_transpose(idx_t n, const float* y, float* x) const;
|
|
100
89
|
|
|
101
90
|
/// works only if is_orthonormal
|
|
102
|
-
void reverse_transform
|
|
103
|
-
float *x) const override;
|
|
91
|
+
void reverse_transform(idx_t n, const float* xt, float* x) const override;
|
|
104
92
|
|
|
105
93
|
/// compute A^T * A to set the is_orthonormal flag
|
|
106
|
-
void set_is_orthonormal
|
|
94
|
+
void set_is_orthonormal();
|
|
107
95
|
|
|
108
96
|
bool verbose;
|
|
109
|
-
void print_if_verbose
|
|
110
|
-
|
|
97
|
+
void print_if_verbose(
|
|
98
|
+
const char* name,
|
|
99
|
+
const std::vector<double>& mat,
|
|
100
|
+
int n,
|
|
101
|
+
int d) const;
|
|
111
102
|
|
|
112
103
|
~LinearTransform() override {}
|
|
113
104
|
};
|
|
114
105
|
|
|
115
|
-
|
|
116
|
-
|
|
117
106
|
/// Randomly rotate a set of vectors
|
|
118
|
-
struct RandomRotationMatrix: LinearTransform {
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
LinearTransform(d_in, d_out, false) {}
|
|
107
|
+
struct RandomRotationMatrix : LinearTransform {
|
|
108
|
+
/// both d_in > d_out and d_out < d_in are supported
|
|
109
|
+
RandomRotationMatrix(int d_in, int d_out)
|
|
110
|
+
: LinearTransform(d_in, d_out, false) {}
|
|
123
111
|
|
|
124
|
-
|
|
125
|
-
|
|
112
|
+
/// must be called before the transform is used
|
|
113
|
+
void init(int seed);
|
|
126
114
|
|
|
127
|
-
|
|
128
|
-
|
|
115
|
+
// intializes with an arbitrary seed
|
|
116
|
+
void train(idx_t n, const float* x) override;
|
|
129
117
|
|
|
130
|
-
|
|
118
|
+
RandomRotationMatrix() {}
|
|
131
119
|
};
|
|
132
120
|
|
|
133
|
-
|
|
134
121
|
/** Applies a principal component analysis on a set of vectors,
|
|
135
122
|
* with optionally whitening and random rotation. */
|
|
136
|
-
struct PCAMatrix: LinearTransform {
|
|
137
|
-
|
|
123
|
+
struct PCAMatrix : LinearTransform {
|
|
138
124
|
/** after transformation the components are multiplied by
|
|
139
125
|
* eigenvalues^eigen_power
|
|
140
126
|
*
|
|
@@ -162,22 +148,23 @@ struct PCAMatrix: LinearTransform {
|
|
|
162
148
|
std::vector<float> PCAMat;
|
|
163
149
|
|
|
164
150
|
// the final matrix is computed after random rotation and/or whitening
|
|
165
|
-
explicit PCAMatrix
|
|
166
|
-
|
|
151
|
+
explicit PCAMatrix(
|
|
152
|
+
int d_in = 0,
|
|
153
|
+
int d_out = 0,
|
|
154
|
+
float eigen_power = 0,
|
|
155
|
+
bool random_rotation = false);
|
|
167
156
|
|
|
168
157
|
/// train on n vectors. If n < d_in then the eigenvector matrix
|
|
169
158
|
/// will be completed with 0s
|
|
170
159
|
void train(idx_t n, const float* x) override;
|
|
171
160
|
|
|
172
161
|
/// copy pre-trained PCA matrix
|
|
173
|
-
void copy_from
|
|
162
|
+
void copy_from(const PCAMatrix& other);
|
|
174
163
|
|
|
175
164
|
/// called after mean, PCAMat and eigenvalues are computed
|
|
176
165
|
void prepare_Ab();
|
|
177
|
-
|
|
178
166
|
};
|
|
179
167
|
|
|
180
|
-
|
|
181
168
|
/** ITQ implementation from
|
|
182
169
|
*
|
|
183
170
|
* Iterative quantization: A procrustean approach to learning binary codes
|
|
@@ -187,25 +174,21 @@ struct PCAMatrix: LinearTransform {
|
|
|
187
174
|
* PAMI'12.
|
|
188
175
|
*/
|
|
189
176
|
|
|
190
|
-
struct ITQMatrix: LinearTransform {
|
|
191
|
-
|
|
177
|
+
struct ITQMatrix : LinearTransform {
|
|
192
178
|
int max_iter;
|
|
193
179
|
int seed;
|
|
194
180
|
|
|
195
181
|
// force initialization of the rotation (for debugging)
|
|
196
182
|
std::vector<double> init_rotation;
|
|
197
183
|
|
|
198
|
-
explicit ITQMatrix
|
|
184
|
+
explicit ITQMatrix(int d = 0);
|
|
199
185
|
|
|
200
|
-
void train
|
|
186
|
+
void train(idx_t n, const float* x) override;
|
|
201
187
|
};
|
|
202
188
|
|
|
203
|
-
|
|
204
|
-
|
|
205
189
|
/** The full ITQ transform, including normalizations and PCA transformation
|
|
206
190
|
*/
|
|
207
|
-
struct ITQTransform: VectorTransform {
|
|
208
|
-
|
|
191
|
+
struct ITQTransform : VectorTransform {
|
|
209
192
|
std::vector<float> mean;
|
|
210
193
|
bool do_pca;
|
|
211
194
|
ITQMatrix itq;
|
|
@@ -216,15 +199,13 @@ struct ITQTransform: VectorTransform {
|
|
|
216
199
|
// concatenation of PCA + ITQ transformation
|
|
217
200
|
LinearTransform pca_then_itq;
|
|
218
201
|
|
|
219
|
-
explicit ITQTransform
|
|
220
|
-
|
|
221
|
-
void train (idx_t n, const float *x) override;
|
|
202
|
+
explicit ITQTransform(int d_in = 0, int d_out = 0, bool do_pca = false);
|
|
222
203
|
|
|
223
|
-
void
|
|
204
|
+
void train(idx_t n, const float* x) override;
|
|
224
205
|
|
|
206
|
+
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
|
225
207
|
};
|
|
226
208
|
|
|
227
|
-
|
|
228
209
|
struct ProductQuantizer;
|
|
229
210
|
|
|
230
211
|
/** Applies a rotation to align the dimensions with a PQ to minimize
|
|
@@ -235,8 +216,7 @@ struct ProductQuantizer;
|
|
|
235
216
|
* Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
|
|
236
217
|
*
|
|
237
218
|
*/
|
|
238
|
-
struct OPQMatrix: LinearTransform {
|
|
239
|
-
|
|
219
|
+
struct OPQMatrix : LinearTransform {
|
|
240
220
|
int M; ///< nb of subquantizers
|
|
241
221
|
int niter; ///< Number of outer training iterations
|
|
242
222
|
int niter_pq; ///< Number of training iterations for the PQ
|
|
@@ -248,46 +228,43 @@ struct OPQMatrix: LinearTransform {
|
|
|
248
228
|
|
|
249
229
|
/// if non-NULL, use this product quantizer for training
|
|
250
230
|
/// should be constructed with (d_out, M, _)
|
|
251
|
-
ProductQuantizer
|
|
231
|
+
ProductQuantizer* pq;
|
|
252
232
|
|
|
253
233
|
/// if d2 != -1, output vectors of this dimension
|
|
254
|
-
explicit OPQMatrix
|
|
234
|
+
explicit OPQMatrix(int d = 0, int M = 1, int d2 = -1);
|
|
255
235
|
|
|
256
236
|
void train(idx_t n, const float* x) override;
|
|
257
237
|
};
|
|
258
238
|
|
|
259
|
-
|
|
260
239
|
/** remap dimensions for intput vectors, possibly inserting 0s
|
|
261
240
|
* strictly speaking this is also a linear transform but we don't want
|
|
262
241
|
* to compute it with matrix multiplies */
|
|
263
|
-
struct RemapDimensionsTransform: VectorTransform {
|
|
264
|
-
|
|
242
|
+
struct RemapDimensionsTransform : VectorTransform {
|
|
265
243
|
/// map from output dimension to input, size d_out
|
|
266
244
|
/// -1 -> set output to 0
|
|
267
245
|
std::vector<int> map;
|
|
268
246
|
|
|
269
|
-
RemapDimensionsTransform
|
|
247
|
+
RemapDimensionsTransform(int d_in, int d_out, const int* map);
|
|
270
248
|
|
|
271
249
|
/// remap input to output, skipping or inserting dimensions as needed
|
|
272
250
|
/// if uniform: distribute dimensions uniformly
|
|
273
251
|
/// otherwise just take the d_out first ones.
|
|
274
|
-
RemapDimensionsTransform
|
|
252
|
+
RemapDimensionsTransform(int d_in, int d_out, bool uniform = true);
|
|
275
253
|
|
|
276
254
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
|
277
255
|
|
|
278
256
|
/// reverse transform correct only when the mapping is a permutation
|
|
279
257
|
void reverse_transform(idx_t n, const float* xt, float* x) const override;
|
|
280
258
|
|
|
281
|
-
RemapDimensionsTransform
|
|
259
|
+
RemapDimensionsTransform() {}
|
|
282
260
|
};
|
|
283
261
|
|
|
284
|
-
|
|
285
262
|
/** per-vector normalization */
|
|
286
|
-
struct NormalizationTransform: VectorTransform {
|
|
263
|
+
struct NormalizationTransform : VectorTransform {
|
|
287
264
|
float norm;
|
|
288
265
|
|
|
289
|
-
explicit NormalizationTransform
|
|
290
|
-
NormalizationTransform
|
|
266
|
+
explicit NormalizationTransform(int d, float norm = 2.0);
|
|
267
|
+
NormalizationTransform();
|
|
291
268
|
|
|
292
269
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
|
293
270
|
|
|
@@ -296,12 +273,11 @@ struct NormalizationTransform: VectorTransform {
|
|
|
296
273
|
};
|
|
297
274
|
|
|
298
275
|
/** Subtract the mean of each component from the vectors. */
|
|
299
|
-
struct CenteringTransform: VectorTransform {
|
|
300
|
-
|
|
276
|
+
struct CenteringTransform : VectorTransform {
|
|
301
277
|
/// Mean, size d_in = d_out
|
|
302
278
|
std::vector<float> mean;
|
|
303
279
|
|
|
304
|
-
explicit CenteringTransform
|
|
280
|
+
explicit CenteringTransform(int d = 0);
|
|
305
281
|
|
|
306
282
|
/// train on n vectors.
|
|
307
283
|
void train(idx_t n, const float* x) override;
|
|
@@ -310,13 +286,9 @@ struct CenteringTransform: VectorTransform {
|
|
|
310
286
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
|
311
287
|
|
|
312
288
|
/// add the mean
|
|
313
|
-
void reverse_transform
|
|
314
|
-
float *x) const override;
|
|
315
|
-
|
|
289
|
+
void reverse_transform(idx_t n, const float* xt, float* x) const override;
|
|
316
290
|
};
|
|
317
291
|
|
|
318
|
-
|
|
319
292
|
} // namespace faiss
|
|
320
293
|
|
|
321
|
-
|
|
322
294
|
#endif
|
|
@@ -14,22 +14,23 @@
|
|
|
14
14
|
|
|
15
15
|
#include <faiss/impl/FaissAssert.h>
|
|
16
16
|
|
|
17
|
+
#include <faiss/Index2Layer.h>
|
|
17
18
|
#include <faiss/IndexFlat.h>
|
|
18
|
-
#include <faiss/
|
|
19
|
-
#include <faiss/IndexPreTransform.h>
|
|
20
|
-
#include <faiss/IndexLSH.h>
|
|
21
|
-
#include <faiss/IndexPQ.h>
|
|
19
|
+
#include <faiss/IndexHNSW.h>
|
|
22
20
|
#include <faiss/IndexIVF.h>
|
|
21
|
+
#include <faiss/IndexIVFFlat.h>
|
|
23
22
|
#include <faiss/IndexIVFPQ.h>
|
|
24
23
|
#include <faiss/IndexIVFPQR.h>
|
|
25
|
-
#include <faiss/Index2Layer.h>
|
|
26
|
-
#include <faiss/IndexIVFFlat.h>
|
|
27
24
|
#include <faiss/IndexIVFSpectralHash.h>
|
|
28
|
-
#include <faiss/
|
|
29
|
-
#include <faiss/IndexScalarQuantizer.h>
|
|
30
|
-
#include <faiss/IndexHNSW.h>
|
|
25
|
+
#include <faiss/IndexLSH.h>
|
|
31
26
|
#include <faiss/IndexLattice.h>
|
|
32
|
-
#include <faiss/
|
|
27
|
+
#include <faiss/IndexNSG.h>
|
|
28
|
+
#include <faiss/IndexPQ.h>
|
|
29
|
+
#include <faiss/IndexPreTransform.h>
|
|
30
|
+
#include <faiss/IndexResidual.h>
|
|
31
|
+
#include <faiss/IndexScalarQuantizer.h>
|
|
32
|
+
#include <faiss/MetaIndexes.h>
|
|
33
|
+
#include <faiss/VectorTransform.h>
|
|
33
34
|
|
|
34
35
|
namespace faiss {
|
|
35
36
|
|
|
@@ -37,111 +38,114 @@ namespace faiss {
|
|
|
37
38
|
* cloning functions
|
|
38
39
|
**************************************************************/
|
|
39
40
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
Index * clone_index (const Index *index)
|
|
43
|
-
{
|
|
41
|
+
Index* clone_index(const Index* index) {
|
|
44
42
|
Cloner cl;
|
|
45
|
-
return cl.clone_Index
|
|
43
|
+
return cl.clone_Index(index);
|
|
46
44
|
}
|
|
47
45
|
|
|
48
46
|
// assumes there is a copy constructor ready. Always try from most
|
|
49
47
|
// specific to most general. Most indexes don't have complicated
|
|
50
48
|
// structs, the default copy constructor often just works.
|
|
51
|
-
#define TRYCLONE(classname, obj)
|
|
52
|
-
if (const classname
|
|
53
|
-
return new classname(*clo);
|
|
49
|
+
#define TRYCLONE(classname, obj) \
|
|
50
|
+
if (const classname* clo = dynamic_cast<const classname*>(obj)) { \
|
|
51
|
+
return new classname(*clo); \
|
|
54
52
|
} else
|
|
55
53
|
|
|
56
|
-
VectorTransform
|
|
57
|
-
|
|
58
|
-
TRYCLONE
|
|
59
|
-
TRYCLONE
|
|
60
|
-
TRYCLONE
|
|
61
|
-
TRYCLONE
|
|
62
|
-
TRYCLONE
|
|
63
|
-
|
|
64
|
-
{
|
|
65
|
-
FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
|
|
54
|
+
VectorTransform* Cloner::clone_VectorTransform(const VectorTransform* vt) {
|
|
55
|
+
TRYCLONE(RemapDimensionsTransform, vt)
|
|
56
|
+
TRYCLONE(OPQMatrix, vt)
|
|
57
|
+
TRYCLONE(PCAMatrix, vt)
|
|
58
|
+
TRYCLONE(ITQMatrix, vt)
|
|
59
|
+
TRYCLONE(RandomRotationMatrix, vt)
|
|
60
|
+
TRYCLONE(LinearTransform, vt) {
|
|
61
|
+
FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
|
|
66
62
|
}
|
|
67
63
|
return nullptr;
|
|
68
64
|
}
|
|
69
65
|
|
|
70
|
-
IndexIVF
|
|
71
|
-
|
|
72
|
-
TRYCLONE
|
|
73
|
-
TRYCLONE
|
|
74
|
-
TRYCLONE
|
|
75
|
-
|
|
76
|
-
{
|
|
77
|
-
FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
|
|
66
|
+
IndexIVF* Cloner::clone_IndexIVF(const IndexIVF* ivf) {
|
|
67
|
+
TRYCLONE(IndexIVFPQR, ivf)
|
|
68
|
+
TRYCLONE(IndexIVFPQ, ivf)
|
|
69
|
+
TRYCLONE(IndexIVFFlat, ivf)
|
|
70
|
+
TRYCLONE(IndexIVFScalarQuantizer, ivf) {
|
|
71
|
+
FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
|
|
78
72
|
}
|
|
79
73
|
return nullptr;
|
|
80
74
|
}
|
|
81
75
|
|
|
82
|
-
Index
|
|
83
|
-
|
|
84
|
-
TRYCLONE
|
|
85
|
-
TRYCLONE
|
|
86
|
-
TRYCLONE
|
|
87
|
-
TRYCLONE
|
|
88
|
-
TRYCLONE
|
|
89
|
-
TRYCLONE
|
|
90
|
-
TRYCLONE
|
|
91
|
-
TRYCLONE
|
|
92
|
-
if (const IndexIVF
|
|
93
|
-
IndexIVF
|
|
76
|
+
Index* Cloner::clone_Index(const Index* index) {
|
|
77
|
+
TRYCLONE(IndexPQ, index)
|
|
78
|
+
TRYCLONE(IndexLSH, index)
|
|
79
|
+
TRYCLONE(IndexFlatL2, index)
|
|
80
|
+
TRYCLONE(IndexFlatIP, index)
|
|
81
|
+
TRYCLONE(IndexFlat, index)
|
|
82
|
+
TRYCLONE(IndexLattice, index)
|
|
83
|
+
TRYCLONE(IndexResidual, index)
|
|
84
|
+
TRYCLONE(IndexScalarQuantizer, index)
|
|
85
|
+
TRYCLONE(MultiIndexQuantizer, index)
|
|
86
|
+
if (const IndexIVF* ivf = dynamic_cast<const IndexIVF*>(index)) {
|
|
87
|
+
IndexIVF* res = clone_IndexIVF(ivf);
|
|
94
88
|
if (ivf->invlists == nullptr) {
|
|
95
89
|
res->invlists = nullptr;
|
|
96
|
-
} else if (
|
|
97
|
-
|
|
90
|
+
} else if (
|
|
91
|
+
auto* ails = dynamic_cast<const ArrayInvertedLists*>(
|
|
92
|
+
ivf->invlists)) {
|
|
98
93
|
res->invlists = new ArrayInvertedLists(*ails);
|
|
99
94
|
res->own_invlists = true;
|
|
100
95
|
} else {
|
|
101
|
-
FAISS_THROW_MSG(
|
|
96
|
+
FAISS_THROW_MSG(
|
|
97
|
+
"clone not supported for this type of inverted lists");
|
|
102
98
|
}
|
|
103
99
|
res->own_fields = true;
|
|
104
|
-
res->quantizer = clone_Index
|
|
100
|
+
res->quantizer = clone_Index(ivf->quantizer);
|
|
105
101
|
return res;
|
|
106
|
-
} else if (
|
|
107
|
-
|
|
108
|
-
|
|
102
|
+
} else if (
|
|
103
|
+
const IndexPreTransform* ipt =
|
|
104
|
+
dynamic_cast<const IndexPreTransform*>(index)) {
|
|
105
|
+
IndexPreTransform* res = new IndexPreTransform();
|
|
109
106
|
res->d = ipt->d;
|
|
110
107
|
res->ntotal = ipt->ntotal;
|
|
111
108
|
res->is_trained = ipt->is_trained;
|
|
112
109
|
res->metric_type = ipt->metric_type;
|
|
113
110
|
res->metric_arg = ipt->metric_arg;
|
|
114
111
|
|
|
115
|
-
|
|
116
|
-
res->index = clone_Index (ipt->index);
|
|
112
|
+
res->index = clone_Index(ipt->index);
|
|
117
113
|
for (int i = 0; i < ipt->chain.size(); i++)
|
|
118
|
-
res->chain.push_back
|
|
114
|
+
res->chain.push_back(clone_VectorTransform(ipt->chain[i]));
|
|
119
115
|
res->own_fields = true;
|
|
120
116
|
return res;
|
|
121
|
-
} else if (
|
|
122
|
-
|
|
123
|
-
IndexIDMap
|
|
117
|
+
} else if (
|
|
118
|
+
const IndexIDMap* idmap = dynamic_cast<const IndexIDMap*>(index)) {
|
|
119
|
+
IndexIDMap* res = new IndexIDMap(*idmap);
|
|
124
120
|
res->own_fields = true;
|
|
125
|
-
res->index = clone_Index
|
|
121
|
+
res->index = clone_Index(idmap->index);
|
|
126
122
|
return res;
|
|
127
|
-
} else if (const IndexHNSW
|
|
128
|
-
|
|
129
|
-
IndexHNSW *res = new IndexHNSW (*ihnsw);
|
|
123
|
+
} else if (const IndexHNSW* ihnsw = dynamic_cast<const IndexHNSW*>(index)) {
|
|
124
|
+
IndexHNSW* res = new IndexHNSW(*ihnsw);
|
|
130
125
|
res->own_fields = true;
|
|
131
|
-
res->storage = clone_Index
|
|
126
|
+
res->storage = clone_Index(ihnsw->storage);
|
|
132
127
|
return res;
|
|
133
|
-
} else if (const
|
|
134
|
-
|
|
135
|
-
|
|
128
|
+
} else if (const IndexNSG* insg = dynamic_cast<const IndexNSG*>(index)) {
|
|
129
|
+
IndexNSG* res = new IndexNSG(*insg);
|
|
130
|
+
|
|
131
|
+
// copy the dynamic allocated graph
|
|
132
|
+
auto& new_graph = res->nsg.final_graph;
|
|
133
|
+
auto& old_graph = insg->nsg.final_graph;
|
|
134
|
+
new_graph = std::make_shared<nsg::Graph<int>>(*old_graph);
|
|
135
|
+
|
|
136
|
+
res->own_fields = true;
|
|
137
|
+
res->storage = clone_Index(insg->storage);
|
|
138
|
+
return res;
|
|
139
|
+
} else if (
|
|
140
|
+
const Index2Layer* i2l = dynamic_cast<const Index2Layer*>(index)) {
|
|
141
|
+
Index2Layer* res = new Index2Layer(*i2l);
|
|
136
142
|
res->q1.own_fields = true;
|
|
137
|
-
res->q1.quantizer = clone_Index
|
|
143
|
+
res->q1.quantizer = clone_Index(i2l->q1.quantizer);
|
|
138
144
|
return res;
|
|
139
145
|
} else {
|
|
140
|
-
FAISS_THROW_MSG(
|
|
146
|
+
FAISS_THROW_MSG("clone not supported for this type of Index");
|
|
141
147
|
}
|
|
142
148
|
return nullptr;
|
|
143
149
|
}
|
|
144
150
|
|
|
145
|
-
|
|
146
|
-
|
|
147
151
|
} // namespace faiss
|