faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -14,67 +14,55 @@
|
|
|
14
14
|
* vectors. Often these are pre-processing steps.
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
|
-
#include <vector>
|
|
18
17
|
#include <stdint.h>
|
|
18
|
+
#include <vector>
|
|
19
19
|
|
|
20
20
|
#include <faiss/Index.h>
|
|
21
21
|
|
|
22
|
-
|
|
23
22
|
namespace faiss {
|
|
24
23
|
|
|
25
|
-
|
|
26
24
|
/** Any transformation applied on a set of vectors */
|
|
27
25
|
struct VectorTransform {
|
|
28
|
-
|
|
29
26
|
typedef Index::idx_t idx_t;
|
|
30
27
|
|
|
31
|
-
int d_in;
|
|
32
|
-
int d_out;
|
|
33
|
-
|
|
34
|
-
explicit VectorTransform (int d_in = 0, int d_out = 0):
|
|
35
|
-
d_in(d_in), d_out(d_out), is_trained(true)
|
|
36
|
-
{}
|
|
28
|
+
int d_in; ///! input dimension
|
|
29
|
+
int d_out; ///! output dimension
|
|
37
30
|
|
|
31
|
+
explicit VectorTransform(int d_in = 0, int d_out = 0)
|
|
32
|
+
: d_in(d_in), d_out(d_out), is_trained(true) {}
|
|
38
33
|
|
|
39
34
|
/// set if the VectorTransform does not require training, or if
|
|
40
35
|
/// training is done already
|
|
41
36
|
bool is_trained;
|
|
42
37
|
|
|
43
|
-
|
|
44
38
|
/** Perform training on a representative set of vectors. Does
|
|
45
39
|
* nothing by default.
|
|
46
40
|
*
|
|
47
41
|
* @param n nb of training vectors
|
|
48
42
|
* @param x training vectors, size n * d
|
|
49
43
|
*/
|
|
50
|
-
virtual void train
|
|
44
|
+
virtual void train(idx_t n, const float* x);
|
|
51
45
|
|
|
52
|
-
/** apply the random
|
|
46
|
+
/** apply the random rotation, return new allocated matrix
|
|
53
47
|
* @param x size n * d_in
|
|
54
48
|
* @return size n * d_out
|
|
55
49
|
*/
|
|
56
|
-
float
|
|
50
|
+
float* apply(idx_t n, const float* x) const;
|
|
57
51
|
|
|
58
52
|
/// same as apply, but result is pre-allocated
|
|
59
|
-
virtual void apply_noalloc
|
|
60
|
-
float *xt) const = 0;
|
|
53
|
+
virtual void apply_noalloc(idx_t n, const float* x, float* xt) const = 0;
|
|
61
54
|
|
|
62
55
|
/// reverse transformation. May not be implemented or may return
|
|
63
56
|
/// approximate result
|
|
64
|
-
virtual void reverse_transform
|
|
65
|
-
float *x) const;
|
|
66
|
-
|
|
67
|
-
virtual ~VectorTransform () {}
|
|
57
|
+
virtual void reverse_transform(idx_t n, const float* xt, float* x) const;
|
|
68
58
|
|
|
59
|
+
virtual ~VectorTransform() {}
|
|
69
60
|
};
|
|
70
61
|
|
|
71
|
-
|
|
72
|
-
|
|
73
62
|
/** Generic linear transformation, with bias term applied on output
|
|
74
63
|
* y = A * x + b
|
|
75
64
|
*/
|
|
76
|
-
struct LinearTransform: VectorTransform {
|
|
77
|
-
|
|
65
|
+
struct LinearTransform : VectorTransform {
|
|
78
66
|
bool have_bias; ///! whether to use the bias term
|
|
79
67
|
|
|
80
68
|
/// check if matrix A is orthonormal (enables reverse_transform)
|
|
@@ -83,58 +71,56 @@ struct LinearTransform: VectorTransform {
|
|
|
83
71
|
/// Transformation matrix, size d_out * d_in
|
|
84
72
|
std::vector<float> A;
|
|
85
73
|
|
|
86
|
-
|
|
74
|
+
/// bias vector, size d_out
|
|
87
75
|
std::vector<float> b;
|
|
88
76
|
|
|
89
77
|
/// both d_in > d_out and d_in < d_out are supported
|
|
90
|
-
explicit LinearTransform
|
|
91
|
-
|
|
78
|
+
explicit LinearTransform(
|
|
79
|
+
int d_in = 0,
|
|
80
|
+
int d_out = 0,
|
|
81
|
+
bool have_bias = false);
|
|
92
82
|
|
|
93
83
|
/// same as apply, but result is pre-allocated
|
|
94
84
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
|
95
85
|
|
|
96
86
|
/// compute x = A^T * (y - b)
|
|
97
87
|
/// is reverse transform if A has orthonormal lines
|
|
98
|
-
void transform_transpose
|
|
99
|
-
float *x) const;
|
|
88
|
+
void transform_transpose(idx_t n, const float* y, float* x) const;
|
|
100
89
|
|
|
101
90
|
/// works only if is_orthonormal
|
|
102
|
-
void reverse_transform
|
|
103
|
-
float *x) const override;
|
|
91
|
+
void reverse_transform(idx_t n, const float* xt, float* x) const override;
|
|
104
92
|
|
|
105
93
|
/// compute A^T * A to set the is_orthonormal flag
|
|
106
|
-
void set_is_orthonormal
|
|
94
|
+
void set_is_orthonormal();
|
|
107
95
|
|
|
108
96
|
bool verbose;
|
|
109
|
-
void print_if_verbose
|
|
110
|
-
|
|
97
|
+
void print_if_verbose(
|
|
98
|
+
const char* name,
|
|
99
|
+
const std::vector<double>& mat,
|
|
100
|
+
int n,
|
|
101
|
+
int d) const;
|
|
111
102
|
|
|
112
103
|
~LinearTransform() override {}
|
|
113
104
|
};
|
|
114
105
|
|
|
115
|
-
|
|
116
|
-
|
|
117
106
|
/// Randomly rotate a set of vectors
|
|
118
|
-
struct RandomRotationMatrix: LinearTransform {
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
LinearTransform(d_in, d_out, false) {}
|
|
107
|
+
struct RandomRotationMatrix : LinearTransform {
|
|
108
|
+
/// both d_in > d_out and d_in < d_out are supported
|
|
109
|
+
RandomRotationMatrix(int d_in, int d_out)
|
|
110
|
+
: LinearTransform(d_in, d_out, false) {}
|
|
123
111
|
|
|
124
|
-
|
|
125
|
-
|
|
112
|
+
/// must be called before the transform is used
|
|
113
|
+
void init(int seed);
|
|
126
114
|
|
|
127
|
-
|
|
128
|
-
|
|
115
|
+
// initializes with an arbitrary seed
|
|
116
|
+
void train(idx_t n, const float* x) override;
|
|
129
117
|
|
|
130
|
-
|
|
118
|
+
RandomRotationMatrix() {}
|
|
131
119
|
};
|
|
132
120
|
|
|
133
|
-
|
|
134
121
|
/** Applies a principal component analysis on a set of vectors,
|
|
135
122
|
* with optional whitening and random rotation. */
|
|
136
|
-
struct PCAMatrix: LinearTransform {
|
|
137
|
-
|
|
123
|
+
struct PCAMatrix : LinearTransform {
|
|
138
124
|
/** after transformation the components are multiplied by
|
|
139
125
|
* eigenvalues^eigen_power
|
|
140
126
|
*
|
|
@@ -143,6 +129,9 @@ struct PCAMatrix: LinearTransform {
|
|
|
143
129
|
*/
|
|
144
130
|
float eigen_power;
|
|
145
131
|
|
|
132
|
+
/// value added to eigenvalues to avoid division by 0 when whitening
|
|
133
|
+
float epsilon;
|
|
134
|
+
|
|
146
135
|
/// random rotation after PCA
|
|
147
136
|
bool random_rotation;
|
|
148
137
|
|
|
@@ -162,22 +151,23 @@ struct PCAMatrix: LinearTransform {
|
|
|
162
151
|
std::vector<float> PCAMat;
|
|
163
152
|
|
|
164
153
|
// the final matrix is computed after random rotation and/or whitening
|
|
165
|
-
explicit PCAMatrix
|
|
166
|
-
|
|
154
|
+
explicit PCAMatrix(
|
|
155
|
+
int d_in = 0,
|
|
156
|
+
int d_out = 0,
|
|
157
|
+
float eigen_power = 0,
|
|
158
|
+
bool random_rotation = false);
|
|
167
159
|
|
|
168
160
|
/// train on n vectors. If n < d_in then the eigenvector matrix
|
|
169
161
|
/// will be completed with 0s
|
|
170
162
|
void train(idx_t n, const float* x) override;
|
|
171
163
|
|
|
172
164
|
/// copy pre-trained PCA matrix
|
|
173
|
-
void copy_from
|
|
165
|
+
void copy_from(const PCAMatrix& other);
|
|
174
166
|
|
|
175
167
|
/// called after mean, PCAMat and eigenvalues are computed
|
|
176
168
|
void prepare_Ab();
|
|
177
|
-
|
|
178
169
|
};
|
|
179
170
|
|
|
180
|
-
|
|
181
171
|
/** ITQ implementation from
|
|
182
172
|
*
|
|
183
173
|
* Iterative quantization: A procrustean approach to learning binary codes
|
|
@@ -187,25 +177,21 @@ struct PCAMatrix: LinearTransform {
|
|
|
187
177
|
* PAMI'12.
|
|
188
178
|
*/
|
|
189
179
|
|
|
190
|
-
struct ITQMatrix: LinearTransform {
|
|
191
|
-
|
|
180
|
+
struct ITQMatrix : LinearTransform {
|
|
192
181
|
int max_iter;
|
|
193
182
|
int seed;
|
|
194
183
|
|
|
195
184
|
// force initialization of the rotation (for debugging)
|
|
196
185
|
std::vector<double> init_rotation;
|
|
197
186
|
|
|
198
|
-
explicit ITQMatrix
|
|
187
|
+
explicit ITQMatrix(int d = 0);
|
|
199
188
|
|
|
200
|
-
void train
|
|
189
|
+
void train(idx_t n, const float* x) override;
|
|
201
190
|
};
|
|
202
191
|
|
|
203
|
-
|
|
204
|
-
|
|
205
192
|
/** The full ITQ transform, including normalizations and PCA transformation
|
|
206
193
|
*/
|
|
207
|
-
struct ITQTransform: VectorTransform {
|
|
208
|
-
|
|
194
|
+
struct ITQTransform : VectorTransform {
|
|
209
195
|
std::vector<float> mean;
|
|
210
196
|
bool do_pca;
|
|
211
197
|
ITQMatrix itq;
|
|
@@ -216,15 +202,13 @@ struct ITQTransform: VectorTransform {
|
|
|
216
202
|
// concatenation of PCA + ITQ transformation
|
|
217
203
|
LinearTransform pca_then_itq;
|
|
218
204
|
|
|
219
|
-
explicit ITQTransform
|
|
220
|
-
|
|
221
|
-
void train (idx_t n, const float *x) override;
|
|
205
|
+
explicit ITQTransform(int d_in = 0, int d_out = 0, bool do_pca = false);
|
|
222
206
|
|
|
223
|
-
void
|
|
207
|
+
void train(idx_t n, const float* x) override;
|
|
224
208
|
|
|
209
|
+
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
|
225
210
|
};
|
|
226
211
|
|
|
227
|
-
|
|
228
212
|
struct ProductQuantizer;
|
|
229
213
|
|
|
230
214
|
/** Applies a rotation to align the dimensions with a PQ to minimize
|
|
@@ -235,8 +219,7 @@ struct ProductQuantizer;
|
|
|
235
219
|
* Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
|
|
236
220
|
*
|
|
237
221
|
*/
|
|
238
|
-
struct OPQMatrix: LinearTransform {
|
|
239
|
-
|
|
222
|
+
struct OPQMatrix : LinearTransform {
|
|
240
223
|
int M; ///< nb of subquantizers
|
|
241
224
|
int niter; ///< Number of outer training iterations
|
|
242
225
|
int niter_pq; ///< Number of training iterations for the PQ
|
|
@@ -248,46 +231,43 @@ struct OPQMatrix: LinearTransform {
|
|
|
248
231
|
|
|
249
232
|
/// if non-NULL, use this product quantizer for training
|
|
250
233
|
/// should be constructed with (d_out, M, _)
|
|
251
|
-
ProductQuantizer
|
|
234
|
+
ProductQuantizer* pq;
|
|
252
235
|
|
|
253
236
|
/// if d2 != -1, output vectors of this dimension
|
|
254
|
-
explicit OPQMatrix
|
|
237
|
+
explicit OPQMatrix(int d = 0, int M = 1, int d2 = -1);
|
|
255
238
|
|
|
256
239
|
void train(idx_t n, const float* x) override;
|
|
257
240
|
};
|
|
258
241
|
|
|
259
|
-
|
|
260
242
|
/** remap dimensions for input vectors, possibly inserting 0s
|
|
261
243
|
* strictly speaking this is also a linear transform but we don't want
|
|
262
244
|
* to compute it with matrix multiplies */
|
|
263
|
-
struct RemapDimensionsTransform: VectorTransform {
|
|
264
|
-
|
|
245
|
+
struct RemapDimensionsTransform : VectorTransform {
|
|
265
246
|
/// map from output dimension to input, size d_out
|
|
266
247
|
/// -1 -> set output to 0
|
|
267
248
|
std::vector<int> map;
|
|
268
249
|
|
|
269
|
-
RemapDimensionsTransform
|
|
250
|
+
RemapDimensionsTransform(int d_in, int d_out, const int* map);
|
|
270
251
|
|
|
271
252
|
/// remap input to output, skipping or inserting dimensions as needed
|
|
272
253
|
/// if uniform: distribute dimensions uniformly
|
|
273
254
|
/// otherwise just take the d_out first ones.
|
|
274
|
-
RemapDimensionsTransform
|
|
255
|
+
RemapDimensionsTransform(int d_in, int d_out, bool uniform = true);
|
|
275
256
|
|
|
276
257
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
|
277
258
|
|
|
278
259
|
/// reverse transform correct only when the mapping is a permutation
|
|
279
260
|
void reverse_transform(idx_t n, const float* xt, float* x) const override;
|
|
280
261
|
|
|
281
|
-
RemapDimensionsTransform
|
|
262
|
+
RemapDimensionsTransform() {}
|
|
282
263
|
};
|
|
283
264
|
|
|
284
|
-
|
|
285
265
|
/** per-vector normalization */
|
|
286
|
-
struct NormalizationTransform: VectorTransform {
|
|
266
|
+
struct NormalizationTransform : VectorTransform {
|
|
287
267
|
float norm;
|
|
288
268
|
|
|
289
|
-
explicit NormalizationTransform
|
|
290
|
-
NormalizationTransform
|
|
269
|
+
explicit NormalizationTransform(int d, float norm = 2.0);
|
|
270
|
+
NormalizationTransform();
|
|
291
271
|
|
|
292
272
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
|
293
273
|
|
|
@@ -296,12 +276,11 @@ struct NormalizationTransform: VectorTransform {
|
|
|
296
276
|
};
|
|
297
277
|
|
|
298
278
|
/** Subtract the mean of each component from the vectors. */
|
|
299
|
-
struct CenteringTransform: VectorTransform {
|
|
300
|
-
|
|
279
|
+
struct CenteringTransform : VectorTransform {
|
|
301
280
|
/// Mean, size d_in = d_out
|
|
302
281
|
std::vector<float> mean;
|
|
303
282
|
|
|
304
|
-
explicit CenteringTransform
|
|
283
|
+
explicit CenteringTransform(int d = 0);
|
|
305
284
|
|
|
306
285
|
/// train on n vectors.
|
|
307
286
|
void train(idx_t n, const float* x) override;
|
|
@@ -310,13 +289,9 @@ struct CenteringTransform: VectorTransform {
|
|
|
310
289
|
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
|
311
290
|
|
|
312
291
|
/// add the mean
|
|
313
|
-
void reverse_transform
|
|
314
|
-
float *x) const override;
|
|
315
|
-
|
|
292
|
+
void reverse_transform(idx_t n, const float* xt, float* x) const override;
|
|
316
293
|
};
|
|
317
294
|
|
|
318
|
-
|
|
319
295
|
} // namespace faiss
|
|
320
296
|
|
|
321
|
-
|
|
322
297
|
#endif
|
|
@@ -14,22 +14,23 @@
|
|
|
14
14
|
|
|
15
15
|
#include <faiss/impl/FaissAssert.h>
|
|
16
16
|
|
|
17
|
+
#include <faiss/Index2Layer.h>
|
|
18
|
+
#include <faiss/IndexAdditiveQuantizer.h>
|
|
17
19
|
#include <faiss/IndexFlat.h>
|
|
18
|
-
#include <faiss/
|
|
19
|
-
#include <faiss/IndexPreTransform.h>
|
|
20
|
-
#include <faiss/IndexLSH.h>
|
|
21
|
-
#include <faiss/IndexPQ.h>
|
|
20
|
+
#include <faiss/IndexHNSW.h>
|
|
22
21
|
#include <faiss/IndexIVF.h>
|
|
22
|
+
#include <faiss/IndexIVFFlat.h>
|
|
23
23
|
#include <faiss/IndexIVFPQ.h>
|
|
24
24
|
#include <faiss/IndexIVFPQR.h>
|
|
25
|
-
#include <faiss/Index2Layer.h>
|
|
26
|
-
#include <faiss/IndexIVFFlat.h>
|
|
27
25
|
#include <faiss/IndexIVFSpectralHash.h>
|
|
28
|
-
#include <faiss/
|
|
29
|
-
#include <faiss/IndexScalarQuantizer.h>
|
|
30
|
-
#include <faiss/IndexHNSW.h>
|
|
26
|
+
#include <faiss/IndexLSH.h>
|
|
31
27
|
#include <faiss/IndexLattice.h>
|
|
32
|
-
#include <faiss/
|
|
28
|
+
#include <faiss/IndexNSG.h>
|
|
29
|
+
#include <faiss/IndexPQ.h>
|
|
30
|
+
#include <faiss/IndexPreTransform.h>
|
|
31
|
+
#include <faiss/IndexScalarQuantizer.h>
|
|
32
|
+
#include <faiss/MetaIndexes.h>
|
|
33
|
+
#include <faiss/VectorTransform.h>
|
|
33
34
|
|
|
34
35
|
namespace faiss {
|
|
35
36
|
|
|
@@ -37,111 +38,115 @@ namespace faiss {
|
|
|
37
38
|
* cloning functions
|
|
38
39
|
**************************************************************/
|
|
39
40
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
Index * clone_index (const Index *index)
|
|
43
|
-
{
|
|
41
|
+
Index* clone_index(const Index* index) {
|
|
44
42
|
Cloner cl;
|
|
45
|
-
return cl.clone_Index
|
|
43
|
+
return cl.clone_Index(index);
|
|
46
44
|
}
|
|
47
45
|
|
|
48
46
|
// assumes there is a copy constructor ready. Always try from most
|
|
49
47
|
// specific to most general. Most indexes don't have complicated
|
|
50
48
|
// structs, the default copy constructor often just works.
|
|
51
|
-
#define TRYCLONE(classname, obj)
|
|
52
|
-
if (const classname
|
|
53
|
-
return new classname(*clo);
|
|
49
|
+
#define TRYCLONE(classname, obj) \
|
|
50
|
+
if (const classname* clo = dynamic_cast<const classname*>(obj)) { \
|
|
51
|
+
return new classname(*clo); \
|
|
54
52
|
} else
|
|
55
53
|
|
|
56
|
-
VectorTransform
|
|
57
|
-
|
|
58
|
-
TRYCLONE
|
|
59
|
-
TRYCLONE
|
|
60
|
-
TRYCLONE
|
|
61
|
-
TRYCLONE
|
|
62
|
-
TRYCLONE
|
|
63
|
-
|
|
64
|
-
{
|
|
65
|
-
FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
|
|
54
|
+
VectorTransform* Cloner::clone_VectorTransform(const VectorTransform* vt) {
|
|
55
|
+
TRYCLONE(RemapDimensionsTransform, vt)
|
|
56
|
+
TRYCLONE(OPQMatrix, vt)
|
|
57
|
+
TRYCLONE(PCAMatrix, vt)
|
|
58
|
+
TRYCLONE(ITQMatrix, vt)
|
|
59
|
+
TRYCLONE(RandomRotationMatrix, vt)
|
|
60
|
+
TRYCLONE(LinearTransform, vt) {
|
|
61
|
+
FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
|
|
66
62
|
}
|
|
67
63
|
return nullptr;
|
|
68
64
|
}
|
|
69
65
|
|
|
70
|
-
IndexIVF
|
|
71
|
-
|
|
72
|
-
TRYCLONE
|
|
73
|
-
TRYCLONE
|
|
74
|
-
TRYCLONE
|
|
75
|
-
|
|
76
|
-
{
|
|
77
|
-
FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
|
|
66
|
+
IndexIVF* Cloner::clone_IndexIVF(const IndexIVF* ivf) {
|
|
67
|
+
TRYCLONE(IndexIVFPQR, ivf)
|
|
68
|
+
TRYCLONE(IndexIVFPQ, ivf)
|
|
69
|
+
TRYCLONE(IndexIVFFlat, ivf)
|
|
70
|
+
TRYCLONE(IndexIVFScalarQuantizer, ivf) {
|
|
71
|
+
FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
|
|
78
72
|
}
|
|
79
73
|
return nullptr;
|
|
80
74
|
}
|
|
81
75
|
|
|
82
|
-
Index
|
|
83
|
-
|
|
84
|
-
TRYCLONE
|
|
85
|
-
TRYCLONE
|
|
86
|
-
TRYCLONE
|
|
87
|
-
TRYCLONE
|
|
88
|
-
TRYCLONE
|
|
89
|
-
TRYCLONE
|
|
90
|
-
TRYCLONE
|
|
91
|
-
TRYCLONE
|
|
92
|
-
|
|
93
|
-
|
|
76
|
+
Index* Cloner::clone_Index(const Index* index) {
|
|
77
|
+
TRYCLONE(IndexPQ, index)
|
|
78
|
+
TRYCLONE(IndexLSH, index)
|
|
79
|
+
TRYCLONE(IndexFlatL2, index)
|
|
80
|
+
TRYCLONE(IndexFlatIP, index)
|
|
81
|
+
TRYCLONE(IndexFlat, index)
|
|
82
|
+
TRYCLONE(IndexLattice, index)
|
|
83
|
+
TRYCLONE(IndexResidualQuantizer, index)
|
|
84
|
+
TRYCLONE(IndexScalarQuantizer, index)
|
|
85
|
+
TRYCLONE(MultiIndexQuantizer, index)
|
|
86
|
+
TRYCLONE(ResidualCoarseQuantizer, index)
|
|
87
|
+
if (const IndexIVF* ivf = dynamic_cast<const IndexIVF*>(index)) {
|
|
88
|
+
IndexIVF* res = clone_IndexIVF(ivf);
|
|
94
89
|
if (ivf->invlists == nullptr) {
|
|
95
90
|
res->invlists = nullptr;
|
|
96
|
-
} else if (
|
|
97
|
-
|
|
91
|
+
} else if (
|
|
92
|
+
auto* ails = dynamic_cast<const ArrayInvertedLists*>(
|
|
93
|
+
ivf->invlists)) {
|
|
98
94
|
res->invlists = new ArrayInvertedLists(*ails);
|
|
99
95
|
res->own_invlists = true;
|
|
100
96
|
} else {
|
|
101
|
-
FAISS_THROW_MSG(
|
|
97
|
+
FAISS_THROW_MSG(
|
|
98
|
+
"clone not supported for this type of inverted lists");
|
|
102
99
|
}
|
|
103
100
|
res->own_fields = true;
|
|
104
|
-
res->quantizer = clone_Index
|
|
101
|
+
res->quantizer = clone_Index(ivf->quantizer);
|
|
105
102
|
return res;
|
|
106
|
-
} else if (
|
|
107
|
-
|
|
108
|
-
|
|
103
|
+
} else if (
|
|
104
|
+
const IndexPreTransform* ipt =
|
|
105
|
+
dynamic_cast<const IndexPreTransform*>(index)) {
|
|
106
|
+
IndexPreTransform* res = new IndexPreTransform();
|
|
109
107
|
res->d = ipt->d;
|
|
110
108
|
res->ntotal = ipt->ntotal;
|
|
111
109
|
res->is_trained = ipt->is_trained;
|
|
112
110
|
res->metric_type = ipt->metric_type;
|
|
113
111
|
res->metric_arg = ipt->metric_arg;
|
|
114
112
|
|
|
115
|
-
|
|
116
|
-
res->index = clone_Index (ipt->index);
|
|
113
|
+
res->index = clone_Index(ipt->index);
|
|
117
114
|
for (int i = 0; i < ipt->chain.size(); i++)
|
|
118
|
-
res->chain.push_back
|
|
115
|
+
res->chain.push_back(clone_VectorTransform(ipt->chain[i]));
|
|
119
116
|
res->own_fields = true;
|
|
120
117
|
return res;
|
|
121
|
-
} else if (
|
|
122
|
-
|
|
123
|
-
IndexIDMap
|
|
118
|
+
} else if (
|
|
119
|
+
const IndexIDMap* idmap = dynamic_cast<const IndexIDMap*>(index)) {
|
|
120
|
+
IndexIDMap* res = new IndexIDMap(*idmap);
|
|
124
121
|
res->own_fields = true;
|
|
125
|
-
res->index = clone_Index
|
|
122
|
+
res->index = clone_Index(idmap->index);
|
|
126
123
|
return res;
|
|
127
|
-
} else if (const IndexHNSW
|
|
128
|
-
|
|
129
|
-
IndexHNSW *res = new IndexHNSW (*ihnsw);
|
|
124
|
+
} else if (const IndexHNSW* ihnsw = dynamic_cast<const IndexHNSW*>(index)) {
|
|
125
|
+
IndexHNSW* res = new IndexHNSW(*ihnsw);
|
|
130
126
|
res->own_fields = true;
|
|
131
|
-
res->storage = clone_Index
|
|
127
|
+
res->storage = clone_Index(ihnsw->storage);
|
|
132
128
|
return res;
|
|
133
|
-
} else if (const
|
|
134
|
-
|
|
135
|
-
|
|
129
|
+
} else if (const IndexNSG* insg = dynamic_cast<const IndexNSG*>(index)) {
|
|
130
|
+
IndexNSG* res = new IndexNSG(*insg);
|
|
131
|
+
|
|
132
|
+
// copy the dynamic allocated graph
|
|
133
|
+
auto& new_graph = res->nsg.final_graph;
|
|
134
|
+
auto& old_graph = insg->nsg.final_graph;
|
|
135
|
+
new_graph = std::make_shared<nsg::Graph<int>>(*old_graph);
|
|
136
|
+
|
|
137
|
+
res->own_fields = true;
|
|
138
|
+
res->storage = clone_Index(insg->storage);
|
|
139
|
+
return res;
|
|
140
|
+
} else if (
|
|
141
|
+
const Index2Layer* i2l = dynamic_cast<const Index2Layer*>(index)) {
|
|
142
|
+
Index2Layer* res = new Index2Layer(*i2l);
|
|
136
143
|
res->q1.own_fields = true;
|
|
137
|
-
res->q1.quantizer = clone_Index
|
|
144
|
+
res->q1.quantizer = clone_Index(i2l->q1.quantizer);
|
|
138
145
|
return res;
|
|
139
146
|
} else {
|
|
140
|
-
FAISS_THROW_MSG(
|
|
147
|
+
FAISS_THROW_MSG("clone not supported for this type of Index");
|
|
141
148
|
}
|
|
142
149
|
return nullptr;
|
|
143
150
|
}
|
|
144
151
|
|
|
145
|
-
|
|
146
|
-
|
|
147
152
|
} // namespace faiss
|
|
@@ -11,28 +11,23 @@
|
|
|
11
11
|
|
|
12
12
|
#pragma once
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
16
14
|
namespace faiss {
|
|
17
15
|
|
|
18
16
|
struct Index;
|
|
19
17
|
struct IndexIVF;
|
|
20
18
|
struct VectorTransform;
|
|
21
19
|
|
|
22
|
-
|
|
23
20
|
/* cloning functions */
|
|
24
|
-
Index
|
|
21
|
+
Index* clone_index(const Index*);
|
|
25
22
|
|
|
26
23
|
/** Cloner class, useful to override classes with other cloning
|
|
27
24
|
* functions. The cloning function above just calls
|
|
28
25
|
* Cloner::clone_Index. */
|
|
29
26
|
struct Cloner {
|
|
30
|
-
virtual VectorTransform
|
|
31
|
-
virtual Index
|
|
32
|
-
virtual IndexIVF
|
|
27
|
+
virtual VectorTransform* clone_VectorTransform(const VectorTransform*);
|
|
28
|
+
virtual Index* clone_Index(const Index*);
|
|
29
|
+
virtual IndexIVF* clone_IndexIVF(const IndexIVF*);
|
|
33
30
|
virtual ~Cloner() {}
|
|
34
31
|
};
|
|
35
32
|
|
|
36
|
-
|
|
37
|
-
|
|
38
33
|
} // namespace faiss
|