faiss 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
@@ -0,0 +1,322 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// -*- c++ -*-
|
9
|
+
|
10
|
+
#ifndef FAISS_VECTOR_TRANSFORM_H
|
11
|
+
#define FAISS_VECTOR_TRANSFORM_H
|
12
|
+
|
13
|
+
/** Defines a few objects that apply transformations to a set of
|
14
|
+
* vectors. Often these are pre-processing steps.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include <vector>
|
18
|
+
#include <stdint.h>
|
19
|
+
|
20
|
+
#include <faiss/Index.h>
|
21
|
+
|
22
|
+
|
23
|
+
namespace faiss {
|
24
|
+
|
25
|
+
|
26
|
+
/** Any transformation applied on a set of vectors */
|
27
|
+
struct VectorTransform {
|
28
|
+
|
29
|
+
typedef Index::idx_t idx_t;
|
30
|
+
|
31
|
+
int d_in; ///! input dimension
|
32
|
+
int d_out; ///! output dimension
|
33
|
+
|
34
|
+
explicit VectorTransform (int d_in = 0, int d_out = 0):
|
35
|
+
d_in(d_in), d_out(d_out), is_trained(true)
|
36
|
+
{}
|
37
|
+
|
38
|
+
|
39
|
+
/// set if the VectorTransform does not require training, or if
|
40
|
+
/// training is done already
|
41
|
+
bool is_trained;
|
42
|
+
|
43
|
+
|
44
|
+
/** Perform training on a representative set of vectors. Does
|
45
|
+
* nothing by default.
|
46
|
+
*
|
47
|
+
* @param n nb of training vectors
|
48
|
+
* @param x training vectors, size n * d
|
49
|
+
*/
|
50
|
+
virtual void train (idx_t n, const float *x);
|
51
|
+
|
52
|
+
/** apply the transformation, return a newly allocated matrix
|
53
|
+
* @param x size n * d_in
|
54
|
+
* @return size n * d_out
|
55
|
+
*/
|
56
|
+
float *apply (idx_t n, const float * x) const;
|
57
|
+
|
58
|
+
/// same as apply, but result is pre-allocated
|
59
|
+
virtual void apply_noalloc (idx_t n, const float * x,
|
60
|
+
float *xt) const = 0;
|
61
|
+
|
62
|
+
/// reverse transformation. May not be implemented or may return
|
63
|
+
/// approximate result
|
64
|
+
virtual void reverse_transform (idx_t n, const float * xt,
|
65
|
+
float *x) const;
|
66
|
+
|
67
|
+
virtual ~VectorTransform () {}
|
68
|
+
|
69
|
+
};
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
/** Generic linear transformation, with bias term applied on output
|
74
|
+
* y = A * x + b
|
75
|
+
*/
|
76
|
+
struct LinearTransform: VectorTransform {
|
77
|
+
|
78
|
+
bool have_bias; ///! whether to use the bias term
|
79
|
+
|
80
|
+
/// check if matrix A is orthonormal (enables reverse_transform)
|
81
|
+
bool is_orthonormal;
|
82
|
+
|
83
|
+
/// Transformation matrix, size d_out * d_in
|
84
|
+
std::vector<float> A;
|
85
|
+
|
86
|
+
/// bias vector, size d_out
|
87
|
+
std::vector<float> b;
|
88
|
+
|
89
|
+
/// both d_in > d_out and d_in < d_out are supported
|
90
|
+
explicit LinearTransform (int d_in = 0, int d_out = 0,
|
91
|
+
bool have_bias = false);
|
92
|
+
|
93
|
+
/// same as apply, but result is pre-allocated
|
94
|
+
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
95
|
+
|
96
|
+
/// compute x = A^T * (y - b)
|
97
|
+
/// is reverse transform if A has orthonormal lines
|
98
|
+
void transform_transpose (idx_t n, const float * y,
|
99
|
+
float *x) const;
|
100
|
+
|
101
|
+
/// works only if is_orthonormal
|
102
|
+
void reverse_transform (idx_t n, const float * xt,
|
103
|
+
float *x) const override;
|
104
|
+
|
105
|
+
/// compute A^T * A to set the is_orthonormal flag
|
106
|
+
void set_is_orthonormal ();
|
107
|
+
|
108
|
+
bool verbose;
|
109
|
+
void print_if_verbose (const char*name, const std::vector<double> &mat,
|
110
|
+
int n, int d) const;
|
111
|
+
|
112
|
+
~LinearTransform() override {}
|
113
|
+
};
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
/// Randomly rotate a set of vectors
|
118
|
+
struct RandomRotationMatrix: LinearTransform {
|
119
|
+
|
120
|
+
/// both d_in > d_out and d_in < d_out are supported
|
121
|
+
RandomRotationMatrix (int d_in, int d_out):
|
122
|
+
LinearTransform(d_in, d_out, false) {}
|
123
|
+
|
124
|
+
/// must be called before the transform is used
|
125
|
+
void init(int seed);
|
126
|
+
|
127
|
+
// initializes with an arbitrary seed
|
128
|
+
void train(idx_t n, const float* x) override;
|
129
|
+
|
130
|
+
RandomRotationMatrix () {}
|
131
|
+
};
|
132
|
+
|
133
|
+
|
134
|
+
/** Applies a principal component analysis on a set of vectors,
|
135
|
+
* with optional whitening and random rotation. */
|
136
|
+
struct PCAMatrix: LinearTransform {
|
137
|
+
|
138
|
+
/** after transformation the components are multiplied by
|
139
|
+
* eigenvalues^eigen_power
|
140
|
+
*
|
141
|
+
* =0: no whitening
|
142
|
+
* =-0.5: full whitening
|
143
|
+
*/
|
144
|
+
float eigen_power;
|
145
|
+
|
146
|
+
/// random rotation after PCA
|
147
|
+
bool random_rotation;
|
148
|
+
|
149
|
+
/// ratio between # training vectors and dimension
|
150
|
+
size_t max_points_per_d;
|
151
|
+
|
152
|
+
/// try to distribute output eigenvectors in this many bins
|
153
|
+
int balanced_bins;
|
154
|
+
|
155
|
+
/// Mean, size d_in
|
156
|
+
std::vector<float> mean;
|
157
|
+
|
158
|
+
/// eigenvalues of covariance matrix (= squared singular values)
|
159
|
+
std::vector<float> eigenvalues;
|
160
|
+
|
161
|
+
/// PCA matrix, size d_in * d_in
|
162
|
+
std::vector<float> PCAMat;
|
163
|
+
|
164
|
+
// the final matrix is computed after random rotation and/or whitening
|
165
|
+
explicit PCAMatrix (int d_in = 0, int d_out = 0,
|
166
|
+
float eigen_power = 0, bool random_rotation = false);
|
167
|
+
|
168
|
+
/// train on n vectors. If n < d_in then the eigenvector matrix
|
169
|
+
/// will be completed with 0s
|
170
|
+
void train(idx_t n, const float* x) override;
|
171
|
+
|
172
|
+
/// copy pre-trained PCA matrix
|
173
|
+
void copy_from (const PCAMatrix & other);
|
174
|
+
|
175
|
+
/// called after mean, PCAMat and eigenvalues are computed
|
176
|
+
void prepare_Ab();
|
177
|
+
|
178
|
+
};
|
179
|
+
|
180
|
+
|
181
|
+
/** ITQ implementation from
|
182
|
+
*
|
183
|
+
* Iterative quantization: A procrustean approach to learning binary codes
|
184
|
+
* for large-scale image retrieval,
|
185
|
+
*
|
186
|
+
* Yunchao Gong, Svetlana Lazebnik, Albert Gordo, Florent Perronnin,
|
187
|
+
* PAMI'12.
|
188
|
+
*/
|
189
|
+
|
190
|
+
struct ITQMatrix: LinearTransform {
|
191
|
+
|
192
|
+
int max_iter;
|
193
|
+
int seed;
|
194
|
+
|
195
|
+
// force initialization of the rotation (for debugging)
|
196
|
+
std::vector<double> init_rotation;
|
197
|
+
|
198
|
+
explicit ITQMatrix (int d = 0);
|
199
|
+
|
200
|
+
void train (idx_t n, const float* x) override;
|
201
|
+
};
|
202
|
+
|
203
|
+
|
204
|
+
|
205
|
+
/** The full ITQ transform, including normalizations and PCA transformation
|
206
|
+
*/
|
207
|
+
struct ITQTransform: VectorTransform {
|
208
|
+
|
209
|
+
std::vector<float> mean;
|
210
|
+
bool do_pca;
|
211
|
+
ITQMatrix itq;
|
212
|
+
|
213
|
+
/// max training points per dimension
|
214
|
+
int max_train_per_dim;
|
215
|
+
|
216
|
+
// concatenation of PCA + ITQ transformation
|
217
|
+
LinearTransform pca_then_itq;
|
218
|
+
|
219
|
+
explicit ITQTransform (int d_in = 0, int d_out = 0, bool do_pca = false);
|
220
|
+
|
221
|
+
void train (idx_t n, const float *x) override;
|
222
|
+
|
223
|
+
void apply_noalloc (idx_t n, const float* x, float* xt) const override;
|
224
|
+
|
225
|
+
};
|
226
|
+
|
227
|
+
|
228
|
+
struct ProductQuantizer;
|
229
|
+
|
230
|
+
/** Applies a rotation to align the dimensions with a PQ to minimize
|
231
|
+
* the reconstruction error. Can be used before an IndexPQ or an
|
232
|
+
* IndexIVFPQ. The method is the non-parametric version described in:
|
233
|
+
*
|
234
|
+
* "Optimized Product Quantization for Approximate Nearest Neighbor Search"
|
235
|
+
* Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
|
236
|
+
*
|
237
|
+
*/
|
238
|
+
struct OPQMatrix: LinearTransform {
|
239
|
+
|
240
|
+
int M; ///< nb of subquantizers
|
241
|
+
int niter; ///< Number of outer training iterations
|
242
|
+
int niter_pq; ///< Number of training iterations for the PQ
|
243
|
+
int niter_pq_0; ///< same, for the first outer iteration
|
244
|
+
|
245
|
+
/// if there are too many training points, resample
|
246
|
+
size_t max_train_points;
|
247
|
+
bool verbose;
|
248
|
+
|
249
|
+
/// if non-NULL, use this product quantizer for training
|
250
|
+
/// should be constructed with (d_out, M, _)
|
251
|
+
ProductQuantizer * pq;
|
252
|
+
|
253
|
+
/// if d2 != -1, output vectors of this dimension
|
254
|
+
explicit OPQMatrix (int d = 0, int M = 1, int d2 = -1);
|
255
|
+
|
256
|
+
void train(idx_t n, const float* x) override;
|
257
|
+
};
|
258
|
+
|
259
|
+
|
260
|
+
/** remap dimensions for input vectors, possibly inserting 0s
|
261
|
+
* strictly speaking this is also a linear transform but we don't want
|
262
|
+
* to compute it with matrix multiplies */
|
263
|
+
struct RemapDimensionsTransform: VectorTransform {
|
264
|
+
|
265
|
+
/// map from output dimension to input, size d_out
|
266
|
+
/// -1 -> set output to 0
|
267
|
+
std::vector<int> map;
|
268
|
+
|
269
|
+
RemapDimensionsTransform (int d_in, int d_out, const int *map);
|
270
|
+
|
271
|
+
/// remap input to output, skipping or inserting dimensions as needed
|
272
|
+
/// if uniform: distribute dimensions uniformly
|
273
|
+
/// otherwise just take the d_out first ones.
|
274
|
+
RemapDimensionsTransform (int d_in, int d_out, bool uniform = true);
|
275
|
+
|
276
|
+
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
277
|
+
|
278
|
+
/// reverse transform correct only when the mapping is a permutation
|
279
|
+
void reverse_transform(idx_t n, const float* xt, float* x) const override;
|
280
|
+
|
281
|
+
RemapDimensionsTransform () {}
|
282
|
+
};
|
283
|
+
|
284
|
+
|
285
|
+
/** per-vector normalization */
|
286
|
+
struct NormalizationTransform: VectorTransform {
|
287
|
+
float norm;
|
288
|
+
|
289
|
+
explicit NormalizationTransform (int d, float norm = 2.0);
|
290
|
+
NormalizationTransform ();
|
291
|
+
|
292
|
+
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
293
|
+
|
294
|
+
/// Identity transform since normalization is not invertible
|
295
|
+
void reverse_transform(idx_t n, const float* xt, float* x) const override;
|
296
|
+
};
|
297
|
+
|
298
|
+
/** Subtract the mean of each component from the vectors. */
|
299
|
+
struct CenteringTransform: VectorTransform {
|
300
|
+
|
301
|
+
/// Mean, size d_in = d_out
|
302
|
+
std::vector<float> mean;
|
303
|
+
|
304
|
+
explicit CenteringTransform (int d = 0);
|
305
|
+
|
306
|
+
/// train on n vectors.
|
307
|
+
void train(idx_t n, const float* x) override;
|
308
|
+
|
309
|
+
/// subtract the mean
|
310
|
+
void apply_noalloc(idx_t n, const float* x, float* xt) const override;
|
311
|
+
|
312
|
+
/// add the mean
|
313
|
+
void reverse_transform (idx_t n, const float * xt,
|
314
|
+
float *x) const override;
|
315
|
+
|
316
|
+
};
|
317
|
+
|
318
|
+
|
319
|
+
} // namespace faiss
|
320
|
+
|
321
|
+
|
322
|
+
#endif
|
@@ -0,0 +1,83 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// Copyright 2004-present Facebook. All Rights Reserved.
|
9
|
+
// -*- c++ -*-
|
10
|
+
|
11
|
+
#include <cstring>
|
12
|
+
#include "AutoTune.h"
|
13
|
+
#include "AutoTune_c.h"
|
14
|
+
#include "macros_impl.h"
|
15
|
+
|
16
|
+
using faiss::Index;
|
17
|
+
using faiss::ParameterRange;
|
18
|
+
using faiss::ParameterSpace;
|
19
|
+
|
20
|
+
const char* faiss_ParameterRange_name(const FaissParameterRange* range) {
|
21
|
+
return reinterpret_cast<const ParameterRange*>(range)->name.c_str();
|
22
|
+
}
|
23
|
+
|
24
|
+
/// Expose the `values` container of the wrapped ParameterRange:
/// `*p_values` receives its data pointer and `*p_size` its element count.
/// The pointer refers to the range's own storage; no copy is made.
void faiss_ParameterRange_values(FaissParameterRange* range, double** p_values, size_t* p_size) {
    ParameterRange* cpp_range = reinterpret_cast<ParameterRange*>(range);
    *p_values = cpp_range->values.data();
    *p_size = cpp_range->values.size();
}
|
29
|
+
|
30
|
+
/// Heap-allocate a default-constructed ParameterSpace and store it in `*space`
/// as an opaque handle. Exceptions are routed through CATCH_AND_HANDLE.
int faiss_ParameterSpace_new(FaissParameterSpace** space) {
    try {
        *space = reinterpret_cast<FaissParameterSpace*>(new ParameterSpace());
    } CATCH_AND_HANDLE
}
|
36
|
+
|
37
|
+
DEFINE_DESTRUCTOR(ParameterSpace)
|
38
|
+
|
39
|
+
/// Forward to ParameterSpace::n_combinations() on the wrapped object.
size_t faiss_ParameterSpace_n_combinations(const FaissParameterSpace* space) {
    const ParameterSpace* cpp_space = reinterpret_cast<const ParameterSpace*>(space);
    return cpp_space->n_combinations();
}
|
42
|
+
|
43
|
+
int faiss_ParameterSpace_combination_name(const FaissParameterSpace* space, size_t cno, char* char_buffer, size_t size) {
|
44
|
+
try {
|
45
|
+
auto rep = reinterpret_cast<const ParameterSpace*>(space)->combination_name(cno);
|
46
|
+
strncpy(char_buffer, rep.c_str(), size);
|
47
|
+
} CATCH_AND_HANDLE
|
48
|
+
}
|
49
|
+
|
50
|
+
int faiss_ParameterSpace_set_index_parameters(const FaissParameterSpace* space, FaissIndex* cindex, const char* param_string) {
|
51
|
+
try {
|
52
|
+
auto index = reinterpret_cast<Index*>(cindex);
|
53
|
+
reinterpret_cast<const ParameterSpace*>(space)->set_index_parameters(index, param_string);
|
54
|
+
} CATCH_AND_HANDLE
|
55
|
+
}
|
56
|
+
|
57
|
+
/// set a combination of parameters on an index
|
58
|
+
int faiss_ParameterSpace_set_index_parameters_cno(const FaissParameterSpace* space, FaissIndex* cindex, size_t cno) {
|
59
|
+
try {
|
60
|
+
auto index = reinterpret_cast<Index*>(cindex);
|
61
|
+
reinterpret_cast<const ParameterSpace*>(space)->set_index_parameters(index, cno);
|
62
|
+
} CATCH_AND_HANDLE
|
63
|
+
}
|
64
|
+
|
65
|
+
int faiss_ParameterSpace_set_index_parameter(const FaissParameterSpace* space, FaissIndex* cindex, const char * name, double value) {
|
66
|
+
try {
|
67
|
+
auto index = reinterpret_cast<Index*>(cindex);
|
68
|
+
reinterpret_cast<const ParameterSpace*>(space)->set_index_parameter(index, name, value);
|
69
|
+
} CATCH_AND_HANDLE
|
70
|
+
}
|
71
|
+
|
72
|
+
void faiss_ParameterSpace_display(const FaissParameterSpace* space) {
|
73
|
+
reinterpret_cast<const ParameterSpace*>(space)->display();
|
74
|
+
}
|
75
|
+
|
76
|
+
/// Call ParameterSpace::add_range(name) on the wrapped object. When `p_range`
/// is non-null, `*p_range` receives an opaque handle to the returned range;
/// a null `p_range` simply discards it. Exceptions are routed through
/// CATCH_AND_HANDLE.
int faiss_ParameterSpace_add_range(FaissParameterSpace* space, const char* name, FaissParameterRange** p_range) {
    try {
        ParameterSpace* cpp_space = reinterpret_cast<ParameterSpace*>(space);
        ParameterRange& added = cpp_space->add_range(name);
        if (p_range != nullptr) {
            *p_range = reinterpret_cast<FaissParameterRange*>(&added);
        }
    } CATCH_AND_HANDLE
}
|
@@ -0,0 +1,64 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// Copyright 2004-present Facebook. All Rights Reserved.
|
9
|
+
// -*- c -*-
|
10
|
+
|
11
|
+
#ifndef FAISS_AUTO_TUNE_C_H
|
12
|
+
#define FAISS_AUTO_TUNE_C_H
|
13
|
+
|
14
|
+
#include "faiss_c.h"
|
15
|
+
#include "Index_c.h"
|
16
|
+
|
17
|
+
#ifdef __cplusplus
|
18
|
+
extern "C" {
|
19
|
+
#endif
|
20
|
+
|
21
|
+
/// possible values of a parameter, sorted from least to most expensive/accurate
|
22
|
+
FAISS_DECLARE_CLASS(ParameterRange)
|
23
|
+
|
24
|
+
FAISS_DECLARE_GETTER(ParameterRange, const char*, name)
|
25
|
+
|
26
|
+
/// Getter for the values in the range. The output values are invalidated
|
27
|
+
/// upon any other modification of the range.
|
28
|
+
void faiss_ParameterRange_values(FaissParameterRange*, double**, size_t*);
|
29
|
+
|
30
|
+
/** Uses a-priori knowledge on the Faiss indexes to extract tunable parameters.
|
31
|
+
*/
|
32
|
+
FAISS_DECLARE_CLASS(ParameterSpace)
|
33
|
+
|
34
|
+
/// Parameter space default constructor
|
35
|
+
int faiss_ParameterSpace_new(FaissParameterSpace** space);
|
36
|
+
|
37
|
+
/// nb of combinations, = product of values sizes
|
38
|
+
size_t faiss_ParameterSpace_n_combinations(const FaissParameterSpace*);
|
39
|
+
|
40
|
+
/// get string representation of the combination
|
41
|
+
/// by writing it to the given character buffer.
|
42
|
+
/// A buffer size of 1000 ensures that the full name is collected.
|
43
|
+
int faiss_ParameterSpace_combination_name(const FaissParameterSpace*, size_t, char*, size_t);
|
44
|
+
|
45
|
+
/// set a combination of parameters described by a string
|
46
|
+
int faiss_ParameterSpace_set_index_parameters(const FaissParameterSpace*, FaissIndex*, const char *);
|
47
|
+
|
48
|
+
/// set a combination of parameters on an index
|
49
|
+
int faiss_ParameterSpace_set_index_parameters_cno(const FaissParameterSpace*, FaissIndex*, size_t);
|
50
|
+
|
51
|
+
/// set one of the parameters
|
52
|
+
int faiss_ParameterSpace_set_index_parameter(const FaissParameterSpace*, FaissIndex*, const char *, double);
|
53
|
+
|
54
|
+
/// print a description on stdout
|
55
|
+
void faiss_ParameterSpace_display(const FaissParameterSpace*);
|
56
|
+
|
57
|
+
/// add a new parameter (or return it if it exists)
|
58
|
+
int faiss_ParameterSpace_add_range(FaissParameterSpace*, const char*, FaissParameterRange**);
|
59
|
+
|
60
|
+
#ifdef __cplusplus
|
61
|
+
}
|
62
|
+
#endif
|
63
|
+
|
64
|
+
#endif
|