faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,322 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #ifndef FAISS_VECTOR_TRANSFORM_H
11
+ #define FAISS_VECTOR_TRANSFORM_H
12
+
13
+ /** Defines a few objects that apply transformations to a set of
14
+ * vectors. Often these are pre-processing steps.
15
+ */
16
+
17
+ #include <vector>
18
+ #include <stdint.h>
19
+
20
+ #include <faiss/Index.h>
21
+
22
+
23
+ namespace faiss {
24
+
25
+
26
+ /** Any transformation applied on a set of vectors */
27
+ struct VectorTransform {
28
+
29
+ typedef Index::idx_t idx_t;
30
+
31
+ int d_in; ///< input dimension
32
+ int d_out; ///< output dimension
33
+
34
+ explicit VectorTransform (int d_in = 0, int d_out = 0):
35
+ d_in(d_in), d_out(d_out), is_trained(true)
36
+ {}
37
+
38
+
39
+ /// set if the VectorTransform does not require training, or if
40
+ /// training is done already
41
+ bool is_trained;
42
+
43
+
44
+ /** Perform training on a representative set of vectors. Does
45
+ * nothing by default.
46
+ *
47
+ * @param n nb of training vectors
48
+ * @param x training vectors, size n * d
49
+ */
50
+ virtual void train (idx_t n, const float *x);
51
+
52
+ /** apply the transformation, return the result in a newly allocated matrix
53
+ * @param x size n * d_in
54
+ * @return size n * d_out
55
+ */
56
+ float *apply (idx_t n, const float * x) const;
57
+
58
+ /// same as apply, but result is pre-allocated
59
+ virtual void apply_noalloc (idx_t n, const float * x,
60
+ float *xt) const = 0;
61
+
62
+ /// reverse transformation. May not be implemented or may return
63
+ /// approximate result
64
+ virtual void reverse_transform (idx_t n, const float * xt,
65
+ float *x) const;
66
+
67
+ virtual ~VectorTransform () {}
68
+
69
+ };
70
+
71
+
72
+
73
+ /** Generic linear transformation, with bias term applied on output
74
+ * y = A * x + b
75
+ */
76
+ struct LinearTransform: VectorTransform {
77
+
78
+ bool have_bias; ///< whether to use the bias term
79
+
80
+ /// check if matrix A is orthonormal (enables reverse_transform)
81
+ bool is_orthonormal;
82
+
83
+ /// Transformation matrix, size d_out * d_in
84
+ std::vector<float> A;
85
+
86
+ /// bias vector, size d_out
87
+ std::vector<float> b;
88
+
89
+ /// both d_in > d_out and d_in < d_out are supported
90
+ explicit LinearTransform (int d_in = 0, int d_out = 0,
91
+ bool have_bias = false);
92
+
93
+ /// same as apply, but result is pre-allocated
94
+ void apply_noalloc(idx_t n, const float* x, float* xt) const override;
95
+
96
+ /// compute x = A^T * (y - b)
97
+ /// is reverse transform if A has orthonormal lines
98
+ void transform_transpose (idx_t n, const float * y,
99
+ float *x) const;
100
+
101
+ /// works only if is_orthonormal
102
+ void reverse_transform (idx_t n, const float * xt,
103
+ float *x) const override;
104
+
105
+ /// compute A^T * A to set the is_orthonormal flag
106
+ void set_is_orthonormal ();
107
+
108
+ bool verbose;
109
+ void print_if_verbose (const char*name, const std::vector<double> &mat,
110
+ int n, int d) const;
111
+
112
+ ~LinearTransform() override {}
113
+ };
114
+
115
+
116
+
117
+ /// Randomly rotate a set of vectors
118
+ struct RandomRotationMatrix: LinearTransform {
119
+
120
+ /// both d_in > d_out and d_in < d_out are supported
121
+ RandomRotationMatrix (int d_in, int d_out):
122
+ LinearTransform(d_in, d_out, false) {}
123
+
124
+ /// must be called before the transform is used
125
+ void init(int seed);
126
+
127
+ // initializes with an arbitrary seed
128
+ void train(idx_t n, const float* x) override;
129
+
130
+ RandomRotationMatrix () {}
131
+ };
132
+
133
+
134
+ /** Applies a principal component analysis on a set of vectors,
135
+ * with optional whitening and random rotation. */
136
+ struct PCAMatrix: LinearTransform {
137
+
138
+ /** after transformation the components are multiplied by
139
+ * eigenvalues^eigen_power
140
+ *
141
+ * =0: no whitening
142
+ * =-0.5: full whitening
143
+ */
144
+ float eigen_power;
145
+
146
+ /// random rotation after PCA
147
+ bool random_rotation;
148
+
149
+ /// ratio between # training vectors and dimension
150
+ size_t max_points_per_d;
151
+
152
+ /// try to distribute output eigenvectors in this many bins
153
+ int balanced_bins;
154
+
155
+ /// Mean, size d_in
156
+ std::vector<float> mean;
157
+
158
+ /// eigenvalues of covariance matrix (= squared singular values)
159
+ std::vector<float> eigenvalues;
160
+
161
+ /// PCA matrix, size d_in * d_in
162
+ std::vector<float> PCAMat;
163
+
164
+ // the final matrix is computed after random rotation and/or whitening
165
+ explicit PCAMatrix (int d_in = 0, int d_out = 0,
166
+ float eigen_power = 0, bool random_rotation = false);
167
+
168
+ /// train on n vectors. If n < d_in then the eigenvector matrix
169
+ /// will be completed with 0s
170
+ void train(idx_t n, const float* x) override;
171
+
172
+ /// copy pre-trained PCA matrix
173
+ void copy_from (const PCAMatrix & other);
174
+
175
+ /// called after mean, PCAMat and eigenvalues are computed
176
+ void prepare_Ab();
177
+
178
+ };
179
+
180
+
181
+ /** ITQ implementation from
182
+ *
183
+ * Iterative quantization: A procrustean approach to learning binary codes
184
+ * for large-scale image retrieval,
185
+ *
186
+ * Yunchao Gong, Svetlana Lazebnik, Albert Gordo, Florent Perronnin,
187
+ * PAMI'12.
188
+ */
189
+
190
+ struct ITQMatrix: LinearTransform {
191
+
192
+ int max_iter;
193
+ int seed;
194
+
195
+ // force initialization of the rotation (for debugging)
196
+ std::vector<double> init_rotation;
197
+
198
+ explicit ITQMatrix (int d = 0);
199
+
200
+ void train (idx_t n, const float* x) override;
201
+ };
202
+
203
+
204
+
205
+ /** The full ITQ transform, including normalizations and PCA transformation
206
+ */
207
+ struct ITQTransform: VectorTransform {
208
+
209
+ std::vector<float> mean;
210
+ bool do_pca;
211
+ ITQMatrix itq;
212
+
213
+ /// max training points per dimension
214
+ int max_train_per_dim;
215
+
216
+ // concatenation of PCA + ITQ transformation
217
+ LinearTransform pca_then_itq;
218
+
219
+ explicit ITQTransform (int d_in = 0, int d_out = 0, bool do_pca = false);
220
+
221
+ void train (idx_t n, const float *x) override;
222
+
223
+ void apply_noalloc (idx_t n, const float* x, float* xt) const override;
224
+
225
+ };
226
+
227
+
228
+ struct ProductQuantizer;
229
+
230
+ /** Applies a rotation to align the dimensions with a PQ to minimize
231
+ * the reconstruction error. Can be used before an IndexPQ or an
232
+ * IndexIVFPQ. The method is the non-parametric version described in:
233
+ *
234
+ * "Optimized Product Quantization for Approximate Nearest Neighbor Search"
235
+ * Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
236
+ *
237
+ */
238
+ struct OPQMatrix: LinearTransform {
239
+
240
+ int M; ///< nb of subquantizers
241
+ int niter; ///< Number of outer training iterations
242
+ int niter_pq; ///< Number of training iterations for the PQ
243
+ int niter_pq_0; ///< same, for the first outer iteration
244
+
245
+ /// if there are too many training points, resample
246
+ size_t max_train_points;
247
+ bool verbose;
248
+
249
+ /// if non-NULL, use this product quantizer for training
250
+ /// should be constructed with (d_out, M, _)
251
+ ProductQuantizer * pq;
252
+
253
+ /// if d2 != -1, output vectors of this dimension
254
+ explicit OPQMatrix (int d = 0, int M = 1, int d2 = -1);
255
+
256
+ void train(idx_t n, const float* x) override;
257
+ };
258
+
259
+
260
+ /** remap dimensions for input vectors, possibly inserting 0s
261
+ * strictly speaking this is also a linear transform but we don't want
262
+ * to compute it with matrix multiplies */
263
+ struct RemapDimensionsTransform: VectorTransform {
264
+
265
+ /// map from output dimension to input, size d_out
266
+ /// -1 -> set output to 0
267
+ std::vector<int> map;
268
+
269
+ RemapDimensionsTransform (int d_in, int d_out, const int *map);
270
+
271
+ /// remap input to output, skipping or inserting dimensions as needed
272
+ /// if uniform: distribute dimensions uniformly
273
+ /// otherwise just take the d_out first ones.
274
+ RemapDimensionsTransform (int d_in, int d_out, bool uniform = true);
275
+
276
+ void apply_noalloc(idx_t n, const float* x, float* xt) const override;
277
+
278
+ /// reverse transform correct only when the mapping is a permutation
279
+ void reverse_transform(idx_t n, const float* xt, float* x) const override;
280
+
281
+ RemapDimensionsTransform () {}
282
+ };
283
+
284
+
285
+ /** per-vector normalization */
286
+ struct NormalizationTransform: VectorTransform {
287
+ float norm;
288
+
289
+ explicit NormalizationTransform (int d, float norm = 2.0);
290
+ NormalizationTransform ();
291
+
292
+ void apply_noalloc(idx_t n, const float* x, float* xt) const override;
293
+
294
+ /// Identity transform since norm is not revertible
295
+ void reverse_transform(idx_t n, const float* xt, float* x) const override;
296
+ };
297
+
298
+ /** Subtract the mean of each component from the vectors. */
299
+ struct CenteringTransform: VectorTransform {
300
+
301
+ /// Mean, size d_in = d_out
302
+ std::vector<float> mean;
303
+
304
+ explicit CenteringTransform (int d = 0);
305
+
306
+ /// train on n vectors.
307
+ void train(idx_t n, const float* x) override;
308
+
309
+ /// subtract the mean
310
+ void apply_noalloc(idx_t n, const float* x, float* xt) const override;
311
+
312
+ /// add the mean
313
+ void reverse_transform (idx_t n, const float * xt,
314
+ float *x) const override;
315
+
316
+ };
317
+
318
+
319
+ } // namespace faiss
320
+
321
+
322
+ #endif
@@ -0,0 +1,83 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // Copyright 2004-present Facebook. All Rights Reserved.
9
+ // -*- c++ -*-
10
+
11
+ #include <cstring>
12
+ #include "AutoTune.h"
13
+ #include "AutoTune_c.h"
14
+ #include "macros_impl.h"
15
+
16
+ using faiss::Index;
17
+ using faiss::ParameterRange;
18
+ using faiss::ParameterSpace;
19
+
20
+ const char* faiss_ParameterRange_name(const FaissParameterRange* range) {
21
+ return reinterpret_cast<const ParameterRange*>(range)->name.c_str();
22
+ }
23
+
24
+ void faiss_ParameterRange_values(FaissParameterRange* range, double** p_values, size_t* p_size) {
25
+ auto& values = reinterpret_cast<ParameterRange*>(range)->values;
26
+ *p_values = values.data();
27
+ *p_size = values.size();
28
+ }
29
+
30
+ int faiss_ParameterSpace_new(FaissParameterSpace** space) {
31
+ try {
32
+ auto new_space = new ParameterSpace();
33
+ *space = reinterpret_cast<FaissParameterSpace*>(new_space);
34
+ } CATCH_AND_HANDLE
35
+ }
36
+
37
+ DEFINE_DESTRUCTOR(ParameterSpace)
38
+
39
+ size_t faiss_ParameterSpace_n_combinations(const FaissParameterSpace* space) {
40
+ return reinterpret_cast<const ParameterSpace*>(space)->n_combinations();
41
+ }
42
+
43
+ int faiss_ParameterSpace_combination_name(const FaissParameterSpace* space, size_t cno, char* char_buffer, size_t size) {
44
+ try {
45
+ auto rep = reinterpret_cast<const ParameterSpace*>(space)->combination_name(cno);
46
+ strncpy(char_buffer, rep.c_str(), size);
47
+ } CATCH_AND_HANDLE
48
+ }
49
+
50
+ int faiss_ParameterSpace_set_index_parameters(const FaissParameterSpace* space, FaissIndex* cindex, const char* param_string) {
51
+ try {
52
+ auto index = reinterpret_cast<Index*>(cindex);
53
+ reinterpret_cast<const ParameterSpace*>(space)->set_index_parameters(index, param_string);
54
+ } CATCH_AND_HANDLE
55
+ }
56
+
57
+ /// set a combination of parameters on an index
58
+ int faiss_ParameterSpace_set_index_parameters_cno(const FaissParameterSpace* space, FaissIndex* cindex, size_t cno) {
59
+ try {
60
+ auto index = reinterpret_cast<Index*>(cindex);
61
+ reinterpret_cast<const ParameterSpace*>(space)->set_index_parameters(index, cno);
62
+ } CATCH_AND_HANDLE
63
+ }
64
+
65
+ int faiss_ParameterSpace_set_index_parameter(const FaissParameterSpace* space, FaissIndex* cindex, const char * name, double value) {
66
+ try {
67
+ auto index = reinterpret_cast<Index*>(cindex);
68
+ reinterpret_cast<const ParameterSpace*>(space)->set_index_parameter(index, name, value);
69
+ } CATCH_AND_HANDLE
70
+ }
71
+
72
+ void faiss_ParameterSpace_display(const FaissParameterSpace* space) {
73
+ reinterpret_cast<const ParameterSpace*>(space)->display();
74
+ }
75
+
76
+ int faiss_ParameterSpace_add_range(FaissParameterSpace* space, const char* name, FaissParameterRange** p_range) {
77
+ try {
78
+ ParameterRange& range = reinterpret_cast<ParameterSpace*>(space)->add_range(name);
79
+ if (p_range) {
80
+ *p_range = reinterpret_cast<FaissParameterRange*>(&range);
81
+ }
82
+ } CATCH_AND_HANDLE
83
+ }
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // Copyright 2004-present Facebook. All Rights Reserved.
9
+ // -*- c -*-
10
+
11
+ #ifndef FAISS_AUTO_TUNE_C_H
12
+ #define FAISS_AUTO_TUNE_C_H
13
+
14
+ #include "faiss_c.h"
15
+ #include "Index_c.h"
16
+
17
+ #ifdef __cplusplus
18
+ extern "C" {
19
+ #endif
20
+
21
+ /// possible values of a parameter, sorted from least to most expensive/accurate
22
+ FAISS_DECLARE_CLASS(ParameterRange)
23
+
24
+ FAISS_DECLARE_GETTER(ParameterRange, const char*, name)
25
+
26
+ /// Getter for the values in the range. The output values are invalidated
27
+ /// upon any other modification of the range.
28
+ void faiss_ParameterRange_values(FaissParameterRange*, double**, size_t*);
29
+
30
+ /** Uses a-priori knowledge on the Faiss indexes to extract tunable parameters.
31
+ */
32
+ FAISS_DECLARE_CLASS(ParameterSpace)
33
+
34
+ /// Parameter space default constructor
35
+ int faiss_ParameterSpace_new(FaissParameterSpace** space);
36
+
37
+ /// nb of combinations, = product of values sizes
38
+ size_t faiss_ParameterSpace_n_combinations(const FaissParameterSpace*);
39
+
40
+ /// get string representation of the combination
41
+ /// by writing it to the given character buffer.
42
+ /// A buffer size of 1000 ensures that the full name is collected.
43
+ int faiss_ParameterSpace_combination_name(const FaissParameterSpace*, size_t, char*, size_t);
44
+
45
+ /// set a combination of parameters described by a string
46
+ int faiss_ParameterSpace_set_index_parameters(const FaissParameterSpace*, FaissIndex*, const char *);
47
+
48
+ /// set a combination of parameters on an index
49
+ int faiss_ParameterSpace_set_index_parameters_cno(const FaissParameterSpace*, FaissIndex*, size_t);
50
+
51
+ /// set one of the parameters
52
+ int faiss_ParameterSpace_set_index_parameter(const FaissParameterSpace*, FaissIndex*, const char *, double);
53
+
54
+ /// print a description on stdout
55
+ void faiss_ParameterSpace_display(const FaissParameterSpace*);
56
+
57
+ /// add a new parameter (or return it if it exists)
58
+ int faiss_ParameterSpace_add_range(FaissParameterSpace*, const char*, FaissParameterRange**);
59
+
60
+ #ifdef __cplusplus
61
+ }
62
+ #endif
63
+
64
+ #endif