faiss 0.1.5 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/README.md +12 -0
  4. data/ext/faiss/ext.cpp +1 -1
  5. data/ext/faiss/extconf.rb +6 -2
  6. data/ext/faiss/index.cpp +114 -43
  7. data/ext/faiss/index_binary.cpp +24 -30
  8. data/ext/faiss/kmeans.cpp +20 -16
  9. data/ext/faiss/numo.hpp +867 -0
  10. data/ext/faiss/pca_matrix.cpp +13 -14
  11. data/ext/faiss/product_quantizer.cpp +23 -24
  12. data/ext/faiss/utils.cpp +10 -37
  13. data/ext/faiss/utils.h +2 -13
  14. data/lib/faiss.rb +0 -5
  15. data/lib/faiss/version.rb +1 -1
  16. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  17. data/vendor/faiss/faiss/AutoTune.h +55 -56
  18. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  19. data/vendor/faiss/faiss/Clustering.h +88 -35
  20. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  21. data/vendor/faiss/faiss/IVFlib.h +48 -51
  22. data/vendor/faiss/faiss/Index.cpp +85 -103
  23. data/vendor/faiss/faiss/Index.h +54 -48
  24. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  25. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  26. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  27. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  28. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  29. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  30. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  31. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  32. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  33. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  34. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  35. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  36. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  37. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  38. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  39. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  40. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  41. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  42. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  43. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  54. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  55. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  69. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  73. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  76. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  77. data/vendor/faiss/faiss/IndexShards.h +85 -73
  78. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  79. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  81. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  82. data/vendor/faiss/faiss/MetricType.h +7 -7
  83. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  84. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  85. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  86. data/vendor/faiss/faiss/clone_index.h +4 -9
  87. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  88. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  89. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  90. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  91. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  96. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  102. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  103. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  104. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  106. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  108. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  110. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  112. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  113. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  114. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  115. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  116. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  121. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  122. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  124. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  125. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  126. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  128. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  129. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  130. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  131. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  133. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  135. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  136. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  137. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  138. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  139. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  140. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  142. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  144. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  145. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  146. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  148. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  149. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  151. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  153. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  154. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  156. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  157. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  158. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  159. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  160. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  161. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  162. data/vendor/faiss/faiss/impl/io.h +31 -41
  163. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  164. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  165. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  166. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  167. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  171. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  172. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  173. data/vendor/faiss/faiss/index_factory.h +6 -7
  174. data/vendor/faiss/faiss/index_io.h +23 -26
  175. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  177. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  178. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  179. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  180. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  181. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  183. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  185. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  186. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  187. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  188. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  189. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  190. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  191. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  192. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  193. data/vendor/faiss/faiss/utils/distances.h +133 -118
  194. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  195. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  196. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  197. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  198. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  199. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  200. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  201. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  202. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  203. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  204. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  205. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  206. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  207. data/vendor/faiss/faiss/utils/random.h +13 -16
  208. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  209. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  210. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  211. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  212. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  213. data/vendor/faiss/faiss/utils/utils.h +53 -48
  214. metadata +24 -10
  215. data/lib/faiss/index.rb +0 -20
  216. data/lib/faiss/index_binary.rb +0 -20
  217. data/lib/faiss/kmeans.rb +0 -15
  218. data/lib/faiss/pca_matrix.rb +0 -15
  219. data/lib/faiss/product_quantizer.rb +0 -22
@@ -14,67 +14,55 @@
14
14
  * vectors Often these are pre-processing steps.
15
15
  */
16
16
 
17
- #include <vector>
18
17
  #include <stdint.h>
18
+ #include <vector>
19
19
 
20
20
  #include <faiss/Index.h>
21
21
 
22
-
23
22
  namespace faiss {
24
23
 
25
-
26
24
  /** Any transformation applied on a set of vectors */
27
25
  struct VectorTransform {
28
-
29
26
  typedef Index::idx_t idx_t;
30
27
 
31
- int d_in; ///! input dimension
32
- int d_out; ///! output dimension
33
-
34
- explicit VectorTransform (int d_in = 0, int d_out = 0):
35
- d_in(d_in), d_out(d_out), is_trained(true)
36
- {}
28
+ int d_in; ///! input dimension
29
+ int d_out; ///! output dimension
37
30
 
31
+ explicit VectorTransform(int d_in = 0, int d_out = 0)
32
+ : d_in(d_in), d_out(d_out), is_trained(true) {}
38
33
 
39
34
  /// set if the VectorTransform does not require training, or if
40
35
  /// training is done already
41
36
  bool is_trained;
42
37
 
43
-
44
38
  /** Perform training on a representative set of vectors. Does
45
39
  * nothing by default.
46
40
  *
47
41
  * @param n nb of training vectors
48
42
  * @param x training vecors, size n * d
49
43
  */
50
- virtual void train (idx_t n, const float *x);
44
+ virtual void train(idx_t n, const float* x);
51
45
 
52
- /** apply the random roation, return new allocated matrix
46
+ /** apply the random rotation, return new allocated matrix
53
47
  * @param x size n * d_in
54
48
  * @return size n * d_out
55
49
  */
56
- float *apply (idx_t n, const float * x) const;
50
+ float* apply(idx_t n, const float* x) const;
57
51
 
58
52
  /// same as apply, but result is pre-allocated
59
- virtual void apply_noalloc (idx_t n, const float * x,
60
- float *xt) const = 0;
53
+ virtual void apply_noalloc(idx_t n, const float* x, float* xt) const = 0;
61
54
 
62
55
  /// reverse transformation. May not be implemented or may return
63
56
  /// approximate result
64
- virtual void reverse_transform (idx_t n, const float * xt,
65
- float *x) const;
66
-
67
- virtual ~VectorTransform () {}
57
+ virtual void reverse_transform(idx_t n, const float* xt, float* x) const;
68
58
 
59
+ virtual ~VectorTransform() {}
69
60
  };
70
61
 
71
-
72
-
73
62
  /** Generic linear transformation, with bias term applied on output
74
63
  * y = A * x + b
75
64
  */
76
- struct LinearTransform: VectorTransform {
77
-
65
+ struct LinearTransform : VectorTransform {
78
66
  bool have_bias; ///! whether to use the bias term
79
67
 
80
68
  /// check if matrix A is orthonormal (enables reverse_transform)
@@ -83,58 +71,56 @@ struct LinearTransform: VectorTransform {
83
71
  /// Transformation matrix, size d_out * d_in
84
72
  std::vector<float> A;
85
73
 
86
- /// bias vector, size d_out
74
+ /// bias vector, size d_out
87
75
  std::vector<float> b;
88
76
 
89
77
  /// both d_in > d_out and d_out < d_in are supported
90
- explicit LinearTransform (int d_in = 0, int d_out = 0,
91
- bool have_bias = false);
78
+ explicit LinearTransform(
79
+ int d_in = 0,
80
+ int d_out = 0,
81
+ bool have_bias = false);
92
82
 
93
83
  /// same as apply, but result is pre-allocated
94
84
  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
95
85
 
96
86
  /// compute x = A^T * (x - b)
97
87
  /// is reverse transform if A has orthonormal lines
98
- void transform_transpose (idx_t n, const float * y,
99
- float *x) const;
88
+ void transform_transpose(idx_t n, const float* y, float* x) const;
100
89
 
101
90
  /// works only if is_orthonormal
102
- void reverse_transform (idx_t n, const float * xt,
103
- float *x) const override;
91
+ void reverse_transform(idx_t n, const float* xt, float* x) const override;
104
92
 
105
93
  /// compute A^T * A to set the is_orthonormal flag
106
- void set_is_orthonormal ();
94
+ void set_is_orthonormal();
107
95
 
108
96
  bool verbose;
109
- void print_if_verbose (const char*name, const std::vector<double> &mat,
110
- int n, int d) const;
97
+ void print_if_verbose(
98
+ const char* name,
99
+ const std::vector<double>& mat,
100
+ int n,
101
+ int d) const;
111
102
 
112
103
  ~LinearTransform() override {}
113
104
  };
114
105
 
115
-
116
-
117
106
  /// Randomly rotate a set of vectors
118
- struct RandomRotationMatrix: LinearTransform {
119
-
120
- /// both d_in > d_out and d_out < d_in are supported
121
- RandomRotationMatrix (int d_in, int d_out):
122
- LinearTransform(d_in, d_out, false) {}
107
+ struct RandomRotationMatrix : LinearTransform {
108
+ /// both d_in > d_out and d_out < d_in are supported
109
+ RandomRotationMatrix(int d_in, int d_out)
110
+ : LinearTransform(d_in, d_out, false) {}
123
111
 
124
- /// must be called before the transform is used
125
- void init(int seed);
112
+ /// must be called before the transform is used
113
+ void init(int seed);
126
114
 
127
- // intializes with an arbitrary seed
128
- void train(idx_t n, const float* x) override;
115
+ // intializes with an arbitrary seed
116
+ void train(idx_t n, const float* x) override;
129
117
 
130
- RandomRotationMatrix () {}
118
+ RandomRotationMatrix() {}
131
119
  };
132
120
 
133
-
134
121
  /** Applies a principal component analysis on a set of vectors,
135
122
  * with optionally whitening and random rotation. */
136
- struct PCAMatrix: LinearTransform {
137
-
123
+ struct PCAMatrix : LinearTransform {
138
124
  /** after transformation the components are multiplied by
139
125
  * eigenvalues^eigen_power
140
126
  *
@@ -162,22 +148,23 @@ struct PCAMatrix: LinearTransform {
162
148
  std::vector<float> PCAMat;
163
149
 
164
150
  // the final matrix is computed after random rotation and/or whitening
165
- explicit PCAMatrix (int d_in = 0, int d_out = 0,
166
- float eigen_power = 0, bool random_rotation = false);
151
+ explicit PCAMatrix(
152
+ int d_in = 0,
153
+ int d_out = 0,
154
+ float eigen_power = 0,
155
+ bool random_rotation = false);
167
156
 
168
157
  /// train on n vectors. If n < d_in then the eigenvector matrix
169
158
  /// will be completed with 0s
170
159
  void train(idx_t n, const float* x) override;
171
160
 
172
161
  /// copy pre-trained PCA matrix
173
- void copy_from (const PCAMatrix & other);
162
+ void copy_from(const PCAMatrix& other);
174
163
 
175
164
  /// called after mean, PCAMat and eigenvalues are computed
176
165
  void prepare_Ab();
177
-
178
166
  };
179
167
 
180
-
181
168
  /** ITQ implementation from
182
169
  *
183
170
  * Iterative quantization: A procrustean approach to learning binary codes
@@ -187,25 +174,21 @@ struct PCAMatrix: LinearTransform {
187
174
  * PAMI'12.
188
175
  */
189
176
 
190
- struct ITQMatrix: LinearTransform {
191
-
177
+ struct ITQMatrix : LinearTransform {
192
178
  int max_iter;
193
179
  int seed;
194
180
 
195
181
  // force initialization of the rotation (for debugging)
196
182
  std::vector<double> init_rotation;
197
183
 
198
- explicit ITQMatrix (int d = 0);
184
+ explicit ITQMatrix(int d = 0);
199
185
 
200
- void train (idx_t n, const float* x) override;
186
+ void train(idx_t n, const float* x) override;
201
187
  };
202
188
 
203
-
204
-
205
189
  /** The full ITQ transform, including normalizations and PCA transformation
206
190
  */
207
- struct ITQTransform: VectorTransform {
208
-
191
+ struct ITQTransform : VectorTransform {
209
192
  std::vector<float> mean;
210
193
  bool do_pca;
211
194
  ITQMatrix itq;
@@ -216,15 +199,13 @@ struct ITQTransform: VectorTransform {
216
199
  // concatenation of PCA + ITQ transformation
217
200
  LinearTransform pca_then_itq;
218
201
 
219
- explicit ITQTransform (int d_in = 0, int d_out = 0, bool do_pca = false);
220
-
221
- void train (idx_t n, const float *x) override;
202
+ explicit ITQTransform(int d_in = 0, int d_out = 0, bool do_pca = false);
222
203
 
223
- void apply_noalloc (idx_t n, const float* x, float* xt) const override;
204
+ void train(idx_t n, const float* x) override;
224
205
 
206
+ void apply_noalloc(idx_t n, const float* x, float* xt) const override;
225
207
  };
226
208
 
227
-
228
209
  struct ProductQuantizer;
229
210
 
230
211
  /** Applies a rotation to align the dimensions with a PQ to minimize
@@ -235,8 +216,7 @@ struct ProductQuantizer;
235
216
  * Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
236
217
  *
237
218
  */
238
- struct OPQMatrix: LinearTransform {
239
-
219
+ struct OPQMatrix : LinearTransform {
240
220
  int M; ///< nb of subquantizers
241
221
  int niter; ///< Number of outer training iterations
242
222
  int niter_pq; ///< Number of training iterations for the PQ
@@ -248,46 +228,43 @@ struct OPQMatrix: LinearTransform {
248
228
 
249
229
  /// if non-NULL, use this product quantizer for training
250
230
  /// should be constructed with (d_out, M, _)
251
- ProductQuantizer * pq;
231
+ ProductQuantizer* pq;
252
232
 
253
233
  /// if d2 != -1, output vectors of this dimension
254
- explicit OPQMatrix (int d = 0, int M = 1, int d2 = -1);
234
+ explicit OPQMatrix(int d = 0, int M = 1, int d2 = -1);
255
235
 
256
236
  void train(idx_t n, const float* x) override;
257
237
  };
258
238
 
259
-
260
239
  /** remap dimensions for intput vectors, possibly inserting 0s
261
240
  * strictly speaking this is also a linear transform but we don't want
262
241
  * to compute it with matrix multiplies */
263
- struct RemapDimensionsTransform: VectorTransform {
264
-
242
+ struct RemapDimensionsTransform : VectorTransform {
265
243
  /// map from output dimension to input, size d_out
266
244
  /// -1 -> set output to 0
267
245
  std::vector<int> map;
268
246
 
269
- RemapDimensionsTransform (int d_in, int d_out, const int *map);
247
+ RemapDimensionsTransform(int d_in, int d_out, const int* map);
270
248
 
271
249
  /// remap input to output, skipping or inserting dimensions as needed
272
250
  /// if uniform: distribute dimensions uniformly
273
251
  /// otherwise just take the d_out first ones.
274
- RemapDimensionsTransform (int d_in, int d_out, bool uniform = true);
252
+ RemapDimensionsTransform(int d_in, int d_out, bool uniform = true);
275
253
 
276
254
  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
277
255
 
278
256
  /// reverse transform correct only when the mapping is a permutation
279
257
  void reverse_transform(idx_t n, const float* xt, float* x) const override;
280
258
 
281
- RemapDimensionsTransform () {}
259
+ RemapDimensionsTransform() {}
282
260
  };
283
261
 
284
-
285
262
  /** per-vector normalization */
286
- struct NormalizationTransform: VectorTransform {
263
+ struct NormalizationTransform : VectorTransform {
287
264
  float norm;
288
265
 
289
- explicit NormalizationTransform (int d, float norm = 2.0);
290
- NormalizationTransform ();
266
+ explicit NormalizationTransform(int d, float norm = 2.0);
267
+ NormalizationTransform();
291
268
 
292
269
  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
293
270
 
@@ -296,12 +273,11 @@ struct NormalizationTransform: VectorTransform {
296
273
  };
297
274
 
298
275
  /** Subtract the mean of each component from the vectors. */
299
- struct CenteringTransform: VectorTransform {
300
-
276
+ struct CenteringTransform : VectorTransform {
301
277
  /// Mean, size d_in = d_out
302
278
  std::vector<float> mean;
303
279
 
304
- explicit CenteringTransform (int d = 0);
280
+ explicit CenteringTransform(int d = 0);
305
281
 
306
282
  /// train on n vectors.
307
283
  void train(idx_t n, const float* x) override;
@@ -310,13 +286,9 @@ struct CenteringTransform: VectorTransform {
310
286
  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
311
287
 
312
288
  /// add the mean
313
- void reverse_transform (idx_t n, const float * xt,
314
- float *x) const override;
315
-
289
+ void reverse_transform(idx_t n, const float* xt, float* x) const override;
316
290
  };
317
291
 
318
-
319
292
  } // namespace faiss
320
293
 
321
-
322
294
  #endif
@@ -14,22 +14,23 @@
14
14
 
15
15
  #include <faiss/impl/FaissAssert.h>
16
16
 
17
+ #include <faiss/Index2Layer.h>
17
18
  #include <faiss/IndexFlat.h>
18
- #include <faiss/VectorTransform.h>
19
- #include <faiss/IndexPreTransform.h>
20
- #include <faiss/IndexLSH.h>
21
- #include <faiss/IndexPQ.h>
19
+ #include <faiss/IndexHNSW.h>
22
20
  #include <faiss/IndexIVF.h>
21
+ #include <faiss/IndexIVFFlat.h>
23
22
  #include <faiss/IndexIVFPQ.h>
24
23
  #include <faiss/IndexIVFPQR.h>
25
- #include <faiss/Index2Layer.h>
26
- #include <faiss/IndexIVFFlat.h>
27
24
  #include <faiss/IndexIVFSpectralHash.h>
28
- #include <faiss/MetaIndexes.h>
29
- #include <faiss/IndexScalarQuantizer.h>
30
- #include <faiss/IndexHNSW.h>
25
+ #include <faiss/IndexLSH.h>
31
26
  #include <faiss/IndexLattice.h>
32
- #include <faiss/Index2Layer.h>
27
+ #include <faiss/IndexNSG.h>
28
+ #include <faiss/IndexPQ.h>
29
+ #include <faiss/IndexPreTransform.h>
30
+ #include <faiss/IndexResidual.h>
31
+ #include <faiss/IndexScalarQuantizer.h>
32
+ #include <faiss/MetaIndexes.h>
33
+ #include <faiss/VectorTransform.h>
33
34
 
34
35
  namespace faiss {
35
36
 
@@ -37,111 +38,114 @@ namespace faiss {
37
38
  * cloning functions
38
39
  **************************************************************/
39
40
 
40
-
41
-
42
- Index * clone_index (const Index *index)
43
- {
41
+ Index* clone_index(const Index* index) {
44
42
  Cloner cl;
45
- return cl.clone_Index (index);
43
+ return cl.clone_Index(index);
46
44
  }
47
45
 
48
46
  // assumes there is a copy constructor ready. Always try from most
49
47
  // specific to most general. Most indexes don't have complicated
50
48
  // structs, the default copy constructor often just works.
51
- #define TRYCLONE(classname, obj) \
52
- if (const classname *clo = dynamic_cast<const classname *>(obj)) { \
53
- return new classname(*clo); \
49
+ #define TRYCLONE(classname, obj) \
50
+ if (const classname* clo = dynamic_cast<const classname*>(obj)) { \
51
+ return new classname(*clo); \
54
52
  } else
55
53
 
56
- VectorTransform *Cloner::clone_VectorTransform (const VectorTransform *vt)
57
- {
58
- TRYCLONE (RemapDimensionsTransform, vt)
59
- TRYCLONE (OPQMatrix, vt)
60
- TRYCLONE (PCAMatrix, vt)
61
- TRYCLONE (ITQMatrix, vt)
62
- TRYCLONE (RandomRotationMatrix, vt)
63
- TRYCLONE (LinearTransform, vt)
64
- {
65
- FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
54
+ VectorTransform* Cloner::clone_VectorTransform(const VectorTransform* vt) {
55
+ TRYCLONE(RemapDimensionsTransform, vt)
56
+ TRYCLONE(OPQMatrix, vt)
57
+ TRYCLONE(PCAMatrix, vt)
58
+ TRYCLONE(ITQMatrix, vt)
59
+ TRYCLONE(RandomRotationMatrix, vt)
60
+ TRYCLONE(LinearTransform, vt) {
61
+ FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
66
62
  }
67
63
  return nullptr;
68
64
  }
69
65
 
70
- IndexIVF * Cloner::clone_IndexIVF (const IndexIVF *ivf)
71
- {
72
- TRYCLONE (IndexIVFPQR, ivf)
73
- TRYCLONE (IndexIVFPQ, ivf)
74
- TRYCLONE (IndexIVFFlat, ivf)
75
- TRYCLONE (IndexIVFScalarQuantizer, ivf)
76
- {
77
- FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
66
+ IndexIVF* Cloner::clone_IndexIVF(const IndexIVF* ivf) {
67
+ TRYCLONE(IndexIVFPQR, ivf)
68
+ TRYCLONE(IndexIVFPQ, ivf)
69
+ TRYCLONE(IndexIVFFlat, ivf)
70
+ TRYCLONE(IndexIVFScalarQuantizer, ivf) {
71
+ FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
78
72
  }
79
73
  return nullptr;
80
74
  }
81
75
 
82
- Index *Cloner::clone_Index (const Index *index)
83
- {
84
- TRYCLONE (IndexPQ, index)
85
- TRYCLONE (IndexLSH, index)
86
- TRYCLONE (IndexFlatL2, index)
87
- TRYCLONE (IndexFlatIP, index)
88
- TRYCLONE (IndexFlat, index)
89
- TRYCLONE (IndexLattice, index)
90
- TRYCLONE (IndexScalarQuantizer, index)
91
- TRYCLONE (MultiIndexQuantizer, index)
92
- if (const IndexIVF * ivf = dynamic_cast<const IndexIVF*>(index)) {
93
- IndexIVF *res = clone_IndexIVF (ivf);
76
+ Index* Cloner::clone_Index(const Index* index) {
77
+ TRYCLONE(IndexPQ, index)
78
+ TRYCLONE(IndexLSH, index)
79
+ TRYCLONE(IndexFlatL2, index)
80
+ TRYCLONE(IndexFlatIP, index)
81
+ TRYCLONE(IndexFlat, index)
82
+ TRYCLONE(IndexLattice, index)
83
+ TRYCLONE(IndexResidual, index)
84
+ TRYCLONE(IndexScalarQuantizer, index)
85
+ TRYCLONE(MultiIndexQuantizer, index)
86
+ if (const IndexIVF* ivf = dynamic_cast<const IndexIVF*>(index)) {
87
+ IndexIVF* res = clone_IndexIVF(ivf);
94
88
  if (ivf->invlists == nullptr) {
95
89
  res->invlists = nullptr;
96
- } else if (auto *ails = dynamic_cast<const ArrayInvertedLists*>
97
- (ivf->invlists)) {
90
+ } else if (
91
+ auto* ails = dynamic_cast<const ArrayInvertedLists*>(
92
+ ivf->invlists)) {
98
93
  res->invlists = new ArrayInvertedLists(*ails);
99
94
  res->own_invlists = true;
100
95
  } else {
101
- FAISS_THROW_MSG( "clone not supported for this type of inverted lists");
96
+ FAISS_THROW_MSG(
97
+ "clone not supported for this type of inverted lists");
102
98
  }
103
99
  res->own_fields = true;
104
- res->quantizer = clone_Index (ivf->quantizer);
100
+ res->quantizer = clone_Index(ivf->quantizer);
105
101
  return res;
106
- } else if (const IndexPreTransform * ipt =
107
- dynamic_cast<const IndexPreTransform*> (index)) {
108
- IndexPreTransform *res = new IndexPreTransform ();
102
+ } else if (
103
+ const IndexPreTransform* ipt =
104
+ dynamic_cast<const IndexPreTransform*>(index)) {
105
+ IndexPreTransform* res = new IndexPreTransform();
109
106
  res->d = ipt->d;
110
107
  res->ntotal = ipt->ntotal;
111
108
  res->is_trained = ipt->is_trained;
112
109
  res->metric_type = ipt->metric_type;
113
110
  res->metric_arg = ipt->metric_arg;
114
111
 
115
-
116
- res->index = clone_Index (ipt->index);
112
+ res->index = clone_Index(ipt->index);
117
113
  for (int i = 0; i < ipt->chain.size(); i++)
118
- res->chain.push_back (clone_VectorTransform (ipt->chain[i]));
114
+ res->chain.push_back(clone_VectorTransform(ipt->chain[i]));
119
115
  res->own_fields = true;
120
116
  return res;
121
- } else if (const IndexIDMap *idmap =
122
- dynamic_cast<const IndexIDMap*> (index)) {
123
- IndexIDMap *res = new IndexIDMap (*idmap);
117
+ } else if (
118
+ const IndexIDMap* idmap = dynamic_cast<const IndexIDMap*>(index)) {
119
+ IndexIDMap* res = new IndexIDMap(*idmap);
124
120
  res->own_fields = true;
125
- res->index = clone_Index (idmap->index);
121
+ res->index = clone_Index(idmap->index);
126
122
  return res;
127
- } else if (const IndexHNSW *ihnsw =
128
- dynamic_cast<const IndexHNSW*> (index)) {
129
- IndexHNSW *res = new IndexHNSW (*ihnsw);
123
+ } else if (const IndexHNSW* ihnsw = dynamic_cast<const IndexHNSW*>(index)) {
124
+ IndexHNSW* res = new IndexHNSW(*ihnsw);
130
125
  res->own_fields = true;
131
- res->storage = clone_Index (ihnsw->storage);
126
+ res->storage = clone_Index(ihnsw->storage);
132
127
  return res;
133
- } else if (const Index2Layer *i2l =
134
- dynamic_cast<const Index2Layer*> (index)) {
135
- Index2Layer *res = new Index2Layer (*i2l);
128
+ } else if (const IndexNSG* insg = dynamic_cast<const IndexNSG*>(index)) {
129
+ IndexNSG* res = new IndexNSG(*insg);
130
+
131
+ // copy the dynamic allocated graph
132
+ auto& new_graph = res->nsg.final_graph;
133
+ auto& old_graph = insg->nsg.final_graph;
134
+ new_graph = std::make_shared<nsg::Graph<int>>(*old_graph);
135
+
136
+ res->own_fields = true;
137
+ res->storage = clone_Index(insg->storage);
138
+ return res;
139
+ } else if (
140
+ const Index2Layer* i2l = dynamic_cast<const Index2Layer*>(index)) {
141
+ Index2Layer* res = new Index2Layer(*i2l);
136
142
  res->q1.own_fields = true;
137
- res->q1.quantizer = clone_Index (i2l->q1.quantizer);
143
+ res->q1.quantizer = clone_Index(i2l->q1.quantizer);
138
144
  return res;
139
145
  } else {
140
- FAISS_THROW_MSG( "clone not supported for this type of Index");
146
+ FAISS_THROW_MSG("clone not supported for this type of Index");
141
147
  }
142
148
  return nullptr;
143
149
  }
144
150
 
145
-
146
-
147
151
  } // namespace faiss