faiss 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +7 -7
  5. data/ext/faiss/extconf.rb +6 -3
  6. data/ext/faiss/numo.hpp +4 -4
  7. data/ext/faiss/utils.cpp +1 -1
  8. data/ext/faiss/utils.h +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  11. data/vendor/faiss/faiss/AutoTune.h +55 -56
  12. data/vendor/faiss/faiss/Clustering.cpp +365 -194
  13. data/vendor/faiss/faiss/Clustering.h +102 -35
  14. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  15. data/vendor/faiss/faiss/IVFlib.h +48 -51
  16. data/vendor/faiss/faiss/Index.cpp +85 -103
  17. data/vendor/faiss/faiss/Index.h +54 -48
  18. data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
  19. data/vendor/faiss/faiss/Index2Layer.h +22 -36
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  21. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
  22. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  23. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  24. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  25. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  26. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  27. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  28. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  29. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  30. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  31. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  32. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  33. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  34. data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
  35. data/vendor/faiss/faiss/IndexFlat.h +42 -59
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  39. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  40. data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
  41. data/vendor/faiss/faiss/IndexIVF.h +169 -118
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
  54. data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
  55. data/vendor/faiss/faiss/IndexLSH.h +20 -38
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -82
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
  69. data/vendor/faiss/faiss/IndexRefine.h +32 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
  73. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
  74. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  75. data/vendor/faiss/faiss/IndexShards.h +85 -73
  76. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  77. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  78. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  79. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  80. data/vendor/faiss/faiss/MetricType.h +7 -7
  81. data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
  82. data/vendor/faiss/faiss/VectorTransform.h +64 -89
  83. data/vendor/faiss/faiss/clone_index.cpp +78 -73
  84. data/vendor/faiss/faiss/clone_index.h +4 -9
  85. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  86. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  87. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
  88. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  89. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  90. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  91. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  92. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  93. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  94. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  95. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  96. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  97. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  101. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  108. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  110. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  112. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  113. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  114. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  115. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  116. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  121. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  122. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  124. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  125. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  126. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  128. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  129. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  130. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  135. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  136. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  137. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  138. data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
  139. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
  142. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  144. data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
  145. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  146. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  148. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  149. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  151. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
  153. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  154. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  156. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  157. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  158. data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
  159. data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
  160. data/vendor/faiss/faiss/impl/io.cpp +76 -95
  161. data/vendor/faiss/faiss/impl/io.h +31 -41
  162. data/vendor/faiss/faiss/impl/io_macros.h +60 -29
  163. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  164. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  165. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  166. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  167. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  171. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  172. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  173. data/vendor/faiss/faiss/index_factory.cpp +619 -397
  174. data/vendor/faiss/faiss/index_factory.h +8 -6
  175. data/vendor/faiss/faiss/index_io.h +23 -26
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  177. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  178. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  179. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  180. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  181. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  183. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  185. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  186. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  187. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  188. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  189. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  190. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  191. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  192. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  193. data/vendor/faiss/faiss/utils/distances.cpp +305 -312
  194. data/vendor/faiss/faiss/utils/distances.h +170 -122
  195. data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
  196. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  197. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  198. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  199. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  200. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  201. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  202. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  203. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  204. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  205. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  206. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  207. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  208. data/vendor/faiss/faiss/utils/random.h +13 -16
  209. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  210. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  211. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  212. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  214. data/vendor/faiss/faiss/utils/utils.h +54 -49
  215. metadata +29 -4
@@ -14,67 +14,55 @@
14
14
  * vectors. Often these are pre-processing steps.
15
15
  */
16
16
 
17
- #include <vector>
18
17
  #include <stdint.h>
18
+ #include <vector>
19
19
 
20
20
  #include <faiss/Index.h>
21
21
 
22
-
23
22
  namespace faiss {
24
23
 
25
-
26
24
  /** Any transformation applied on a set of vectors */
27
25
  struct VectorTransform {
28
-
29
26
  typedef Index::idx_t idx_t;
30
27
 
31
- int d_in; ///! input dimension
32
- int d_out; ///! output dimension
33
-
34
- explicit VectorTransform (int d_in = 0, int d_out = 0):
35
- d_in(d_in), d_out(d_out), is_trained(true)
36
- {}
28
+ int d_in; ///! input dimension
29
+ int d_out; ///! output dimension
37
30
 
31
+ explicit VectorTransform(int d_in = 0, int d_out = 0)
32
+ : d_in(d_in), d_out(d_out), is_trained(true) {}
38
33
 
39
34
  /// set if the VectorTransform does not require training, or if
40
35
  /// training is done already
41
36
  bool is_trained;
42
37
 
43
-
44
38
  /** Perform training on a representative set of vectors. Does
45
39
  * nothing by default.
46
40
  *
47
41
  * @param n nb of training vectors
48
42
  * @param x training vecors, size n * d
49
43
  */
50
- virtual void train (idx_t n, const float *x);
44
+ virtual void train(idx_t n, const float* x);
51
45
 
52
- /** apply the random roation, return new allocated matrix
46
+ /** apply the random rotation, return new allocated matrix
53
47
  * @param x size n * d_in
54
48
  * @return size n * d_out
55
49
  */
56
- float *apply (idx_t n, const float * x) const;
50
+ float* apply(idx_t n, const float* x) const;
57
51
 
58
52
  /// same as apply, but result is pre-allocated
59
- virtual void apply_noalloc (idx_t n, const float * x,
60
- float *xt) const = 0;
53
+ virtual void apply_noalloc(idx_t n, const float* x, float* xt) const = 0;
61
54
 
62
55
  /// reverse transformation. May not be implemented or may return
63
56
  /// approximate result
64
- virtual void reverse_transform (idx_t n, const float * xt,
65
- float *x) const;
66
-
67
- virtual ~VectorTransform () {}
57
+ virtual void reverse_transform(idx_t n, const float* xt, float* x) const;
68
58
 
59
+ virtual ~VectorTransform() {}
69
60
  };
70
61
 
71
-
72
-
73
62
  /** Generic linear transformation, with bias term applied on output
74
63
  * y = A * x + b
75
64
  */
76
- struct LinearTransform: VectorTransform {
77
-
65
+ struct LinearTransform : VectorTransform {
78
66
  bool have_bias; ///! whether to use the bias term
79
67
 
80
68
  /// check if matrix A is orthonormal (enables reverse_transform)
@@ -83,58 +71,56 @@ struct LinearTransform: VectorTransform {
83
71
  /// Transformation matrix, size d_out * d_in
84
72
  std::vector<float> A;
85
73
 
86
- /// bias vector, size d_out
74
+ /// bias vector, size d_out
87
75
  std::vector<float> b;
88
76
 
89
77
  /// both d_in > d_out and d_in < d_out are supported
90
- explicit LinearTransform (int d_in = 0, int d_out = 0,
91
- bool have_bias = false);
78
+ explicit LinearTransform(
79
+ int d_in = 0,
80
+ int d_out = 0,
81
+ bool have_bias = false);
92
82
 
93
83
  /// same as apply, but result is pre-allocated
94
84
  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
95
85
 
96
86
  /// compute x = A^T * (x - b)
97
87
  /// is reverse transform if A has orthonormal lines
98
- void transform_transpose (idx_t n, const float * y,
99
- float *x) const;
88
+ void transform_transpose(idx_t n, const float* y, float* x) const;
100
89
 
101
90
  /// works only if is_orthonormal
102
- void reverse_transform (idx_t n, const float * xt,
103
- float *x) const override;
91
+ void reverse_transform(idx_t n, const float* xt, float* x) const override;
104
92
 
105
93
  /// compute A^T * A to set the is_orthonormal flag
106
- void set_is_orthonormal ();
94
+ void set_is_orthonormal();
107
95
 
108
96
  bool verbose;
109
- void print_if_verbose (const char*name, const std::vector<double> &mat,
110
- int n, int d) const;
97
+ void print_if_verbose(
98
+ const char* name,
99
+ const std::vector<double>& mat,
100
+ int n,
101
+ int d) const;
111
102
 
112
103
  ~LinearTransform() override {}
113
104
  };
114
105
 
115
-
116
-
117
106
  /// Randomly rotate a set of vectors
118
- struct RandomRotationMatrix: LinearTransform {
119
-
120
- /// both d_in > d_out and d_out < d_in are supported
121
- RandomRotationMatrix (int d_in, int d_out):
122
- LinearTransform(d_in, d_out, false) {}
107
+ struct RandomRotationMatrix : LinearTransform {
108
+ /// both d_in > d_out and d_out < d_in are supported
109
+ RandomRotationMatrix(int d_in, int d_out)
110
+ : LinearTransform(d_in, d_out, false) {}
123
111
 
124
- /// must be called before the transform is used
125
- void init(int seed);
112
+ /// must be called before the transform is used
113
+ void init(int seed);
126
114
 
127
- // intializes with an arbitrary seed
128
- void train(idx_t n, const float* x) override;
115
+ // intializes with an arbitrary seed
116
+ void train(idx_t n, const float* x) override;
129
117
 
130
- RandomRotationMatrix () {}
118
+ RandomRotationMatrix() {}
131
119
  };
132
120
 
133
-
134
121
  /** Applies a principal component analysis on a set of vectors,
135
122
  * with optionally whitening and random rotation. */
136
- struct PCAMatrix: LinearTransform {
137
-
123
+ struct PCAMatrix : LinearTransform {
138
124
  /** after transformation the components are multiplied by
139
125
  * eigenvalues^eigen_power
140
126
  *
@@ -143,6 +129,9 @@ struct PCAMatrix: LinearTransform {
143
129
  */
144
130
  float eigen_power;
145
131
 
132
+ /// value added to eigenvalues to avoid division by 0 when whitening
133
+ float epsilon;
134
+
146
135
  /// random rotation after PCA
147
136
  bool random_rotation;
148
137
 
@@ -162,22 +151,23 @@ struct PCAMatrix: LinearTransform {
162
151
  std::vector<float> PCAMat;
163
152
 
164
153
  // the final matrix is computed after random rotation and/or whitening
165
- explicit PCAMatrix (int d_in = 0, int d_out = 0,
166
- float eigen_power = 0, bool random_rotation = false);
154
+ explicit PCAMatrix(
155
+ int d_in = 0,
156
+ int d_out = 0,
157
+ float eigen_power = 0,
158
+ bool random_rotation = false);
167
159
 
168
160
  /// train on n vectors. If n < d_in then the eigenvector matrix
169
161
  /// will be completed with 0s
170
162
  void train(idx_t n, const float* x) override;
171
163
 
172
164
  /// copy pre-trained PCA matrix
173
- void copy_from (const PCAMatrix & other);
165
+ void copy_from(const PCAMatrix& other);
174
166
 
175
167
  /// called after mean, PCAMat and eigenvalues are computed
176
168
  void prepare_Ab();
177
-
178
169
  };
179
170
 
180
-
181
171
  /** ITQ implementation from
182
172
  *
183
173
  * Iterative quantization: A procrustean approach to learning binary codes
@@ -187,25 +177,21 @@ struct PCAMatrix: LinearTransform {
187
177
  * PAMI'12.
188
178
  */
189
179
 
190
- struct ITQMatrix: LinearTransform {
191
-
180
+ struct ITQMatrix : LinearTransform {
192
181
  int max_iter;
193
182
  int seed;
194
183
 
195
184
  // force initialization of the rotation (for debugging)
196
185
  std::vector<double> init_rotation;
197
186
 
198
- explicit ITQMatrix (int d = 0);
187
+ explicit ITQMatrix(int d = 0);
199
188
 
200
- void train (idx_t n, const float* x) override;
189
+ void train(idx_t n, const float* x) override;
201
190
  };
202
191
 
203
-
204
-
205
192
  /** The full ITQ transform, including normalizations and PCA transformation
206
193
  */
207
- struct ITQTransform: VectorTransform {
208
-
194
+ struct ITQTransform : VectorTransform {
209
195
  std::vector<float> mean;
210
196
  bool do_pca;
211
197
  ITQMatrix itq;
@@ -216,15 +202,13 @@ struct ITQTransform: VectorTransform {
216
202
  // concatenation of PCA + ITQ transformation
217
203
  LinearTransform pca_then_itq;
218
204
 
219
- explicit ITQTransform (int d_in = 0, int d_out = 0, bool do_pca = false);
220
-
221
- void train (idx_t n, const float *x) override;
205
+ explicit ITQTransform(int d_in = 0, int d_out = 0, bool do_pca = false);
222
206
 
223
- void apply_noalloc (idx_t n, const float* x, float* xt) const override;
207
+ void train(idx_t n, const float* x) override;
224
208
 
209
+ void apply_noalloc(idx_t n, const float* x, float* xt) const override;
225
210
  };
226
211
 
227
-
228
212
  struct ProductQuantizer;
229
213
 
230
214
  /** Applies a rotation to align the dimensions with a PQ to minimize
@@ -235,8 +219,7 @@ struct ProductQuantizer;
235
219
  * Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
236
220
  *
237
221
  */
238
- struct OPQMatrix: LinearTransform {
239
-
222
+ struct OPQMatrix : LinearTransform {
240
223
  int M; ///< nb of subquantizers
241
224
  int niter; ///< Number of outer training iterations
242
225
  int niter_pq; ///< Number of training iterations for the PQ
@@ -248,46 +231,43 @@ struct OPQMatrix: LinearTransform {
248
231
 
249
232
  /// if non-NULL, use this product quantizer for training
250
233
  /// should be constructed with (d_out, M, _)
251
- ProductQuantizer * pq;
234
+ ProductQuantizer* pq;
252
235
 
253
236
  /// if d2 != -1, output vectors of this dimension
254
- explicit OPQMatrix (int d = 0, int M = 1, int d2 = -1);
237
+ explicit OPQMatrix(int d = 0, int M = 1, int d2 = -1);
255
238
 
256
239
  void train(idx_t n, const float* x) override;
257
240
  };
258
241
 
259
-
260
242
  /** remap dimensions for input vectors, possibly inserting 0s
261
243
  * strictly speaking this is also a linear transform but we don't want
262
244
  * to compute it with matrix multiplies */
263
- struct RemapDimensionsTransform: VectorTransform {
264
-
245
+ struct RemapDimensionsTransform : VectorTransform {
265
246
  /// map from output dimension to input, size d_out
266
247
  /// -1 -> set output to 0
267
248
  std::vector<int> map;
268
249
 
269
- RemapDimensionsTransform (int d_in, int d_out, const int *map);
250
+ RemapDimensionsTransform(int d_in, int d_out, const int* map);
270
251
 
271
252
  /// remap input to output, skipping or inserting dimensions as needed
272
253
  /// if uniform: distribute dimensions uniformly
273
254
  /// otherwise just take the d_out first ones.
274
- RemapDimensionsTransform (int d_in, int d_out, bool uniform = true);
255
+ RemapDimensionsTransform(int d_in, int d_out, bool uniform = true);
275
256
 
276
257
  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
277
258
 
278
259
  /// reverse transform correct only when the mapping is a permutation
279
260
  void reverse_transform(idx_t n, const float* xt, float* x) const override;
280
261
 
281
- RemapDimensionsTransform () {}
262
+ RemapDimensionsTransform() {}
282
263
  };
283
264
 
284
-
285
265
  /** per-vector normalization */
286
- struct NormalizationTransform: VectorTransform {
266
+ struct NormalizationTransform : VectorTransform {
287
267
  float norm;
288
268
 
289
- explicit NormalizationTransform (int d, float norm = 2.0);
290
- NormalizationTransform ();
269
+ explicit NormalizationTransform(int d, float norm = 2.0);
270
+ NormalizationTransform();
291
271
 
292
272
  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
293
273
 
@@ -296,12 +276,11 @@ struct NormalizationTransform: VectorTransform {
296
276
  };
297
277
 
298
278
  /** Subtract the mean of each component from the vectors. */
299
- struct CenteringTransform: VectorTransform {
300
-
279
+ struct CenteringTransform : VectorTransform {
301
280
  /// Mean, size d_in = d_out
302
281
  std::vector<float> mean;
303
282
 
304
- explicit CenteringTransform (int d = 0);
283
+ explicit CenteringTransform(int d = 0);
305
284
 
306
285
  /// train on n vectors.
307
286
  void train(idx_t n, const float* x) override;
@@ -310,13 +289,9 @@ struct CenteringTransform: VectorTransform {
310
289
  void apply_noalloc(idx_t n, const float* x, float* xt) const override;
311
290
 
312
291
  /// add the mean
313
- void reverse_transform (idx_t n, const float * xt,
314
- float *x) const override;
315
-
292
+ void reverse_transform(idx_t n, const float* xt, float* x) const override;
316
293
  };
317
294
 
318
-
319
295
  } // namespace faiss
320
296
 
321
-
322
297
  #endif
@@ -14,22 +14,23 @@
14
14
 
15
15
  #include <faiss/impl/FaissAssert.h>
16
16
 
17
+ #include <faiss/Index2Layer.h>
18
+ #include <faiss/IndexAdditiveQuantizer.h>
17
19
  #include <faiss/IndexFlat.h>
18
- #include <faiss/VectorTransform.h>
19
- #include <faiss/IndexPreTransform.h>
20
- #include <faiss/IndexLSH.h>
21
- #include <faiss/IndexPQ.h>
20
+ #include <faiss/IndexHNSW.h>
22
21
  #include <faiss/IndexIVF.h>
22
+ #include <faiss/IndexIVFFlat.h>
23
23
  #include <faiss/IndexIVFPQ.h>
24
24
  #include <faiss/IndexIVFPQR.h>
25
- #include <faiss/Index2Layer.h>
26
- #include <faiss/IndexIVFFlat.h>
27
25
  #include <faiss/IndexIVFSpectralHash.h>
28
- #include <faiss/MetaIndexes.h>
29
- #include <faiss/IndexScalarQuantizer.h>
30
- #include <faiss/IndexHNSW.h>
26
+ #include <faiss/IndexLSH.h>
31
27
  #include <faiss/IndexLattice.h>
32
- #include <faiss/Index2Layer.h>
28
+ #include <faiss/IndexNSG.h>
29
+ #include <faiss/IndexPQ.h>
30
+ #include <faiss/IndexPreTransform.h>
31
+ #include <faiss/IndexScalarQuantizer.h>
32
+ #include <faiss/MetaIndexes.h>
33
+ #include <faiss/VectorTransform.h>
33
34
 
34
35
  namespace faiss {
35
36
 
@@ -37,111 +38,115 @@ namespace faiss {
37
38
  * cloning functions
38
39
  **************************************************************/
39
40
 
40
-
41
-
42
- Index * clone_index (const Index *index)
43
- {
41
+ Index* clone_index(const Index* index) {
44
42
  Cloner cl;
45
- return cl.clone_Index (index);
43
+ return cl.clone_Index(index);
46
44
  }
47
45
 
48
46
  // assumes there is a copy constructor ready. Always try from most
49
47
  // specific to most general. Most indexes don't have complicated
50
48
  // structs, the default copy constructor often just works.
51
- #define TRYCLONE(classname, obj) \
52
- if (const classname *clo = dynamic_cast<const classname *>(obj)) { \
53
- return new classname(*clo); \
49
+ #define TRYCLONE(classname, obj) \
50
+ if (const classname* clo = dynamic_cast<const classname*>(obj)) { \
51
+ return new classname(*clo); \
54
52
  } else
55
53
 
56
- VectorTransform *Cloner::clone_VectorTransform (const VectorTransform *vt)
57
- {
58
- TRYCLONE (RemapDimensionsTransform, vt)
59
- TRYCLONE (OPQMatrix, vt)
60
- TRYCLONE (PCAMatrix, vt)
61
- TRYCLONE (ITQMatrix, vt)
62
- TRYCLONE (RandomRotationMatrix, vt)
63
- TRYCLONE (LinearTransform, vt)
64
- {
65
- FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
54
+ VectorTransform* Cloner::clone_VectorTransform(const VectorTransform* vt) {
55
+ TRYCLONE(RemapDimensionsTransform, vt)
56
+ TRYCLONE(OPQMatrix, vt)
57
+ TRYCLONE(PCAMatrix, vt)
58
+ TRYCLONE(ITQMatrix, vt)
59
+ TRYCLONE(RandomRotationMatrix, vt)
60
+ TRYCLONE(LinearTransform, vt) {
61
+ FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
66
62
  }
67
63
  return nullptr;
68
64
  }
69
65
 
70
- IndexIVF * Cloner::clone_IndexIVF (const IndexIVF *ivf)
71
- {
72
- TRYCLONE (IndexIVFPQR, ivf)
73
- TRYCLONE (IndexIVFPQ, ivf)
74
- TRYCLONE (IndexIVFFlat, ivf)
75
- TRYCLONE (IndexIVFScalarQuantizer, ivf)
76
- {
77
- FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
66
+ IndexIVF* Cloner::clone_IndexIVF(const IndexIVF* ivf) {
67
+ TRYCLONE(IndexIVFPQR, ivf)
68
+ TRYCLONE(IndexIVFPQ, ivf)
69
+ TRYCLONE(IndexIVFFlat, ivf)
70
+ TRYCLONE(IndexIVFScalarQuantizer, ivf) {
71
+ FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
78
72
  }
79
73
  return nullptr;
80
74
  }
81
75
 
82
- Index *Cloner::clone_Index (const Index *index)
83
- {
84
- TRYCLONE (IndexPQ, index)
85
- TRYCLONE (IndexLSH, index)
86
- TRYCLONE (IndexFlatL2, index)
87
- TRYCLONE (IndexFlatIP, index)
88
- TRYCLONE (IndexFlat, index)
89
- TRYCLONE (IndexLattice, index)
90
- TRYCLONE (IndexScalarQuantizer, index)
91
- TRYCLONE (MultiIndexQuantizer, index)
92
- if (const IndexIVF * ivf = dynamic_cast<const IndexIVF*>(index)) {
93
- IndexIVF *res = clone_IndexIVF (ivf);
76
+ Index* Cloner::clone_Index(const Index* index) {
77
+ TRYCLONE(IndexPQ, index)
78
+ TRYCLONE(IndexLSH, index)
79
+ TRYCLONE(IndexFlatL2, index)
80
+ TRYCLONE(IndexFlatIP, index)
81
+ TRYCLONE(IndexFlat, index)
82
+ TRYCLONE(IndexLattice, index)
83
+ TRYCLONE(IndexResidualQuantizer, index)
84
+ TRYCLONE(IndexScalarQuantizer, index)
85
+ TRYCLONE(MultiIndexQuantizer, index)
86
+ TRYCLONE(ResidualCoarseQuantizer, index)
87
+ if (const IndexIVF* ivf = dynamic_cast<const IndexIVF*>(index)) {
88
+ IndexIVF* res = clone_IndexIVF(ivf);
94
89
  if (ivf->invlists == nullptr) {
95
90
  res->invlists = nullptr;
96
- } else if (auto *ails = dynamic_cast<const ArrayInvertedLists*>
97
- (ivf->invlists)) {
91
+ } else if (
92
+ auto* ails = dynamic_cast<const ArrayInvertedLists*>(
93
+ ivf->invlists)) {
98
94
  res->invlists = new ArrayInvertedLists(*ails);
99
95
  res->own_invlists = true;
100
96
  } else {
101
- FAISS_THROW_MSG( "clone not supported for this type of inverted lists");
97
+ FAISS_THROW_MSG(
98
+ "clone not supported for this type of inverted lists");
102
99
  }
103
100
  res->own_fields = true;
104
- res->quantizer = clone_Index (ivf->quantizer);
101
+ res->quantizer = clone_Index(ivf->quantizer);
105
102
  return res;
106
- } else if (const IndexPreTransform * ipt =
107
- dynamic_cast<const IndexPreTransform*> (index)) {
108
- IndexPreTransform *res = new IndexPreTransform ();
103
+ } else if (
104
+ const IndexPreTransform* ipt =
105
+ dynamic_cast<const IndexPreTransform*>(index)) {
106
+ IndexPreTransform* res = new IndexPreTransform();
109
107
  res->d = ipt->d;
110
108
  res->ntotal = ipt->ntotal;
111
109
  res->is_trained = ipt->is_trained;
112
110
  res->metric_type = ipt->metric_type;
113
111
  res->metric_arg = ipt->metric_arg;
114
112
 
115
-
116
- res->index = clone_Index (ipt->index);
113
+ res->index = clone_Index(ipt->index);
117
114
  for (int i = 0; i < ipt->chain.size(); i++)
118
- res->chain.push_back (clone_VectorTransform (ipt->chain[i]));
115
+ res->chain.push_back(clone_VectorTransform(ipt->chain[i]));
119
116
  res->own_fields = true;
120
117
  return res;
121
- } else if (const IndexIDMap *idmap =
122
- dynamic_cast<const IndexIDMap*> (index)) {
123
- IndexIDMap *res = new IndexIDMap (*idmap);
118
+ } else if (
119
+ const IndexIDMap* idmap = dynamic_cast<const IndexIDMap*>(index)) {
120
+ IndexIDMap* res = new IndexIDMap(*idmap);
124
121
  res->own_fields = true;
125
- res->index = clone_Index (idmap->index);
122
+ res->index = clone_Index(idmap->index);
126
123
  return res;
127
- } else if (const IndexHNSW *ihnsw =
128
- dynamic_cast<const IndexHNSW*> (index)) {
129
- IndexHNSW *res = new IndexHNSW (*ihnsw);
124
+ } else if (const IndexHNSW* ihnsw = dynamic_cast<const IndexHNSW*>(index)) {
125
+ IndexHNSW* res = new IndexHNSW(*ihnsw);
130
126
  res->own_fields = true;
131
- res->storage = clone_Index (ihnsw->storage);
127
+ res->storage = clone_Index(ihnsw->storage);
132
128
  return res;
133
- } else if (const Index2Layer *i2l =
134
- dynamic_cast<const Index2Layer*> (index)) {
135
- Index2Layer *res = new Index2Layer (*i2l);
129
+ } else if (const IndexNSG* insg = dynamic_cast<const IndexNSG*>(index)) {
130
+ IndexNSG* res = new IndexNSG(*insg);
131
+
132
+ // copy the dynamic allocated graph
133
+ auto& new_graph = res->nsg.final_graph;
134
+ auto& old_graph = insg->nsg.final_graph;
135
+ new_graph = std::make_shared<nsg::Graph<int>>(*old_graph);
136
+
137
+ res->own_fields = true;
138
+ res->storage = clone_Index(insg->storage);
139
+ return res;
140
+ } else if (
141
+ const Index2Layer* i2l = dynamic_cast<const Index2Layer*>(index)) {
142
+ Index2Layer* res = new Index2Layer(*i2l);
136
143
  res->q1.own_fields = true;
137
- res->q1.quantizer = clone_Index (i2l->q1.quantizer);
144
+ res->q1.quantizer = clone_Index(i2l->q1.quantizer);
138
145
  return res;
139
146
  } else {
140
- FAISS_THROW_MSG( "clone not supported for this type of Index");
147
+ FAISS_THROW_MSG("clone not supported for this type of Index");
141
148
  }
142
149
  return nullptr;
143
150
  }
144
151
 
145
-
146
-
147
152
  } // namespace faiss
@@ -11,28 +11,23 @@
11
11
 
12
12
  #pragma once
13
13
 
14
-
15
-
16
14
  namespace faiss {
17
15
 
18
16
  struct Index;
19
17
  struct IndexIVF;
20
18
  struct VectorTransform;
21
19
 
22
-
23
20
  /* cloning functions */
24
- Index *clone_index (const Index *);
21
+ Index* clone_index(const Index*);
25
22
 
26
23
  /** Cloner class, useful to override classes with other cloning
27
24
  * functions. The cloning function above just calls
28
25
  * Cloner::clone_Index. */
29
26
  struct Cloner {
30
- virtual VectorTransform *clone_VectorTransform (const VectorTransform *);
31
- virtual Index *clone_Index (const Index *);
32
- virtual IndexIVF *clone_IndexIVF (const IndexIVF *);
27
+ virtual VectorTransform* clone_VectorTransform(const VectorTransform*);
28
+ virtual Index* clone_Index(const Index*);
29
+ virtual IndexIVF* clone_IndexIVF(const IndexIVF*);
33
30
  virtual ~Cloner() {}
34
31
  };
35
32
 
36
-
37
-
38
33
  } // namespace faiss